mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-15 14:27:06 +01:00
Compare commits
201 Commits
v0.5.0
...
feature/lu
Author | SHA1 | Date | |
---|---|---|---|
d68937654b | |||
7af9b87318 | |||
70f386f9c6 | |||
89cda5971a | |||
c39ec3b607 | |||
8afcc8fb8b | |||
1abbe2fd0c | |||
4fb37ececd | |||
71eaa7c79e | |||
811ca45473 | |||
bc1a4d40ba | |||
c8079e6621 | |||
8b0d171c9a | |||
1f293b76b4 | |||
8bbd9ebc27 | |||
6472b431f0 | |||
bd205a3293 | |||
496beffa88 | |||
9b63e97108 | |||
81f2aeaece | |||
2d4a45c758 | |||
0f182f033b | |||
7240d73184 | |||
42cd148f5e | |||
611b5d74ba | |||
b56c9ffa52 | |||
70c32fa49b | |||
77c8a94dae | |||
2e453dfbf5 | |||
4089984431 | |||
98439847cf | |||
c78bbd0f8c | |||
7ea4b959a4 | |||
536e2ff073 | |||
798ff34d7e | |||
04a437c92c | |||
5c190a1b8c | |||
15d8f5c88c | |||
c4ac6e7e8f | |||
510e340e16 | |||
6ffadca153 | |||
b6597b74e7 | |||
d2573189d8 | |||
65ca174dbb | |||
0724f7af75 | |||
2e74520821 | |||
6dd75ad9e5 | |||
fda408ee6f | |||
b9c80318a2 | |||
5df5d52d41 | |||
f76f281e58 | |||
aa20cc8b52 | |||
0fd179fb33 | |||
f45ef8d114 | |||
fd5614738d | |||
005dcc51aa | |||
655c893f86 | |||
843f5783b4 | |||
8986c9fedd | |||
c80a1d427c | |||
ae57032500 | |||
f75468728f | |||
5acd856663 | |||
b0d3e4bb2c | |||
b512ccbee6 | |||
8c89391c02 | |||
bfac5195b8 | |||
a782ca3238 | |||
744691097f | |||
ff6da364e8 | |||
4d11a6f5f2 | |||
88be3b39bb | |||
8a02824e08 | |||
356e7940fd | |||
73ce476890 | |||
29c4ef41de | |||
e423a09974 | |||
17097a93ec | |||
94a6373a7f | |||
4ab7dbfd57 | |||
90e70790f3 | |||
9c2e8d5e28 | |||
147e2025b9 | |||
573b8c6020 | |||
15218ec57f | |||
ec68e08dd2 | |||
fc25d2295c | |||
8dc2cfcedb | |||
836f93780c | |||
5a68715be3 | |||
32bc7a6ab8 | |||
b65e72e521 | |||
d1aaff65e8 | |||
93d29bb699 | |||
3b376ed54e | |||
d5c1f614ba | |||
2edc24225d | |||
629283726b | |||
6adb66dd08 | |||
5be92bb708 | |||
f4c049ea6d | |||
bc092ad30f | |||
dad642ed1b | |||
63ae39abc7 | |||
9e5b934d21 | |||
a7b483d67a | |||
bb99ce0680 | |||
83307df1af | |||
49b5c49851 | |||
e9f30cab2c | |||
089f0ab582 | |||
df6c9f55d1 | |||
b93e18ed50 | |||
9c77bb69a5 | |||
27f3ecc833 | |||
f9e90eeb1f | |||
fad5c675eb | |||
4908b77d46 | |||
f4dd5062d7 | |||
da34d75841 | |||
980ff18956 | |||
7edf4c6c04 | |||
1a6c7204ac | |||
49310fbab3 | |||
6049d5ac47 | |||
35d0d35238 | |||
c0e878705e | |||
5c0c8efb9e | |||
dfd714e1ef | |||
79a8ca1a62 | |||
fb45eb2eb2 | |||
a307274c96 | |||
3f2c44a5fe | |||
48fb1cdc11 | |||
8a79e93cc2 | |||
3493b51879 | |||
de3e79d300 | |||
dd62a61c5c | |||
8f47d0b5ab | |||
42af132dab | |||
9db2c6525d | |||
adbc7c1188 | |||
9dc345e8e8 | |||
8b9301a74c | |||
6f47fbb1e2 | |||
a9ae30f868 | |||
a3c0fb79b6 | |||
62601bb649 | |||
ef97e32152 | |||
daea5297ee | |||
5028969d4b | |||
c667d9fdcc | |||
7dbb94bab2 | |||
236dcc820b | |||
a42a441a6a | |||
a0676beeb1 | |||
c5106d0c03 | |||
fbf96b1bbb | |||
3c49ddfaa4 | |||
ffb8b3116c | |||
290493e162 | |||
dd8cfff111 | |||
184642adb0 | |||
4774a3bcd2 | |||
25fafa9a89 | |||
713520d3d2 | |||
85ed8175cb | |||
df5c788ef2 | |||
15f22425c8 | |||
e87182cf98 | |||
e3d5319470 | |||
ffedeb1c58 | |||
3e3b367aa9 | |||
3e80947c2b | |||
fdfbf11c6d | |||
9cb90f714e | |||
6ce174cd60 | |||
17ca5240f7 | |||
2daffdf95d | |||
149f826601 | |||
cd8ee27080 | |||
0fa66e8f3c | |||
8dd099267d | |||
1a6d65c6a4 | |||
fc4a043663 | |||
61ba50665e | |||
bfe14000a9 | |||
092fa0d8da | |||
1ceff48133 | |||
680645f849 | |||
565e9329ba | |||
5e02392f9c | |||
339be37dba | |||
a87b744621 | |||
97d0d56bcb | |||
7c7ea35ffb | |||
4b1cf580e0 | |||
2d8bb356e3 | |||
a7251f28c7 | |||
c1b1b89d17 | |||
771235017d |
30
.gitignore
vendored
30
.gitignore
vendored
@ -5,7 +5,6 @@
|
|||||||
*.o
|
*.o
|
||||||
*.obj
|
*.obj
|
||||||
|
|
||||||
|
|
||||||
# Editor files #
|
# Editor files #
|
||||||
################
|
################
|
||||||
*~
|
*~
|
||||||
@ -48,6 +47,7 @@ Config.h.in
|
|||||||
config.log
|
config.log
|
||||||
config.status
|
config.status
|
||||||
.deps
|
.deps
|
||||||
|
*.inc
|
||||||
|
|
||||||
# http://www.gnu.org/software/autoconf #
|
# http://www.gnu.org/software/autoconf #
|
||||||
########################################
|
########################################
|
||||||
@ -62,19 +62,8 @@ stamp-h1
|
|||||||
config.sub
|
config.sub
|
||||||
config.guess
|
config.guess
|
||||||
INSTALL
|
INSTALL
|
||||||
|
.dirstamp
|
||||||
# Packages #
|
ltmain.sh
|
||||||
############
|
|
||||||
# it's better to unpack these files and commit the raw source
|
|
||||||
# git has its own built in compression methods
|
|
||||||
*.7z
|
|
||||||
*.dmg
|
|
||||||
*.gz
|
|
||||||
*.iso
|
|
||||||
*.jar
|
|
||||||
*.rar
|
|
||||||
*.tar
|
|
||||||
*.zip
|
|
||||||
|
|
||||||
# Logs and databases #
|
# Logs and databases #
|
||||||
######################
|
######################
|
||||||
@ -100,3 +89,16 @@ build*/*
|
|||||||
#####################
|
#####################
|
||||||
*.xcodeproj/*
|
*.xcodeproj/*
|
||||||
build.sh
|
build.sh
|
||||||
|
|
||||||
|
# Eigen source #
|
||||||
|
################
|
||||||
|
lib/Eigen/*
|
||||||
|
|
||||||
|
# FFTW source #
|
||||||
|
################
|
||||||
|
lib/fftw/*
|
||||||
|
|
||||||
|
# libtool macros #
|
||||||
|
##################
|
||||||
|
m4/lt*
|
||||||
|
m4/libtool.m4
|
28
.travis.yml
28
.travis.yml
@ -9,10 +9,6 @@ matrix:
|
|||||||
- os: osx
|
- os: osx
|
||||||
osx_image: xcode7.2
|
osx_image: xcode7.2
|
||||||
compiler: clang
|
compiler: clang
|
||||||
- os: osx
|
|
||||||
osx_image: xcode7.2
|
|
||||||
compiler: gcc
|
|
||||||
env: VERSION=-5
|
|
||||||
- compiler: gcc
|
- compiler: gcc
|
||||||
addons:
|
addons:
|
||||||
apt:
|
apt:
|
||||||
@ -23,6 +19,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: VERSION=-4.9
|
env: VERSION=-4.9
|
||||||
- compiler: gcc
|
- compiler: gcc
|
||||||
@ -35,6 +33,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: VERSION=-5
|
env: VERSION=-5
|
||||||
- compiler: clang
|
- compiler: clang
|
||||||
@ -47,6 +47,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
- compiler: clang
|
- compiler: clang
|
||||||
@ -59,6 +61,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
|
|
||||||
@ -69,6 +73,7 @@ before_install:
|
|||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
||||||
|
|
||||||
install:
|
install:
|
||||||
@ -82,9 +87,20 @@ install:
|
|||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- ./scripts/reconfigure_script
|
- ./bootstrap.sh
|
||||||
- mkdir build
|
- mkdir build
|
||||||
- cd build
|
- cd build
|
||||||
- ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||||
|
- make -j4
|
||||||
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
|
- echo make clean
|
||||||
|
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||||
- make -j4
|
- make -j4
|
||||||
- ./benchmarks/Benchmark_dwf --threads 1
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
|
- echo make clean
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
|
||||||
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
|
||||||
|
- make -j4
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
# additional include paths necessary to compile the C++ library
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/
|
SUBDIRS = lib benchmarks tests
|
||||||
SUBDIRS = lib tests benchmarks
|
|
||||||
|
|
||||||
filelist: $(SUBDIRS)
|
AM_CXXFLAGS += -I$(top_builddir)/include
|
||||||
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
110
README.md
110
README.md
@ -1,8 +1,28 @@
|
|||||||
# Grid [](https://travis-ci.org/paboyle/Grid)
|
# Grid
|
||||||
Data parallel C++ mathematical object library
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td>Last stable release</td>
|
||||||
|
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||||
|
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Development branch</td>
|
||||||
|
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||||
|
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
Last update 2015/7/30
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
|
Please send all pull requests to the `develop` branch.
|
||||||
|
|
||||||
|
License: GPL v2.
|
||||||
|
|
||||||
|
Last update 2016/08/03.
|
||||||
|
|
||||||
|
### Description
|
||||||
This library provides data parallel C++ container classes with internal memory layout
|
This library provides data parallel C++ container classes with internal memory layout
|
||||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
||||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
||||||
@ -22,37 +42,75 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
|||||||
for most programmers.
|
for most programmers.
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way).
|
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way).
|
||||||
|
|
||||||
These are presented as
|
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
|
||||||
|
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
|
||||||
vRealF, vRealD, vComplexF, vComplexD
|
|
||||||
|
|
||||||
internal vector data types. These may be useful in themselves for other programmers.
|
|
||||||
The corresponding scalar types are named
|
|
||||||
|
|
||||||
RealF, RealD, ComplexF, ComplexD
|
|
||||||
|
|
||||||
MPI, OpenMP, and SIMD parallelism are present in the library.
|
MPI, OpenMP, and SIMD parallelism are present in the library.
|
||||||
|
Please see https://arxiv.org/abs/1512.03487 for more detail.
|
||||||
|
|
||||||
You can give `configure' initial values for configuration parameters
|
### Installation
|
||||||
by setting variables in the command line or in the environment. Here
|
First, start by cloning the repository:
|
||||||
are examples:
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
``` bash
|
||||||
|
git clone https://github.com/paboyle/Grid.git
|
||||||
|
```
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX
|
Then enter the cloned directory and set up the build system:
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
``` bash
|
||||||
|
cd Grid
|
||||||
|
./bootstrap.sh
|
||||||
|
```
|
||||||
|
|
||||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
Now you can execute the `configure` script to generate makefiles (here from a build directory):
|
||||||
|
|
||||||
Note: Before running configure it could be necessary to execute the script
|
|
||||||
|
|
||||||
script/filelist
|
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
mkdir build; cd build
|
||||||
|
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||||
|
```
|
||||||
|
|
||||||
|
where `--enable-precision=` sets the default precision (`single` or `double`),
|
||||||
For developers:
|
`--enable-simd=` sets the SIMD type (see possible values below), `--enable-
|
||||||
Use reconfigure_script in the scripts/ directory to create the autotools environment
|
comms=` sets the protocol used for communications (`none`, `mpi`, `mpi-auto` or
|
||||||
|
`shmem`), and `<path>` should be replaced by the prefix path where you want to
|
||||||
|
install Grid. The `mpi-auto` communication option lets `configure` determine
|
||||||
|
automatically how to link to MPI. Other options are available, use `configure
|
||||||
|
--help` to display them. As with any other program using GNU Autotools, the
|
||||||
|
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
||||||
|
customise the build.
|
||||||
|
|
||||||
|
Finally, you can build and install Grid:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
make; make install
|
||||||
|
```
|
||||||
|
|
||||||
|
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
make -C tests/<subdir> tests
|
||||||
|
```
|
||||||
|
|
||||||
|
### Possible SIMD types
|
||||||
|
|
||||||
|
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
|
||||||
|
|
||||||
|
| String | Description |
|
||||||
|
| ----------- | -------------------------------------- |
|
||||||
|
| `GEN` | generic portable vector code |
|
||||||
|
| `SSE4` | SSE 4.2 (128 bit) |
|
||||||
|
| `AVX` | AVX (256 bit) |
|
||||||
|
| `AVXFMA4` | AVX (256 bit) + FMA |
|
||||||
|
| `AVX2` | AVX 2 (256 bit) |
|
||||||
|
| `AVX512` | AVX 512 bit |
|
||||||
|
| `AVX512MIC` | AVX 512 bit for Intel MIC architecture |
|
||||||
|
| `ICMI`      | Intel IMCI instructions (512 bit)      |
|
||||||
|
|
||||||
|
Alternatively, some CPU codenames can be directly used:
|
||||||
|
|
||||||
|
| String | Description |
|
||||||
|
| ----------- | -------------------------------------- |
|
||||||
|
| `KNC` | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
|
||||||
|
| `KNL` | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
|
@ -25,7 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -194,7 +194,128 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
for(int lat=4;lat<=32;lat+=2){
|
||||||
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||||
|
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||||
|
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<CartesianCommunicator::CommsRequest_t> empty;
|
||||||
|
std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_fwd(Nd,empty);
|
||||||
|
std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_bwd(Nd,empty);
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
ncomm=0;
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
ncomm++;
|
||||||
|
|
||||||
|
int comm_proc;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
comm_proc=1;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.SendToRecvFromInit(requests_fwd[mu],
|
||||||
|
(void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.SendToRecvFromInit(requests_bwd[mu],
|
||||||
|
(void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
Grid.SendToRecvFromBegin(requests_fwd[mu]);
|
||||||
|
Grid.SendToRecvFromComplete(requests_fwd[mu]);
|
||||||
|
Grid.SendToRecvFromBegin(requests_bwd[mu]);
|
||||||
|
Grid.SendToRecvFromComplete(requests_bwd[mu]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
Grid.SendToRecvFromBegin(requests_fwd[mu]);
|
||||||
|
Grid.SendToRecvFromBegin(requests_bwd[mu]);
|
||||||
|
Grid.SendToRecvFromComplete(requests_fwd[mu]);
|
||||||
|
Grid.SendToRecvFromComplete(requests_bwd[mu]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -46,9 +45,9 @@ struct scal {
|
|||||||
};
|
};
|
||||||
|
|
||||||
bool overlapComms = false;
|
bool overlapComms = false;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
|
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
|
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
@ -71,8 +70,8 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
|
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
@ -87,8 +86,6 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion tmp(FGrid);
|
LatticeFermion tmp(FGrid);
|
||||||
LatticeFermion err(FGrid);
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
random(RNG4,Umu);
|
random(RNG4,Umu);
|
||||||
|
|
||||||
@ -127,21 +124,20 @@ int main (int argc, char ** argv)
|
|||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
typename DomainWallFermionR::ImplParams params;
|
|
||||||
params.overlapCommsCompute = overlapComms;
|
|
||||||
|
|
||||||
RealD NP = UGrid->_Nprocessors;
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
|
||||||
for(int doasm=1;doasm<2;doasm++){
|
for(int doasm=1;doasm<2;doasm++){
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=doasm;
|
QCD::WilsonKernelsStatic::AsmOpt=doasm;
|
||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl;
|
||||||
int ncall =10;
|
std::cout << GridLogMessage<< "Calling Dw"<<std::endl;
|
||||||
|
int ncall =100;
|
||||||
if (1) {
|
if (1) {
|
||||||
|
|
||||||
|
Dw.ZeroCounters();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
__SSC_START;
|
||||||
@ -160,16 +156,17 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
// Dw.Report();
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
LatticeFermionF ssrc(sFGrid);
|
LatticeFermion ssrc(sFGrid);
|
||||||
LatticeFermionF sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermionF sresult(sFGrid);
|
LatticeFermion sresult(sFGrid);
|
||||||
WilsonFermion5DF sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
|
|
||||||
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
@ -177,12 +174,13 @@ int main (int argc, char ** argv)
|
|||||||
for(int t=0;t<latt4[3];t++){
|
for(int t=0;t<latt4[3];t++){
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
std::vector<int> site({s,x,y,z,t});
|
std::vector<int> site({s,x,y,z,t});
|
||||||
SpinColourVectorF tmp;
|
SpinColourVector tmp;
|
||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
|
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
|
sDw.ZeroCounters();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
__SSC_START;
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
@ -192,22 +190,23 @@ int main (int argc, char ** argv)
|
|||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
// sDw.Report();
|
sDw.Report();
|
||||||
|
|
||||||
if(0){
|
if(0){
|
||||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
PerformanceCounter Counter(i);
|
PerformanceCounter Counter(i);
|
||||||
Counter.Start();
|
Counter.Start();
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
Counter.Stop();
|
Counter.Stop();
|
||||||
Counter.Report();
|
Counter.Report();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
RealF sum=0;
|
RealF sum=0;
|
||||||
@ -217,21 +216,23 @@ int main (int argc, char ** argv)
|
|||||||
for(int t=0;t<latt4[3];t++){
|
for(int t=0;t<latt4[3];t++){
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
std::vector<int> site({s,x,y,z,t});
|
std::vector<int> site({s,x,y,z,t});
|
||||||
SpinColourVectorF normal, simd;
|
SpinColourVector normal, simd;
|
||||||
peekSite(normal,result,site);
|
peekSite(normal,result,site);
|
||||||
peekSite(simd,sresult,site);
|
peekSite(simd,sresult,site);
|
||||||
sum=sum+norm2(normal-simd);
|
sum=sum+norm2(normal-simd);
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
if (norm2(normal-simd) > 1.0e-6 ) {
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||||
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||||
|
}
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
|
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
|
|
||||||
LatticeFermionF sr_eo(sFGrid);
|
LatticeFermion sr_eo(sFGrid);
|
||||||
LatticeFermionF serr(sFGrid);
|
LatticeFermion serr(sFGrid);
|
||||||
|
|
||||||
LatticeFermion ssrc_e (sFrbGrid);
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
LatticeFermion ssrc_o (sFrbGrid);
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
@ -249,17 +250,21 @@ int main (int argc, char ** argv)
|
|||||||
sr_e = zero;
|
sr_e = zero;
|
||||||
sr_o = zero;
|
sr_o = zero;
|
||||||
|
|
||||||
|
sDw.ZeroCounters();
|
||||||
|
sDw.stat.init("DhopEO");
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for (int i = 0; i < ncall; i++) {
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
sDw.stat.print();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
sDw.Report();
|
||||||
|
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
|
sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
|
||||||
@ -268,9 +273,9 @@ int main (int argc, char ** argv)
|
|||||||
pickCheckerboard(Even,ssrc_e,sresult);
|
pickCheckerboard(Even,ssrc_e,sresult);
|
||||||
pickCheckerboard(Odd ,ssrc_o,sresult);
|
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||||
ssrc_e = ssrc_e - sr_e;
|
ssrc_e = ssrc_e - sr_e;
|
||||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<<std::endl;
|
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||||
ssrc_o = ssrc_o - sr_o;
|
ssrc_o = ssrc_o - sr_o;
|
||||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<<std::endl;
|
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -284,18 +289,19 @@ int main (int argc, char ** argv)
|
|||||||
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
|
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
for(int i=0;i<ref._odata.size();i++){
|
||||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
tmp =adj(U[mu])*src;
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
tmp =Cshift(tmp,mu+1,-1);
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
for(int i=0;i<ref._odata.size();i++){
|
||||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
}
|
}
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,1);
|
||||||
|
std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl;
|
||||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
@ -317,6 +323,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
||||||
|
|
||||||
{
|
{
|
||||||
|
Dw.ZeroCounters();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
@ -328,6 +335,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
Dw.Report();
|
||||||
}
|
}
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||||
|
@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -27,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -53,7 +51,7 @@ int main (int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
const int Ls=16;
|
const int Ls=8;
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
@ -63,6 +61,8 @@ int main (int argc, char ** argv)
|
|||||||
QCD::WilsonKernelsStatic::AsmOpt=0;
|
QCD::WilsonKernelsStatic::AsmOpt=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
@ -127,7 +127,6 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
ColourMatrix cm = Complex(1.0,0.0);
|
||||||
|
|
||||||
|
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
LatticeGaugeField Umu5d(FGrid);
|
||||||
|
|
||||||
// replicate across fifth dimension
|
// replicate across fifth dimension
|
||||||
@ -146,11 +145,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CHECK
|
#ifdef CHECK
|
||||||
if (1)
|
if (1) {
|
||||||
{
|
|
||||||
ref = zero;
|
ref = zero;
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||||
|
|
||||||
@ -194,20 +192,19 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
Counter.Report();
|
Counter.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! report )
|
if ( ! report ) {
|
||||||
{
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double flops=1344*volume*ncall;
|
||||||
double flops=1344*volume*ncall;
|
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
||||||
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CHECK
|
#ifdef CHECK
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
RealD errd = norm2(err);
|
RealD errd = norm2(err);
|
||||||
if ( errd> 1.0e-4 ) {
|
if ( errd> 1.0e-4 ) {
|
||||||
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
@ -233,10 +230,9 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
std::cout<< flops/(t1-t0);
|
std::cout<< flops/(t1-t0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef CHECK_SDW
|
#define CHECK_SDW
|
||||||
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -244,7 +240,9 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
|||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
@ -278,93 +276,89 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
LatticeFermionF ssrc(sFGrid);
|
LatticeFermion ssrc(sFGrid);
|
||||||
LatticeFermionF sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermionF sresult(sFGrid);
|
LatticeFermion sresult(sFGrid);
|
||||||
WilsonFermion5DF sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
for(int z=0;z<latt4[2];z++){
|
for(int z=0;z<latt4[2];z++){
|
||||||
for(int t=0;t<latt4[3];t++){
|
for(int t=0;t<latt4[3];t++){
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
std::vector<int> site({s,x,y,z,t});
|
std::vector<int> site({s,x,y,z,t});
|
||||||
SpinColourVectorF tmp;
|
SpinColourVector tmp;
|
||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
|
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
|
||||||
#ifdef TIMERS_OFF
|
#ifdef TIMERS_OFF
|
||||||
int ncall =10;
|
int ncall =10;
|
||||||
#else
|
#else
|
||||||
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PerformanceCounter Counter(8);
|
PerformanceCounter Counter(8);
|
||||||
Counter.Start();
|
Counter.Start();
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
}
|
}
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
Counter.Stop();
|
Counter.Stop();
|
||||||
|
|
||||||
|
if ( report ) {
|
||||||
|
Counter.Report();
|
||||||
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
|
}
|
||||||
|
|
||||||
if ( report ) {
|
LatticeFermion sr_eo(sFGrid);
|
||||||
Counter.Report();
|
LatticeFermion serr(sFGrid);
|
||||||
} else {
|
|
||||||
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
double flops=1344*volume*ncall;
|
LatticeFermion sr_e (sFrbGrid);
|
||||||
std::cout<<"\t"<< flops/(t1-t0);
|
LatticeFermion sr_o (sFrbGrid);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
LatticeFermionF sr_eo(sFGrid);
|
|
||||||
LatticeFermionF serr(sFGrid);
|
|
||||||
|
|
||||||
LatticeFermion ssrc_e (sFrbGrid);
|
|
||||||
LatticeFermion ssrc_o (sFrbGrid);
|
|
||||||
LatticeFermion sr_e (sFrbGrid);
|
|
||||||
LatticeFermion sr_o (sFrbGrid);
|
|
||||||
|
|
||||||
pickCheckerboard(Even,ssrc_e,ssrc);
|
pickCheckerboard(Even,ssrc_e,ssrc);
|
||||||
pickCheckerboard(Odd,ssrc_o,ssrc);
|
pickCheckerboard(Odd,ssrc_o,ssrc);
|
||||||
|
|
||||||
setCheckerboard(sr_eo,ssrc_o);
|
setCheckerboard(sr_eo,ssrc_o);
|
||||||
setCheckerboard(sr_eo,ssrc_e);
|
setCheckerboard(sr_eo,ssrc_e);
|
||||||
|
|
||||||
sr_e = zero;
|
|
||||||
sr_o = zero;
|
|
||||||
|
|
||||||
|
sr_e = zero;
|
||||||
|
sr_o = zero;
|
||||||
|
|
||||||
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
|
PerformanceCounter CounterSdw(8);
|
||||||
|
CounterSdw.Start();
|
||||||
|
t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
PerformanceCounter CounterSdw(8);
|
__SSC_STOP;
|
||||||
CounterSdw.Start();
|
}
|
||||||
t0=usecond();
|
t1=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
CounterSdw.Stop();
|
||||||
__SSC_START;
|
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
|
||||||
__SSC_STOP;
|
|
||||||
}
|
|
||||||
t1=usecond();
|
|
||||||
CounterSdw.Stop();
|
|
||||||
|
|
||||||
if ( report ) {
|
if ( report ) {
|
||||||
CounterSdw.Report();
|
CounterSdw.Report();
|
||||||
} else {
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
std::cout<<"\t"<< flops/(t1-t0);
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -26,7 +26,7 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
117
benchmarks/Benchmark_wilson_sweep.cc
Normal file
117
benchmarks/Benchmark_wilson_sweep.cc
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
Source file: ./benchmarks/Benchmark_wilson_sweep.cc
|
||||||
|
Copyright (C) 2015
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Richard Rollins <rprollins@users.noreply.github.com>
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
template<class d>
|
||||||
|
struct scal {
|
||||||
|
d internal;
|
||||||
|
};
|
||||||
|
|
||||||
|
Gamma::GammaMatrix Gmu [] = {
|
||||||
|
Gamma::GammaX,
|
||||||
|
Gamma::GammaY,
|
||||||
|
Gamma::GammaZ,
|
||||||
|
Gamma::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
bool overlapComms = false;
|
||||||
|
|
||||||
|
void bench_wilson (
|
||||||
|
LatticeFermion & src,
|
||||||
|
LatticeFermion & result,
|
||||||
|
WilsonFermionR & Dw,
|
||||||
|
double const volume,
|
||||||
|
int const dag );
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
|
||||||
|
typename WilsonFermionR::ImplParams params;
|
||||||
|
params.overlapCommsCompute = overlapComms;
|
||||||
|
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
RealD mass = 0.1;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
|
||||||
|
int Lmax = 32;
|
||||||
|
int dmin = 0;
|
||||||
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
|
for (int L=8; L<=Lmax; L*=2)
|
||||||
|
{
|
||||||
|
std::vector<int> latt_size = std::vector<int>(4,L);
|
||||||
|
for(int d=4; d>dmin; d--)
|
||||||
|
{
|
||||||
|
if ( d<=3 ) { latt_size[d] *= 2; }
|
||||||
|
|
||||||
|
std::cout << GridLogMessage;
|
||||||
|
std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
|
||||||
|
std::cout << latt_size.back() << "\t\t";
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||||
|
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||||
|
LatticeFermion src(&Grid); random(pRNG,src);
|
||||||
|
LatticeFermion result(&Grid); result=zero;
|
||||||
|
|
||||||
|
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
|
||||||
|
|
||||||
|
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
|
||||||
|
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerNo);
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerYes);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void bench_wilson (
|
||||||
|
LatticeFermion & src,
|
||||||
|
LatticeFermion & result,
|
||||||
|
WilsonFermionR & Dw,
|
||||||
|
double const volume,
|
||||||
|
int const dag )
|
||||||
|
{
|
||||||
|
int ncall = 1000;
|
||||||
|
double t0 = usecond();
|
||||||
|
for(int i=0; i<ncall; i++) { Dw.Dhop(src,result,dag); }
|
||||||
|
double t1 = usecond();
|
||||||
|
double flops = 1344 * volume * ncall;
|
||||||
|
std::cout << flops/(t1-t0) << "\t\t";
|
||||||
|
}
|
@ -25,8 +25,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -41,14 +40,20 @@ int main(int argc,char **argv)
|
|||||||
std::ofstream os("zmm.dat");
|
std::ofstream os("zmm.dat");
|
||||||
|
|
||||||
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
|
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
|
||||||
for(int L=4;L<=32;L+=4){
|
for(int L=4;L<=32;L+=4){
|
||||||
for(int m=1;m<=2;m++){
|
for(int m=1;m<=2;m++){
|
||||||
for(int Ls=8;Ls<=16;Ls+=8){
|
for(int Ls=8;Ls<=16;Ls+=8){
|
||||||
std::vector<int> grid({L,L,m*L,m*L});
|
std::vector<int> grid({L,L,m*L,m*L});
|
||||||
|
std::cout << GridLogMessage <<"\t";
|
||||||
for(int i=0;i<4;i++) {
|
for(int i=0;i<4;i++) {
|
||||||
std::cout << grid[i]<<"x";
|
std::cout << grid[i]<<"x";
|
||||||
}
|
}
|
||||||
std::cout << Ls<<std::endl;
|
std::cout << Ls<<"\t\t";
|
||||||
bench(os,grid,Ls);
|
bench(os,grid,Ls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -105,7 +110,6 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
|
||||||
int ncall=50;
|
int ncall=50;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
@ -117,7 +121,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
double flops=1344*volume/2;
|
double flops=1344*volume/2;
|
||||||
|
|
||||||
mfc = flops*ncall/(t1-t0);
|
mfc = flops*ncall/(t1-t0);
|
||||||
std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s = "<< mfc<<std::endl;
|
std::cout<<mfc<<"\t\t";
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=1;
|
QCD::WilsonKernelsStatic::AsmOpt=1;
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
@ -126,7 +130,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
}
|
}
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
mfa = flops*ncall/(t1-t0);
|
mfa = flops*ncall/(t1-t0);
|
||||||
std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s = "<< mfa<<std::endl;
|
std::cout<<mfa<<"\t\t";
|
||||||
/*
|
/*
|
||||||
int dag=DaggerNo;
|
int dag=DaggerNo;
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
@ -164,8 +168,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
//resulta = (-0.5) * resulta;
|
//resulta = (-0.5) * resulta;
|
||||||
|
|
||||||
diff = resulto-resulta;
|
diff = resulto-resulta;
|
||||||
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
|
std::cout<<norm2(diff)<<std::endl;
|
||||||
std::cout<<std::endl;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
|
|
||||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_dwf_ntpf Benchmark_dwf_sweep Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
|
|
||||||
Benchmark_comms_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
|
|
||||||
Benchmark_dwf_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_ntpf_SOURCES=Benchmark_dwf_ntpf.cc
|
|
||||||
Benchmark_dwf_ntpf_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_sweep_SOURCES=Benchmark_dwf_sweep.cc
|
|
||||||
Benchmark_dwf_sweep_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
|
|
||||||
Benchmark_memory_asynch_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
|
|
||||||
Benchmark_memory_bandwidth_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_su3_SOURCES=Benchmark_su3.cc
|
|
||||||
Benchmark_su3_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
|
|
||||||
Benchmark_wilson_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_zmm_SOURCES=Benchmark_zmm.cc
|
|
||||||
Benchmark_zmm_LDADD=-lGrid
|
|
||||||
|
|
@ -1,8 +1 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/lib
|
|
||||||
AM_LDFLAGS = -L$(top_builddir)/lib
|
|
||||||
|
|
||||||
#
|
|
||||||
# Test code
|
|
||||||
#
|
|
||||||
include Make.inc
|
include Make.inc
|
||||||
|
19
bootstrap.sh
Executable file
19
bootstrap.sh
Executable file
@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
|
||||||
|
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz
|
||||||
|
|
||||||
|
echo "-- deploying Eigen source..."
|
||||||
|
wget ${EIGEN_URL} --no-check-certificate
|
||||||
|
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
|
||||||
|
rm `basename ${EIGEN_URL}`
|
||||||
|
|
||||||
|
echo "-- copying fftw prototypes..."
|
||||||
|
wget ${FFTW_URL}
|
||||||
|
./scripts/update_fftw.sh `basename ${FFTW_URL}`
|
||||||
|
rm `basename ${FFTW_URL}`
|
||||||
|
|
||||||
|
echo '-- generating Make.inc files...'
|
||||||
|
./scripts/filelist
|
||||||
|
echo '-- generating configure script...'
|
||||||
|
autoreconf -fvi
|
445
configure.ac
445
configure.ac
@ -1,315 +1,362 @@
|
|||||||
# -*- Autoconf -*-
|
|
||||||
# Process this file with autoconf to produce a configure script.
|
|
||||||
#
|
|
||||||
# Project Grid package
|
|
||||||
#
|
|
||||||
# Time-stamp: <2015-07-10 17:46:21 neo>
|
|
||||||
|
|
||||||
AC_PREREQ([2.63])
|
AC_PREREQ([2.63])
|
||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_BUILD
|
||||||
|
AC_CANONICAL_HOST
|
||||||
|
AC_CANONICAL_TARGET
|
||||||
AM_INIT_AUTOMAKE(subdir-objects)
|
AM_INIT_AUTOMAKE(subdir-objects)
|
||||||
AC_CONFIG_MACRO_DIR([m4])
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
AC_CONFIG_SRCDIR([lib/Grid.h])
|
AC_CONFIG_SRCDIR([lib/Grid.h])
|
||||||
AC_CONFIG_HEADERS([lib/Config.h])
|
AC_CONFIG_HEADERS([lib/Config.h])
|
||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
AC_MSG_NOTICE([
|
|
||||||
|
|
||||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
############### Checks for programs
|
||||||
Configuring $PACKAGE v$VERSION for $host
|
|
||||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
])
|
|
||||||
|
|
||||||
# Checks for programs.
|
|
||||||
AC_LANG(C++)
|
AC_LANG(C++)
|
||||||
|
CXXFLAGS="-O3 $CXXFLAGS"
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
AC_OPENMP
|
|
||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
|
||||||
AX_EXT
|
|
||||||
|
|
||||||
# Checks for libraries.
|
############ openmp ###############
|
||||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
AC_OPENMP
|
||||||
|
|
||||||
# Checks for header files.
|
ac_openmp=no
|
||||||
|
|
||||||
|
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
||||||
|
ac_openmp=yes
|
||||||
|
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
############### Checks for header files
|
||||||
AC_CHECK_HEADERS(stdint.h)
|
AC_CHECK_HEADERS(stdint.h)
|
||||||
AC_CHECK_HEADERS(mm_malloc.h)
|
AC_CHECK_HEADERS(mm_malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc/malloc.h)
|
AC_CHECK_HEADERS(malloc/malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc.h)
|
AC_CHECK_HEADERS(malloc.h)
|
||||||
AC_CHECK_HEADERS(endian.h)
|
AC_CHECK_HEADERS(endian.h)
|
||||||
AC_CHECK_HEADERS(execinfo.h)
|
AC_CHECK_HEADERS(execinfo.h)
|
||||||
AC_CHECK_HEADERS(gmp.h)
|
|
||||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
||||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
||||||
|
|
||||||
# Checks for typedefs, structures, and compiler characteristics.
|
############### Checks for typedefs, structures, and compiler characteristics
|
||||||
AC_TYPE_SIZE_T
|
AC_TYPE_SIZE_T
|
||||||
AC_TYPE_UINT32_T
|
AC_TYPE_UINT32_T
|
||||||
AC_TYPE_UINT64_T
|
AC_TYPE_UINT64_T
|
||||||
|
|
||||||
# Checks for library functions.
|
############### GMP and MPFR #################
|
||||||
echo
|
AC_ARG_WITH([gmp],
|
||||||
echo Checking libraries
|
[AS_HELP_STRING([--with-gmp=prefix],
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
[try this for a non-standard install prefix of the GMP library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
|
||||||
|
AC_ARG_WITH([mpfr],
|
||||||
|
[AS_HELP_STRING([--with-mpfr=prefix],
|
||||||
|
[try this for a non-standard install prefix of the MPFR library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
|
################## lapack ####################
|
||||||
|
AC_ARG_ENABLE([lapack],
|
||||||
|
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||||
|
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
||||||
|
|
||||||
|
case ${ac_LAPACK} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
|
||||||
|
esac
|
||||||
|
|
||||||
|
################## first-touch ####################
|
||||||
|
AC_ARG_ENABLE([numa],
|
||||||
|
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||||
|
dnl AC_ARG_ENABLE([numa]) stores the user's value in the lower-case shell
dnl variable enable_numa; reading enable_NUMA always yields an empty string,
dnl which made the case below fall through to *) and define GRID_NUMA even
dnl for --disable-numa.
[ac_NUMA=${enable_numa}],[ac_NUMA=no])
|
||||||
|
|
||||||
|
case ${ac_NUMA} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
|
*)
|
||||||
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
################## FFTW3 ####################
|
||||||
|
AC_ARG_WITH([fftw],
|
||||||
|
[AS_HELP_STRING([--with-fftw=prefix],
|
||||||
|
[try this for a non-standard install prefix of the FFTW3 library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
|
################ Get compiler information
|
||||||
|
AC_LANG([C++])
|
||||||
|
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
||||||
|
AX_COMPILER_VENDOR
|
||||||
|
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
|
||||||
|
[vendor of C++ compiler that will compile the code])
|
||||||
|
AX_GXX_VERSION
|
||||||
|
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
||||||
|
[version of g++ that will compile the code])
|
||||||
|
|
||||||
|
############### Checks for library functions
|
||||||
|
CXXFLAGS_CPY=$CXXFLAGS
|
||||||
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
AC_CHECK_LIB([gmp],[__gmpf_init],
|
||||||
|
[AC_CHECK_LIB([mpfr],[mpfr_init],
|
||||||
|
[AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])]
|
||||||
|
[have_mpfr=true]
|
||||||
|
[LIBS="$LIBS -lmpfr"],
|
||||||
|
[AC_MSG_ERROR([MPFR library not found])])]
|
||||||
|
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
|
||||||
|
[have_gmp=true]
|
||||||
|
[LIBS="$LIBS -lgmp"],
|
||||||
|
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
|
||||||
|
|
||||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
if test "${ac_LAPACK}x" != "nox"; then
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
|
||||||
#Please install or provide the correct path to your installation
|
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
||||||
#Info at: http://www.gmplib.org)])
|
fi
|
||||||
|
AC_CHECK_LIB([fftw3],[fftw_execute],
|
||||||
|
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
|
||||||
|
[have_fftw=true]
|
||||||
|
[LIBS="$LIBS -lfftw3 -lfftw3f"],
|
||||||
|
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
|
||||||
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
|
|
||||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
############### SIMD instruction selection
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
|
||||||
#Please install or provide the correct path to your installation
|
|
||||||
#Info at: http://www.mpfr.org/)])
|
|
||||||
|
|
||||||
#
|
|
||||||
# SIMD instructions selection
|
|
||||||
#
|
|
||||||
|
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
|
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
||||||
[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
|
[ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
|
||||||
|
|
||||||
supported=no
|
case ${ax_cv_cxx_compiler_vendor} in
|
||||||
|
clang|gnu)
|
||||||
ac_ZMM=no;
|
case ${ac_SIMD} in
|
||||||
|
SSE4)
|
||||||
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
|
SIMD_FLAGS='-msse4.2';;
|
||||||
|
AVX)
|
||||||
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx';;
|
||||||
|
AVXFMA4)
|
||||||
|
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
|
||||||
|
SIMD_FLAGS='-mavx -mfma4';;
|
||||||
|
AVXFMA)
|
||||||
|
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVX2)
|
||||||
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx2 -mfma';;
|
||||||
|
AVX512|AVX512MIC|KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
||||||
|
IMCI|KNC)
|
||||||
|
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
GEN)
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
QPX|BGQ)
|
||||||
|
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
|
||||||
|
esac;;
|
||||||
|
intel)
|
||||||
|
case ${ac_SIMD} in
|
||||||
|
SSE4)
|
||||||
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
|
SIMD_FLAGS='-msse4.2 -xsse4.2';;
|
||||||
|
AVX)
|
||||||
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx -xavx';;
|
||||||
|
AVXFMA4)
|
||||||
|
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVXFMA)
|
||||||
|
dnl AVXFMA selects -mavx -mfma, i.e. FMA3; the description now matches the GCC/Clang branch.
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVX2)
|
||||||
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
|
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
|
||||||
|
AVX512)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-xcore-avx512';;
|
||||||
|
AVX512MIC|KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
|
||||||
|
SIMD_FLAGS='-xmic-avx512';;
|
||||||
|
IMCI|KNC)
|
||||||
|
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
GEN)
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
|
||||||
|
esac;;
|
||||||
|
*)
|
||||||
|
AC_MSG_WARN([Compiler unknown, using generic vector code])
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
|
||||||
|
esac
|
||||||
|
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
|
||||||
|
|
||||||
case ${ac_SIMD} in
|
case ${ac_SIMD} in
|
||||||
SSE4)
|
AVX512|AVX512MIC|KNL)
|
||||||
echo Configuring for SSE4
|
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
|
||||||
AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
|
*)
|
||||||
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
|
;;
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support SSE4 instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX)
|
|
||||||
echo Configuring for AVX
|
|
||||||
AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
|
|
||||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVXFMA4)
|
|
||||||
echo Configuring for AVX
|
|
||||||
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
|
|
||||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX2)
|
|
||||||
echo Configuring for AVX2
|
|
||||||
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
|
|
||||||
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX512)
|
|
||||||
echo Configuring for AVX512
|
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
|
|
||||||
supported="cross compilation"
|
|
||||||
ac_ZMM=yes;
|
|
||||||
;;
|
|
||||||
IMCI)
|
|
||||||
echo Configuring for IMCI
|
|
||||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
|
||||||
supported="cross compilation"
|
|
||||||
ac_ZMM=no;
|
|
||||||
;;
|
|
||||||
NEONv8)
|
|
||||||
echo Configuring for experimental ARMv8a support
|
|
||||||
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
|
|
||||||
supported="cross compilation"
|
|
||||||
;;
|
|
||||||
DEBUG)
|
|
||||||
echo Configuring without SIMD support - only for compiler DEBUGGING!
|
|
||||||
AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
|
|
||||||
case ${ac_ZMM} in
|
############### precision selection
|
||||||
yes)
|
|
||||||
echo Enabling ZMM source code
|
|
||||||
;;
|
|
||||||
no)
|
|
||||||
echo Disabling ZMM source code
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ])
|
|
||||||
|
|
||||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||||
case ${ac_PRECISION} in
|
case ${ac_PRECISION} in
|
||||||
single)
|
single)
|
||||||
echo default precision is single
|
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||||
;;
|
;;
|
||||||
double)
|
double)
|
||||||
echo default precision is double
|
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#
|
############### communication type selection
|
||||||
# Comms selection
|
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
||||||
#
|
|
||||||
|
|
||||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
|
||||||
|
|
||||||
case ${ac_COMMS} in
|
case ${ac_COMMS} in
|
||||||
none)
|
none)
|
||||||
echo Configuring for NO communications
|
|
||||||
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
||||||
;;
|
;;
|
||||||
|
mpi-auto)
|
||||||
|
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
||||||
|
LX_FIND_MPI
|
||||||
|
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
|
||||||
|
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
||||||
|
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
||||||
|
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS"
|
||||||
|
;;
|
||||||
mpi)
|
mpi)
|
||||||
echo Configuring for MPI communications
|
|
||||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
||||||
;;
|
;;
|
||||||
shmem)
|
shmem)
|
||||||
echo Configuring for SHMEM communications
|
|
||||||
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
|
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
|
||||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
|
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
|
||||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
||||||
|
|
||||||
#
|
############### RNG selection
|
||||||
# RNG selection
|
|
||||||
#
|
|
||||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
||||||
[Select Random Number Generator to be used])],\
|
[Select Random Number Generator to be used])],\
|
||||||
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
||||||
|
|
||||||
case ${ac_RNG} in
|
case ${ac_RNG} in
|
||||||
ranlux48)
|
ranlux48)
|
||||||
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
|
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
|
||||||
;;
|
;;
|
||||||
mt19937)
|
mt19937)
|
||||||
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
|
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#
|
############### timer option
|
||||||
# SDE timing mode
|
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
|
||||||
#
|
|
||||||
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers=yes|no],\
|
|
||||||
[Enable system dependent high res timers])],\
|
[Enable system dependent high res timers])],\
|
||||||
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
||||||
case ${ac_TIMERS} in
|
case ${ac_TIMERS} in
|
||||||
yes)
|
yes)
|
||||||
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
||||||
;;
|
;;
|
||||||
no)
|
no)
|
||||||
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
|
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#
|
############### Chroma regression test
|
||||||
# Chroma regression tests
|
|
||||||
#
|
|
||||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
||||||
|
|
||||||
case ${ac_CHROMA} in
|
case ${ac_CHROMA} in
|
||||||
yes)
|
yes|no)
|
||||||
echo Enabling tests regressing to Chroma
|
|
||||||
;;
|
|
||||||
no)
|
|
||||||
echo Disabling tests regressing to Chroma
|
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
|
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
|
||||||
|
|
||||||
#
|
############### Doxygen
|
||||||
# Lapack
|
AC_PROG_DOXYGEN
|
||||||
#
|
|
||||||
AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
|
||||||
|
|
||||||
case ${ac_LAPACK} in
|
if test -n "$DOXYGEN"
|
||||||
yes)
|
then
|
||||||
echo Enabling lapack
|
AC_CONFIG_FILES([docs/doxy.cfg])
|
||||||
;;
|
fi
|
||||||
no)
|
|
||||||
echo Disabling lapack
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo Enabling lapack at ${ac_LAPACK}
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
|
############### Output
|
||||||
AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
|
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
|
||||||
|
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
|
||||||
###################################################################
|
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
|
||||||
# Checks for doxygen support
|
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
|
||||||
# if present enables the "make doxyfile" command
|
AC_SUBST([AM_CFLAGS])
|
||||||
#echo
|
AC_SUBST([AM_CXXFLAGS])
|
||||||
#echo Checking doxygen support
|
AC_SUBST([AM_LDFLAGS])
|
||||||
#echo :::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
#AC_PROG_DOXYGEN
|
|
||||||
|
|
||||||
#if test -n "$DOXYGEN"
|
|
||||||
#then
|
|
||||||
#AC_CONFIG_FILES([docs/doxy.cfg])
|
|
||||||
#fi
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo Creating configuration files
|
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
AC_CONFIG_FILES(Makefile)
|
AC_CONFIG_FILES(Makefile)
|
||||||
AC_CONFIG_FILES(lib/Makefile)
|
AC_CONFIG_FILES(lib/Makefile)
|
||||||
AC_CONFIG_FILES(tests/Makefile)
|
AC_CONFIG_FILES(tests/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/IO/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/core/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/debug/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/forces/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/hmc/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/solver/Makefile)
|
||||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
||||||
AC_CONFIG_FILES(benchmarks/Makefile)
|
AC_CONFIG_FILES(benchmarks/Makefile)
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
||||||
|
|
||||||
echo "
|
echo "
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
Summary of configuration for $PACKAGE v$VERSION
|
Summary of configuration for $PACKAGE v$VERSION
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
The following features are enabled:
|
----- PLATFORM ----------------------------------------
|
||||||
|
|
||||||
- architecture (build) : $build_cpu
|
- architecture (build) : $build_cpu
|
||||||
- os (build) : $build_os
|
- os (build) : $build_os
|
||||||
- architecture (target) : $target_cpu
|
- architecture (target) : $target_cpu
|
||||||
- os (target) : $target_os
|
- os (target) : $target_os
|
||||||
|
- compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||||
|
- compiler version : ${ax_cv_gxx_version}
|
||||||
|
----- BUILD OPTIONS -----------------------------------
|
||||||
|
- SIMD : ${ac_SIMD}
|
||||||
|
- Threading : ${ac_openmp}
|
||||||
|
- Communications type : ${ac_COMMS}
|
||||||
|
- Default precision : ${ac_PRECISION}
|
||||||
|
- RNG choice : ${ac_RNG}
|
||||||
|
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||||
|
- LAPACK : ${ac_LAPACK}
|
||||||
|
- FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||||
- Supported SIMD flags : $SIMD_FLAGS
|
----- BUILD FLAGS -------------------------------------
|
||||||
----------------------------------------------------------
|
- CXXFLAGS:
|
||||||
- enabled simd support : ${ac_SIMD} (config macro says supported: $supported )
|
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
- communications type : ${ac_COMMS}
|
- LDFLAGS:
|
||||||
- default precision : ${ac_PRECISION}
|
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
- RNG choice : ${ac_RNG}
|
- LIBS:
|
||||||
- LAPACK : ${ac_LAPACK}
|
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
"
|
"
|
||||||
|
1
include/Grid
Symbolic link
1
include/Grid
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../lib
|
@ -29,27 +29,28 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_ALGORITHMS_H
|
#ifndef GRID_ALGORITHMS_H
|
||||||
#define GRID_ALGORITHMS_H
|
#define GRID_ALGORITHMS_H
|
||||||
|
|
||||||
#include <algorithms/SparseMatrix.h>
|
#include <Grid/algorithms/SparseMatrix.h>
|
||||||
#include <algorithms/LinearOperator.h>
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
#include <algorithms/Preconditioner.h>
|
#include <Grid/algorithms/Preconditioner.h>
|
||||||
|
|
||||||
#include <algorithms/approx/Zolotarev.h>
|
#include <Grid/algorithms/approx/Zolotarev.h>
|
||||||
#include <algorithms/approx/Chebyshev.h>
|
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||||
#include <algorithms/approx/Remez.h>
|
#include <Grid/algorithms/approx/Remez.h>
|
||||||
#include <algorithms/approx/MultiShiftFunction.h>
|
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradient.h>
|
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||||
#include <algorithms/iterative/ConjugateResidual.h>
|
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||||
#include <algorithms/iterative/NormalEquations.h>
|
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||||
#include <algorithms/iterative/SchurRedBlack.h>
|
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
|
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||||
|
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||||
|
|
||||||
// Lanczos support
|
// Lanczos support
|
||||||
#include <algorithms/iterative/MatrixUtils.h>
|
#include <Grid/algorithms/iterative/MatrixUtils.h>
|
||||||
#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||||
|
|
||||||
#include <algorithms/CoarsenedMatrix.h>
|
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||||
|
|
||||||
// Eigen/lanczos
|
// Eigen/lanczos
|
||||||
// EigCg
|
// EigCg
|
||||||
|
@ -113,9 +113,8 @@ public:
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
_Tp tmp;
|
_Tp tmp;
|
||||||
#undef FIRST_TOUCH_OPTIMISE
|
#ifdef GRID_NUMA
|
||||||
#ifdef FIRST_TOUCH_OPTIMISE
|
#pragma omp parallel for schedule(static)
|
||||||
#pragma omp parallel for
|
|
||||||
for(int i=0;i<__n;i++){
|
for(int i=0;i<__n;i++){
|
||||||
ptr[i]=tmp;
|
ptr[i]=tmp;
|
||||||
}
|
}
|
||||||
|
@ -28,8 +28,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_CARTESIAN_H
|
#ifndef GRID_CARTESIAN_H
|
||||||
#define GRID_CARTESIAN_H
|
#define GRID_CARTESIAN_H
|
||||||
|
|
||||||
#include <cartesian/Cartesian_base.h>
|
#include <Grid/cartesian/Cartesian_base.h>
|
||||||
#include <cartesian/Cartesian_full.h>
|
#include <Grid/cartesian/Cartesian_full.h>
|
||||||
#include <cartesian/Cartesian_red_black.h>
|
#include <Grid/cartesian/Cartesian_red_black.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_COMMUNICATOR_H
|
#ifndef GRID_COMMUNICATOR_H
|
||||||
#define GRID_COMMUNICATOR_H
|
#define GRID_COMMUNICATOR_H
|
||||||
|
|
||||||
#include <communicator/Communicator_base.h>
|
#include <Grid/communicator/Communicator_base.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,17 +28,17 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef _GRID_CSHIFT_H_
|
#ifndef _GRID_CSHIFT_H_
|
||||||
#define _GRID_CSHIFT_H_
|
#define _GRID_CSHIFT_H_
|
||||||
|
|
||||||
#include <cshift/Cshift_common.h>
|
#include <Grid/cshift/Cshift_common.h>
|
||||||
|
|
||||||
#ifdef GRID_COMMS_NONE
|
#ifdef GRID_COMMS_NONE
|
||||||
#include <cshift/Cshift_none.h>
|
#include <Grid/cshift/Cshift_none.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
#include <cshift/Cshift_mpi.h>
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
#include <cshift/Cshift_mpi.h> // uses same implementation of communicator
|
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
276
lib/FFT.h
Normal file
276
lib/FFT.h
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/FFT.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef _GRID_FFT_H_
|
||||||
|
#define _GRID_FFT_H_
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
#include <fftw3.h>
|
||||||
|
#endif
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
template<class scalar> struct FFTW { };
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
template<> struct FFTW<ComplexD> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftw_complex FFTW_scalar;
|
||||||
|
typedef fftw_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftw_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftw_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct FFTW<ComplexF> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftwf_complex FFTW_scalar;
|
||||||
|
typedef fftwf_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftwf_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftwf_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftwf_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef FFTW_FORWARD
|
||||||
|
#define FFTW_FORWARD (-1)
|
||||||
|
#define FFTW_BACKWARD (+1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
class FFT {
|
||||||
|
private:
|
||||||
|
|
||||||
|
GridCartesian *vgrid;
|
||||||
|
GridCartesian *sgrid;
|
||||||
|
|
||||||
|
int Nd;
|
||||||
|
double flops;
|
||||||
|
double flops_call;
|
||||||
|
uint64_t usec;
|
||||||
|
|
||||||
|
std::vector<int> dimensions;
|
||||||
|
std::vector<int> processors;
|
||||||
|
std::vector<int> processor_coor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
static const int forward=FFTW_FORWARD;
|
||||||
|
static const int backward=FFTW_BACKWARD;
|
||||||
|
|
||||||
|
double Flops(void) {return flops;}
|
||||||
|
double MFlops(void) {return flops/usec;}
|
||||||
|
|
||||||
|
FFT ( GridCartesian * grid ) :
|
||||||
|
vgrid(grid),
|
||||||
|
Nd(grid->_ndimension),
|
||||||
|
dimensions(grid->_fdimensions),
|
||||||
|
processors(grid->_processors),
|
||||||
|
processor_coor(grid->_processor_coor)
|
||||||
|
{
|
||||||
|
flops=0;
|
||||||
|
usec =0;
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||||
|
};
|
||||||
|
|
||||||
|
~FFT ( void) {
|
||||||
|
delete sgrid;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
|
||||||
|
|
||||||
|
conformable(result._grid,vgrid);
|
||||||
|
conformable(source._grid,vgrid);
|
||||||
|
|
||||||
|
int L = vgrid->_ldimensions[dim];
|
||||||
|
int G = vgrid->_fdimensions[dim];
|
||||||
|
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
std::vector<int> pencil_gd(vgrid->_fdimensions);
|
||||||
|
|
||||||
|
pencil_gd[dim] = G*processors[dim];
|
||||||
|
|
||||||
|
// Pencil global vol LxLxGxLxL per node
|
||||||
|
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||||
|
|
||||||
|
// Construct pencils
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename sobj::scalar_type scalar;
|
||||||
|
|
||||||
|
Lattice<vobj> ssource(vgrid); ssource =source;
|
||||||
|
Lattice<sobj> pgsource(&pencil_g);
|
||||||
|
Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
|
||||||
|
|
||||||
|
#ifndef HAVE_FFTW
|
||||||
|
assert(0);
|
||||||
|
#else
|
||||||
|
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||||
|
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||||
|
|
||||||
|
{
|
||||||
|
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||||
|
int Nlow = 1;
|
||||||
|
for(int d=0;d<dim;d++){
|
||||||
|
Nlow*=vgrid->_ldimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
int rank = 1; /* 1d transforms */
|
||||||
|
int n[] = {G}; /* 1d transforms of length G */
|
||||||
|
int howmany = Ncomp;
|
||||||
|
int odist,idist,istride,ostride;
|
||||||
|
idist = odist = 1; /* Distance between consecutive FT's */
|
||||||
|
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||||
|
int *inembed = n, *onembed = n;
|
||||||
|
|
||||||
|
|
||||||
|
int sign = FFTW_FORWARD;
|
||||||
|
if (inverse) sign = FFTW_BACKWARD;
|
||||||
|
|
||||||
|
FFTW_plan p;
|
||||||
|
{
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
|
||||||
|
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||||
|
in,inembed,
|
||||||
|
istride,idist,
|
||||||
|
out,onembed,
|
||||||
|
ostride, odist,
|
||||||
|
sign,FFTW_ESTIMATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
double add,mul,fma;
|
||||||
|
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||||
|
flops_call = add+mul+2.0*fma;
|
||||||
|
|
||||||
|
GridStopWatch timer;
|
||||||
|
|
||||||
|
// Barrel shift and collect global pencil
|
||||||
|
for(int p=0;p<processors[dim];p++) {
|
||||||
|
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
peekLocalSite(s,ssource,lcoor);
|
||||||
|
|
||||||
|
lcoor[dim]+=p*L;
|
||||||
|
|
||||||
|
pokeLocalSite(s,pgsource,lcoor);
|
||||||
|
}
|
||||||
|
|
||||||
|
ssource = Cshift(ssource,dim,L);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop over orthog coords
|
||||||
|
int NN=pencil_g.lSites();
|
||||||
|
|
||||||
|
GridStopWatch Timer;
|
||||||
|
Timer.Start();
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<NN;idx++) {
|
||||||
|
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
|
||||||
|
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
|
||||||
|
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Timer.Stop();
|
||||||
|
usec += Timer.useconds();
|
||||||
|
flops+= flops_call*NN;
|
||||||
|
|
||||||
|
int pc = processor_coor[dim];
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
std::vector<int> gcoor = lcoor;
|
||||||
|
// extract the result
|
||||||
|
sobj s;
|
||||||
|
gcoor[dim] = lcoor[dim]+L*pc;
|
||||||
|
peekLocalSite(s,pgresult,gcoor);
|
||||||
|
pokeLocalSite(s,result,lcoor);
|
||||||
|
}
|
||||||
|
|
||||||
|
FFTW<scalar>::fftw_destroy_plan(p);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
46
lib/Grid.h
46
lib/Grid.h
@ -59,29 +59,31 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
///////////////////
|
///////////////////
|
||||||
// Grid headers
|
// Grid headers
|
||||||
///////////////////
|
///////////////////
|
||||||
#include <serialisation/Serialisation.h>
|
#include <Grid/serialisation/Serialisation.h>
|
||||||
#include <Config.h>
|
#include "Config.h"
|
||||||
#include <Timer.h>
|
#include <Grid/Timer.h>
|
||||||
#include <PerfCount.h>
|
#include <Grid/PerfCount.h>
|
||||||
#include <Log.h>
|
#include <Grid/Log.h>
|
||||||
#include <AlignedAllocator.h>
|
#include <Grid/AlignedAllocator.h>
|
||||||
#include <Simd.h>
|
#include <Grid/Simd.h>
|
||||||
#include <Threads.h>
|
#include <Grid/Threads.h>
|
||||||
#include <Lexicographic.h>
|
#include <Grid/Lexicographic.h>
|
||||||
#include <Communicator.h>
|
#include <Grid/Init.h>
|
||||||
#include <Cartesian.h>
|
#include <Grid/Communicator.h>
|
||||||
#include <Tensors.h>
|
#include <Grid/Cartesian.h>
|
||||||
#include <Lattice.h>
|
#include <Grid/Tensors.h>
|
||||||
#include <Cshift.h>
|
#include <Grid/Lattice.h>
|
||||||
#include <Stencil.h>
|
#include <Grid/Cshift.h>
|
||||||
#include <Algorithms.h>
|
#include <Grid/Stencil.h>
|
||||||
#include <parallelIO/BinaryIO.h>
|
#include <Grid/Algorithms.h>
|
||||||
#include <qcd/QCD.h>
|
#include <Grid/parallelIO/BinaryIO.h>
|
||||||
#include <parallelIO/NerscIO.h>
|
#include <Grid/qcd/QCD.h>
|
||||||
#include <Init.h>
|
#include <Grid/parallelIO/NerscIO.h>
|
||||||
|
|
||||||
#include <qcd/hmc/NerscCheckpointer.h>
|
#include <Grid/FFT.h>
|
||||||
#include <qcd/hmc/HmcRunner.h>
|
|
||||||
|
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
||||||
|
#include <Grid/qcd/hmc/HmcRunner.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
47
lib/Init.cc
47
lib/Init.cc
@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc,
|
|||||||
assert(ompthreads.size()==1);
|
assert(ompthreads.size()==1);
|
||||||
GridThread::SetThreads(ompthreads[0]);
|
GridThread::SetThreads(ompthreads[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
||||||
std::vector<int> cores(0);
|
std::vector<int> cores(0);
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
||||||
@ -193,7 +194,7 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"--threads n : default number of OMP threads"<<std::endl;
|
std::cout<<GridLogMessage<<"--threads n : default number of OMP threads"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
|
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl;
|
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
GridLogConfigure(logstreams);
|
GridLogConfigure(logstreams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||||
Grid_debug_handler_init();
|
Grid_debug_handler_init();
|
||||||
}
|
}
|
||||||
@ -234,26 +234,34 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string COL_RED = GridLogColours.colour["RED"];
|
||||||
|
std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
|
||||||
|
std::string COL_BLACK = GridLogColours.colour["BLACK"];
|
||||||
|
std::string COL_GREEN = GridLogColours.colour["GREEN"];
|
||||||
|
std::string COL_BLUE = GridLogColours.colour["BLUE"];
|
||||||
|
std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
|
||||||
|
std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
|
||||||
|
|
||||||
|
|
||||||
std::cout <<std::endl;
|
std::cout <<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__|__|__|__"<< "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__|__|__|__"<< "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__| | | "<< "| | | "<<Logger::PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ | | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__ "<< " "<<Logger::PURPLE<<" "<< " _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<" GGGG "<<Logger::RED<<" RRRR "<<Logger::BLUE <<" III "<<Logger::PURPLE<<"DDDD "<<Logger::PURPLE<<" _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<"G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D "<<Logger::PURPLE<<" _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<"G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D"<<Logger::PURPLE<<" _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<"G GG "<<Logger::RED<<" RRRR "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D"<<Logger::GREEN <<" _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<"G G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D "<<Logger::GREEN <<" _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<" GGGG "<<Logger::RED<<" R R "<<Logger::BLUE <<" III "<<Logger::PURPLE<<"DDDD "<<Logger::GREEN <<" _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|__ "<< " "<<Logger::GREEN <<" "<< " _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|__|__|__|__"<< "|__|__|_"<<Logger::GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|__|__|__|__"<< "|__|__|_"<<Logger::GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << " | | | | "<< "| | | "<<Logger::GREEN <<" | | |"<< " | | | | "<<std::endl;
|
std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl;
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout <<Logger::YELLOW<< std::endl;
|
std::cout <<COL_YELLOW<< std::endl;
|
||||||
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
||||||
std::cout << "Colours by Tadahito Boyle "<<std::endl;
|
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
||||||
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
||||||
@ -264,7 +272,8 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
||||||
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
||||||
std::cout << "GNU General Public License for more details."<<std::endl;
|
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||||
std::cout << Logger::BLACK <<std::endl;
|
std::cout << COL_BACKGROUND <<std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_LATTICE_H
|
#ifndef GRID_LATTICE_H
|
||||||
#define GRID_LATTICE_H
|
#define GRID_LATTICE_H
|
||||||
|
|
||||||
#include <lattice/Lattice_base.h>
|
#include <Grid/lattice/Lattice_base.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
128
lib/Log.cc
128
lib/Log.cc
@ -1,126 +1,92 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/Log.cc
|
Source file: ./lib/Log.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
GridStopWatch Logger::StopWatch;
|
GridStopWatch Logger::StopWatch;
|
||||||
std::ostream Logger::devnull(0);
|
std::ostream Logger::devnull(0);
|
||||||
std::string Logger::BLACK("\033[30m");
|
|
||||||
std::string Logger::RED("\033[31m");
|
|
||||||
std::string Logger::GREEN("\033[32m");
|
|
||||||
std::string Logger::YELLOW("\033[33m");
|
|
||||||
std::string Logger::BLUE("\033[34m");
|
|
||||||
std::string Logger::PURPLE("\033[35m");
|
|
||||||
std::string Logger::CYAN("\033[36m");
|
|
||||||
std::string Logger::WHITE("\033[37m");
|
|
||||||
std::string Logger::NORMAL("\033[0;39m");
|
|
||||||
std::string EMPTY("");
|
|
||||||
|
|
||||||
#if 0
|
Colours GridLogColours(0);
|
||||||
GridLogger GridLogError (1,"Error",Logger::RED);
|
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
|
||||||
GridLogger GridLogWarning (1,"Warning",Logger::YELLOW);
|
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
|
||||||
GridLogger GridLogMessage (1,"Message",Logger::BLACK);
|
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
|
||||||
GridLogger GridLogDebug (1,"Debug",Logger::PURPLE);
|
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE");
|
||||||
GridLogger GridLogPerformance(1,"Performance",Logger::GREEN);
|
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||||
GridLogger GridLogIterative (1,"Iterative",Logger::BLUE);
|
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogIntegrator (1,"Integrator",Logger::BLUE);
|
GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE");
|
||||||
#else
|
|
||||||
GridLogger GridLogError (1,"Error",EMPTY);
|
|
||||||
GridLogger GridLogWarning (1,"Warning",EMPTY);
|
|
||||||
GridLogger GridLogMessage (1,"Message",EMPTY);
|
|
||||||
GridLogger GridLogDebug (1,"Debug",EMPTY);
|
|
||||||
GridLogger GridLogPerformance(1,"Performance",EMPTY);
|
|
||||||
GridLogger GridLogIterative (1,"Iterative",EMPTY);
|
|
||||||
GridLogger GridLogIntegrator (1,"Integrator",EMPTY);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void GridLogConfigure(std::vector<std::string> &logstreams)
|
void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||||
{
|
|
||||||
GridLogError.Active(0);
|
GridLogError.Active(0);
|
||||||
GridLogWarning.Active(0);
|
GridLogWarning.Active(0);
|
||||||
GridLogMessage.Active(0);
|
GridLogMessage.Active(1); // at least the messages should be always on
|
||||||
GridLogIterative.Active(0);
|
GridLogIterative.Active(0);
|
||||||
GridLogDebug.Active(0);
|
GridLogDebug.Active(0);
|
||||||
GridLogPerformance.Active(0);
|
GridLogPerformance.Active(0);
|
||||||
GridLogIntegrator.Active(0);
|
GridLogIntegrator.Active(0);
|
||||||
|
GridLogColours.Active(0);
|
||||||
|
|
||||||
int blackAndWhite = 1;
|
for (int i = 0; i < logstreams.size(); i++) {
|
||||||
if(blackAndWhite){
|
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
|
||||||
Logger::BLACK = std::string("");
|
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
|
||||||
Logger::RED =Logger::BLACK;
|
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
|
||||||
Logger::GREEN =Logger::BLACK;
|
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
||||||
Logger::YELLOW =Logger::BLACK;
|
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||||
Logger::BLUE =Logger::BLACK;
|
if (logstreams[i] == std::string("Performance"))
|
||||||
Logger::PURPLE =Logger::BLACK;
|
GridLogPerformance.Active(1);
|
||||||
Logger::CYAN =Logger::BLACK;
|
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
|
||||||
Logger::WHITE =Logger::BLACK;
|
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||||
Logger::NORMAL =Logger::BLACK;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i=0;i<logstreams.size();i++){
|
|
||||||
if ( logstreams[i]== std::string("Error") ) GridLogError.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Warning") ) GridLogWarning.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Message") ) GridLogMessage.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Iterative") ) GridLogIterative.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Debug") ) GridLogDebug.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Integrator" ) ) GridLogIntegrator.Active(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// Verbose limiter on MPI tasks
|
// Verbose limiter on MPI tasks
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
void Grid_quiesce_nodes(void)
|
void Grid_quiesce_nodes(void) {
|
||||||
{
|
int me = 0;
|
||||||
int me=0;
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
me = shmem_my_pe();
|
me = shmem_my_pe();
|
||||||
#endif
|
#endif
|
||||||
if ( me ) {
|
if (me) {
|
||||||
std::cout.setstate(std::ios::badbit);
|
std::cout.setstate(std::ios::badbit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Grid_unquiesce_nodes(void)
|
void Grid_unquiesce_nodes(void) {
|
||||||
{
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
std::cout.clear();
|
std::cout.clear();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
158
lib/Log.h
158
lib/Log.h
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -27,6 +27,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#ifndef GRID_LOG_H
|
#ifndef GRID_LOG_H
|
||||||
#define GRID_LOG_H
|
#define GRID_LOG_H
|
||||||
|
|
||||||
@ -34,56 +37,99 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <execinfo.h>
|
#include <execinfo.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||||
int Rank(void); // used for early stage debug before library init
|
int Rank(void); // used for early stage debug before library init
|
||||||
|
|
||||||
|
|
||||||
|
class Colours{
|
||||||
|
protected:
|
||||||
|
bool is_active;
|
||||||
|
public:
|
||||||
|
std::map<std::string, std::string> colour;
|
||||||
|
|
||||||
|
Colours(bool activate=false){
|
||||||
|
Active(activate);
|
||||||
|
};
|
||||||
|
|
||||||
|
void Active(bool activate){
|
||||||
|
is_active=activate;
|
||||||
|
|
||||||
|
if (is_active){
|
||||||
|
colour["BLACK"] ="\033[30m";
|
||||||
|
colour["RED"] ="\033[31m";
|
||||||
|
colour["GREEN"] ="\033[32m";
|
||||||
|
colour["YELLOW"] ="\033[33m";
|
||||||
|
colour["BLUE"] ="\033[34m";
|
||||||
|
colour["PURPLE"] ="\033[35m";
|
||||||
|
colour["CYAN"] ="\033[36m";
|
||||||
|
colour["WHITE"] ="\033[37m";
|
||||||
|
colour["NORMAL"] ="\033[0;39m";
|
||||||
|
} else {
|
||||||
|
colour["BLACK"] ="";
|
||||||
|
colour["RED"] ="";
|
||||||
|
colour["GREEN"] ="";
|
||||||
|
colour["YELLOW"]="";
|
||||||
|
colour["BLUE"] ="";
|
||||||
|
colour["PURPLE"]="";
|
||||||
|
colour["CYAN"] ="";
|
||||||
|
colour["WHITE"] ="";
|
||||||
|
colour["NORMAL"]="";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class Logger {
|
class Logger {
|
||||||
protected:
|
protected:
|
||||||
int active;
|
Colours &Painter;
|
||||||
std::string name, topName, COLOUR;
|
int active;
|
||||||
public:
|
std::string name, topName;
|
||||||
static GridStopWatch StopWatch;
|
std::string COLOUR;
|
||||||
static std::ostream devnull;
|
|
||||||
|
|
||||||
static std::string BLACK;
|
public:
|
||||||
static std::string RED ;
|
static GridStopWatch StopWatch;
|
||||||
static std::string GREEN;
|
static std::ostream devnull;
|
||||||
static std::string YELLOW;
|
|
||||||
static std::string BLUE ;
|
std::string background() {return Painter.colour["NORMAL"];}
|
||||||
static std::string PURPLE;
|
std::string evidence() {return Painter.colour["YELLOW"];}
|
||||||
static std::string CYAN ;
|
std::string colour() {return Painter.colour[COLOUR];}
|
||||||
static std::string WHITE ;
|
|
||||||
static std::string NORMAL;
|
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col)
|
||||||
|
: active(on),
|
||||||
Logger(std::string topNm, int on, std::string nm,std::string col)
|
name(nm),
|
||||||
: active(on), name(nm), topName(topNm), COLOUR(col) {};
|
topName(topNm),
|
||||||
|
Painter(col_class),
|
||||||
void Active(int on) {active = on;};
|
COLOUR(col){} ;
|
||||||
int isActive(void) {return active;};
|
|
||||||
|
void Active(int on) {active = on;};
|
||||||
friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
|
int isActive(void) {return active;};
|
||||||
if ( log.active ) {
|
|
||||||
StopWatch.Stop();
|
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||||
GridTime now = StopWatch.Elapsed();
|
|
||||||
StopWatch.Start();
|
if ( log.active ) {
|
||||||
stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : ";
|
StopWatch.Stop();
|
||||||
stream << log.COLOUR <<std::setw(11) << log.name << BLACK << " : ";
|
GridTime now = StopWatch.Elapsed();
|
||||||
stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ;
|
StopWatch.Start();
|
||||||
stream << log.COLOUR;
|
stream << log.background()<< log.topName << log.background()<< " : ";
|
||||||
return stream;
|
stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
|
||||||
} else {
|
stream << log.evidence()<< now << log.background() << " : " << log.colour();
|
||||||
return devnull;
|
return stream;
|
||||||
}
|
} else {
|
||||||
|
return devnull;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class GridLogger: public Logger {
|
class GridLogger: public Logger {
|
||||||
public:
|
public:
|
||||||
GridLogger(int on, std::string nm, std::string col = Logger::BLACK): Logger("Grid", on, nm, col){};
|
GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"):
|
||||||
|
Logger("Grid", on, nm, col_class, col_key){};
|
||||||
};
|
};
|
||||||
|
|
||||||
void GridLogConfigure(std::vector<std::string> &logstreams);
|
void GridLogConfigure(std::vector<std::string> &logstreams);
|
||||||
@ -95,38 +141,40 @@ extern GridLogger GridLogDebug ;
|
|||||||
extern GridLogger GridLogPerformance;
|
extern GridLogger GridLogPerformance;
|
||||||
extern GridLogger GridLogIterative ;
|
extern GridLogger GridLogIterative ;
|
||||||
extern GridLogger GridLogIntegrator ;
|
extern GridLogger GridLogIntegrator ;
|
||||||
|
extern Colours GridLogColours;
|
||||||
|
|
||||||
|
|
||||||
#define _NBACKTRACE (256)
|
#define _NBACKTRACE (256)
|
||||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||||
|
|
||||||
#define BACKTRACEFILE() {\
|
#define BACKTRACEFILE() {\
|
||||||
char string[20]; \
|
char string[20]; \
|
||||||
std::sprintf(string,"backtrace.%d",Rank()); \
|
std::sprintf(string,"backtrace.%d",Rank()); \
|
||||||
std::FILE * fp = std::fopen(string,"w"); \
|
std::FILE * fp = std::fopen(string,"w"); \
|
||||||
BACKTRACEFP(fp)\
|
BACKTRACEFP(fp)\
|
||||||
std::fclose(fp); \
|
std::fclose(fp); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_EXECINFO_H
|
#ifdef HAVE_EXECINFO_H
|
||||||
#define BACKTRACEFP(fp) { \
|
#define BACKTRACEFP(fp) { \
|
||||||
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
|
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
|
||||||
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
|
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
|
||||||
for (int i = 0; i < symbols; i++){\
|
for (int i = 0; i < symbols; i++){\
|
||||||
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
|
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
|
||||||
}\
|
}\
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define BACKTRACEFP(fp) { \
|
#define BACKTRACEFP(fp) { \
|
||||||
std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
|
||||||
std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
|
||||||
std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
|
||||||
std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define BACKTRACE() BACKTRACEFP(stdout)
|
#define BACKTRACE() BACKTRACEFP(stdout)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because one or more lines are too long
@ -1,6 +1,3 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/
|
|
||||||
|
|
||||||
extra_sources=
|
extra_sources=
|
||||||
if BUILD_COMMS_MPI
|
if BUILD_COMMS_MPI
|
||||||
extra_sources+=communicator/Communicator_mpi.cc
|
extra_sources+=communicator/Communicator_mpi.cc
|
||||||
@ -17,16 +14,11 @@ endif
|
|||||||
#
|
#
|
||||||
# Libraries
|
# Libraries
|
||||||
#
|
#
|
||||||
|
|
||||||
include Make.inc
|
include Make.inc
|
||||||
|
include Eigen.inc
|
||||||
|
|
||||||
lib_LIBRARIES = libGrid.a
|
lib_LIBRARIES = libGrid.a
|
||||||
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
|
||||||
|
|
||||||
|
|
||||||
# qcd/action/fermion/PartialFractionFermion5D.cc\ \
|
|
||||||
#
|
|
||||||
# Include files
|
|
||||||
#
|
|
||||||
nobase_include_HEADERS=$(HFILES)
|
|
||||||
|
|
||||||
|
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
||||||
|
libGrid_adir = $(pkgincludedir)
|
||||||
|
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||||
|
49
lib/Simd.h
49
lib/Simd.h
@ -1,32 +1,33 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/Simd.h
|
Source file: ./lib/Simd.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_SIMD_H
|
#ifndef GRID_SIMD_H
|
||||||
#define GRID_SIMD_H
|
#define GRID_SIMD_H
|
||||||
|
|
||||||
@ -118,6 +119,14 @@ namespace Grid {
|
|||||||
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
||||||
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
||||||
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
||||||
|
|
||||||
|
// define projections to real and imaginary parts
|
||||||
|
inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
|
||||||
|
inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
|
||||||
|
inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
|
||||||
|
inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
|
||||||
|
|
||||||
|
// define auxiliary functions for complex computations
|
||||||
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
||||||
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
||||||
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
||||||
@ -163,8 +172,8 @@ namespace Grid {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include <simd/Grid_vector_types.h>
|
#include "simd/Grid_vector_types.h"
|
||||||
#include <simd/Grid_vector_unops.h>
|
#include "simd/Grid_vector_unops.h"
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
// Default precision
|
// Default precision
|
||||||
|
247
lib/Stat.cc
Normal file
247
lib/Stat.cc
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
#include <PerfCount.h>
|
||||||
|
#include <Stat.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
// Shared across all PmuStat objects: set to true by the first init() call so that
// pmu_init() is only invoked once (init() only does this on x86_64 builds).
bool PmuStat::pmu_initialized=false;
|
||||||
|
|
||||||
|
|
||||||
|
void PmuStat::init(const char *regname)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
name = regname;
|
||||||
|
if (!pmu_initialized)
|
||||||
|
{
|
||||||
|
std::cout<<"initialising pmu"<<std::endl;
|
||||||
|
pmu_initialized = true;
|
||||||
|
pmu_init();
|
||||||
|
}
|
||||||
|
clear();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::clear(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
count = 0;
|
||||||
|
tregion = 0;
|
||||||
|
pmc0 = 0;
|
||||||
|
pmc1 = 0;
|
||||||
|
inst = 0;
|
||||||
|
cyc = 0;
|
||||||
|
ref = 0;
|
||||||
|
tcycles = 0;
|
||||||
|
reads = 0;
|
||||||
|
writes = 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::print(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
std::cout <<"Reg "<<std::string(name)<<":\n";
|
||||||
|
std::cout <<" region "<<tregion<<std::endl;
|
||||||
|
std::cout <<" cycles "<<tcycles<<std::endl;
|
||||||
|
std::cout <<" inst "<<inst <<std::endl;
|
||||||
|
std::cout <<" cyc "<<cyc <<std::endl;
|
||||||
|
std::cout <<" ref "<<ref <<std::endl;
|
||||||
|
std::cout <<" pmc0 "<<pmc0 <<std::endl;
|
||||||
|
std::cout <<" pmc1 "<<pmc1 <<std::endl;
|
||||||
|
std::cout <<" count "<<count <<std::endl;
|
||||||
|
std::cout <<" reads "<<reads <<std::endl;
|
||||||
|
std::cout <<" writes "<<writes <<std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::start(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
pmu_start();
|
||||||
|
++count;
|
||||||
|
xmemctrs(&mrstart, &mwstart);
|
||||||
|
tstart = __rdtsc();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::enter(int t)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
counters[0][t] = __rdpmc(0);
|
||||||
|
counters[1][t] = __rdpmc(1);
|
||||||
|
counters[2][t] = __rdpmc((1<<30)|0);
|
||||||
|
counters[3][t] = __rdpmc((1<<30)|1);
|
||||||
|
counters[4][t] = __rdpmc((1<<30)|2);
|
||||||
|
counters[5][t] = __rdtsc();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::exit(int t)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
counters[0][t] = __rdpmc(0) - counters[0][t];
|
||||||
|
counters[1][t] = __rdpmc(1) - counters[1][t];
|
||||||
|
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
|
||||||
|
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
|
||||||
|
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
|
||||||
|
counters[5][t] = __rdtsc() - counters[5][t];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::accum(int nthreads)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
tend = __rdtsc();
|
||||||
|
xmemctrs(&mrend, &mwend);
|
||||||
|
pmu_stop();
|
||||||
|
for (int t = 0; t < nthreads; ++t) {
|
||||||
|
pmc0 += counters[0][t];
|
||||||
|
pmc1 += counters[1][t];
|
||||||
|
inst += counters[2][t];
|
||||||
|
cyc += counters[3][t];
|
||||||
|
ref += counters[4][t];
|
||||||
|
tcycles += counters[5][t];
|
||||||
|
}
|
||||||
|
uint64_t region = tend - tstart;
|
||||||
|
tregion += region;
|
||||||
|
uint64_t mreads = mrend - mrstart;
|
||||||
|
reads += mreads;
|
||||||
|
uint64_t mwrites = mwend - mwstart;
|
||||||
|
writes += mwrites;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Intentionally empty hooks: nothing is done on teardown, region start or region stop.
void PmuStat::pmu_fini(void)
{
}
void PmuStat::pmu_start(void)
{
}
void PmuStat::pmu_stop(void)
{
}
|
||||||
|
// One-time setup hook called from init(). Runs KNLsetup() when the build defines
// _KNIGHTS_LANDING_, and does nothing otherwise.
void PmuStat::pmu_init(void)
{
#ifdef _KNIGHTS_LANDING_
  KNLsetup();
#endif
}
|
||||||
|
// Report memory read and write counts through *mr and *mw.
// With _KNIGHTS_LANDING_ defined the values are the sums over all NEDC entries of
// the edcrd / edcwr counters read by KNLreadctrs(); otherwise both are set to 0.
//   mr : receives the read total
//   mw : receives the write total
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
{
#ifdef _KNIGHTS_LANDING_
  ctrs snapshot;
  KNLreadctrs(snapshot);

  uint64_t read_total  = 0;
  uint64_t write_total = 0;
  for (int edc = 0; edc < NEDC; edc++) {
    read_total  += snapshot.edcrd[edc];
    write_total += snapshot.edcwr[edc];
  }
  *mr = read_total;
  *mw = write_total;
#else
  *mw = 0;
  *mr = *mw;
#endif
}
|
||||||
|
|
||||||
|
#ifdef _KNIGHTS_LANDING_
|
||||||
|
|
||||||
|
// Out-of-class definition of the static descriptor table declared in PmuStat;
// its entries are filled in by KNLsetup() and read by KNLreadctrs().
struct knl_gbl_ PmuStat::gbl;
|
||||||
|
|
||||||
|
#define PMU_MEM
|
||||||
|
|
||||||
|
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
|
||||||
|
{
|
||||||
|
char fname[1024];
|
||||||
|
snprintf(fname, sizeof(fname), "%s/type", ename);
|
||||||
|
FILE *fp = fopen(fname, "r");
|
||||||
|
if (fp == 0) {
|
||||||
|
::printf("open %s", fname);
|
||||||
|
::exit(0);
|
||||||
|
}
|
||||||
|
int type;
|
||||||
|
int ret = fscanf(fp, "%d", &type);
|
||||||
|
assert(ret == 1);
|
||||||
|
fclose(fp);
|
||||||
|
// std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
|
||||||
|
|
||||||
|
struct perf_event_attr hw = {};
|
||||||
|
hw.size = sizeof(hw);
|
||||||
|
hw.type = type;
|
||||||
|
// see /sys/devices/uncore_*/format/*
|
||||||
|
// All of the events we are interested in are configured the same way, but
|
||||||
|
// that isn't always true. Proper code would parse the format files
|
||||||
|
hw.config = event | (umask << 8);
|
||||||
|
//hw.read_format = PERF_FORMAT_GROUP;
|
||||||
|
// unfortunately the above only works within a single PMU; might
|
||||||
|
// as well just read them one at a time
|
||||||
|
int cpu = 0;
|
||||||
|
fd = perf_event_open(&hw, -1, cpu, -1, 0);
|
||||||
|
if (fd == -1) {
|
||||||
|
::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
|
||||||
|
::exit(0);
|
||||||
|
} else {
|
||||||
|
// std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void PmuStat::KNLsetup(void){
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
char fname[1024];
|
||||||
|
|
||||||
|
// MC RPQ inserts and WPQ inserts (reads & writes)
|
||||||
|
for (int mc = 0; mc < NMC; ++mc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
|
||||||
|
// RPQ Inserts
|
||||||
|
KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
|
||||||
|
// WPQ Inserts
|
||||||
|
KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
|
||||||
|
}
|
||||||
|
// EDC RPQ inserts and WPQ inserts
|
||||||
|
for (int edc=0; edc < NEDC; ++edc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
|
||||||
|
// RPQ inserts
|
||||||
|
KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
|
||||||
|
// WPQ inserts
|
||||||
|
KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
|
||||||
|
}
|
||||||
|
// EDC HitE, HitM, MissE, MissM
|
||||||
|
for (int edc=0; edc < NEDC; ++edc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
|
||||||
|
KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
|
||||||
|
KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
|
||||||
|
KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
|
||||||
|
KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t PmuStat::KNLreadctr(int fd)
|
||||||
|
{
|
||||||
|
uint64_t data;
|
||||||
|
size_t s = ::read(fd, &data, sizeof(data));
|
||||||
|
if (s != sizeof(uint64_t)){
|
||||||
|
::printf("read counter %lu", s);
|
||||||
|
::exit(0);
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill c with the current value of every counter opened by KNLsetup().
// Counters are read in the same order as before: memory-controller read/write
// pairs, then EDC read/write pairs, then the EDC hit/miss counters.
void PmuStat::KNLreadctrs(ctrs &c)
{
  for (int mc = 0; mc < NMC; mc++) {
    c.mcrd[mc] = KNLreadctr(gbl.mc_rd[mc]);
    c.mcwr[mc] = KNLreadctr(gbl.mc_wr[mc]);
  }

  for (int edc = 0; edc < NEDC; edc++) {
    c.edcrd[edc] = KNLreadctr(gbl.edc_rd[edc]);
    c.edcwr[edc] = KNLreadctr(gbl.edc_wr[edc]);
  }

  for (int edc = 0; edc < NEDC; edc++) {
    c.edchite[edc]  = KNLreadctr(gbl.edc_hite[edc]);
    c.edchitm[edc]  = KNLreadctr(gbl.edc_hitm[edc]);
    c.edcmisse[edc] = KNLreadctr(gbl.edc_misse[edc]);
    c.edcmissm[edc] = KNLreadctr(gbl.edc_missm[edc]);
  }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
104
lib/Stat.h
Normal file
104
lib/Stat.h
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
#ifndef _GRID_STAT_H
|
||||||
|
#define _GRID_STAT_H
|
||||||
|
|
||||||
|
#ifdef AVX512
|
||||||
|
#define _KNIGHTS_LANDING_ROOTONLY
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Extra KNL counters from MCDRAM
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
#ifdef _KNIGHTS_LANDING_
|
||||||
|
#define NMC 6
|
||||||
|
#define NEDC 8
|
||||||
|
struct ctrs
|
||||||
|
{
|
||||||
|
uint64_t mcrd[NMC];
|
||||||
|
uint64_t mcwr[NMC];
|
||||||
|
uint64_t edcrd[NEDC];
|
||||||
|
uint64_t edcwr[NEDC];
|
||||||
|
uint64_t edchite[NEDC];
|
||||||
|
uint64_t edchitm[NEDC];
|
||||||
|
uint64_t edcmisse[NEDC];
|
||||||
|
uint64_t edcmissm[NEDC];
|
||||||
|
};
|
||||||
|
// Peter/Azusa:
|
||||||
|
// Our modification of a code provided by Larry Meadows from Intel
|
||||||
|
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
|
||||||
|
// so is already public and in the linux kernel for KNL.
|
||||||
|
struct knl_gbl_
|
||||||
|
{
|
||||||
|
int mc_rd[NMC];
|
||||||
|
int mc_wr[NMC];
|
||||||
|
int edc_rd[NEDC];
|
||||||
|
int edc_wr[NEDC];
|
||||||
|
int edc_hite[NEDC];
|
||||||
|
int edc_hitm[NEDC];
|
||||||
|
int edc_misse[NEDC];
|
||||||
|
int edc_missm[NEDC];
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Accumulates hardware counter statistics for one named code region.
// Usage implied by the member functions: init() once, then for each measured
// region start(), enter(t)/exit(t) per slot, accum(nthreads); print() reports
// the running totals and clear() resets them.
class PmuStat
{
  // Scratch table indexed [row][slot]. As used by enter()/exit()/accum():
  //   row 0 -> pmc0   row 1 -> pmc1
  //   row 2 -> inst   row 3 -> cyc   row 4 -> ref
  //   row 5 -> tsc (summed into tcycles)
  // enter() stores the raw reading; exit() overwrites it with the elapsed delta.
  // Rows 6 and 7 are not referenced by those functions.
  uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
  static struct knl_gbl_ gbl;   // descriptors of the uncore events
#endif
  const char *name;             // region label given to init(); not owned

  uint64_t reads;     // memory reads
  uint64_t writes;    // memory writes
  uint64_t mrstart;   // memory read counter at start of parallel region
  uint64_t mrend;     // memory read counter at end of parallel region
  uint64_t mwstart;   // memory write counter at start of parallel region
  uint64_t mwend;     // memory write counter at end of parallel region

  // cumulative counters
  uint64_t count;     // number of invocations
  uint64_t tregion;   // total time in parallel region (from thread 0)
  uint64_t tcycles;   // total cycles inside parallel region
  uint64_t inst, ref, cyc; // fixed counters
  uint64_t pmc0, pmc1;// pmu
  // add memory counters here

  // temp variables
  uint64_t tstart;    // tsc at start of parallel region
  uint64_t tend;      // tsc at end of parallel region

  static bool pmu_initialized;  // true once init() has run pmu_init()

public:
  // process-wide setup / teardown hooks
  static bool is_init() { return pmu_initialized; }
  static void pmu_init();
  static void pmu_fini();
  static void pmu_start();
  static void pmu_stop();

  void accum(int nthreads);
  static void xmemctrs(uint64_t *mr, uint64_t *mw);

  // per-region interface
  void start();
  void enter(int t);
  void exit(int t);
  void print();
  void init(const char *regname);
  void clear();

#ifdef _KNIGHTS_LANDING_
  static void     KNLsetup();
  static uint64_t KNLreadctr(int fd);
  static void     KNLreadctrs(ctrs &c);
  static void     KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif
};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
184
lib/Stencil.h
184
lib/Stencil.h
@ -30,7 +30,7 @@
|
|||||||
|
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include <stencil/Lebesgue.h> // subdir aggregate
|
#include <Grid/stencil/Lebesgue.h> // subdir aggregate
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Must not lose sight that goal is to be able to construct really efficient
|
// Must not lose sight that goal is to be able to construct really efficient
|
||||||
@ -70,9 +70,70 @@
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
template<class vobj,class cobj,class compressor> void
|
||||||
|
Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
|
||||||
|
{
|
||||||
|
table.resize(0);
|
||||||
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
|
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||||
|
cbmask = 0x3;
|
||||||
|
}
|
||||||
|
int so= plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
|
if ( cbmask == 0x3 ) {
|
||||||
|
table.resize(e1*e2);
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o = n*stride;
|
||||||
|
int bo = n*e2;
|
||||||
|
table[bo+b]=std::pair<int,int>(bo+b,o+b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int bo=0;
|
||||||
|
table.resize(e1*e2/2);
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o = n*stride;
|
||||||
|
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
|
||||||
|
if ( ocb &cbmask ) {
|
||||||
|
table[bo]=std::pair<int,int>(bo,o+b); bo++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class cobj,class compressor> void
|
||||||
|
Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,
|
||||||
|
compressor &compress, int off,int so)
|
||||||
|
{
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int i=0;i<table.size();i++){
|
||||||
|
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj,class cobj,class compressor> void
|
||||||
|
Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
|
||||||
|
double &t_table ,double & t_data )
|
||||||
|
{
|
||||||
|
std::vector<std::pair<int,int> > table;
|
||||||
|
Gather_plane_simple_table_compute (rhs, buffer,dimension,plane,cbmask,compress,off,table);
|
||||||
|
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
Gather_plane_simple_table (table,rhs,buffer,compress,off,so);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct StencilEntry {
|
struct StencilEntry {
|
||||||
uint32_t _offset;
|
uint64_t _offset;
|
||||||
uint32_t _byte_offset;
|
uint64_t _byte_offset;
|
||||||
uint16_t _is_local;
|
uint16_t _is_local;
|
||||||
uint16_t _permute;
|
uint16_t _permute;
|
||||||
uint32_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline
|
uint32_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline
|
||||||
@ -101,12 +162,14 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
std::vector<Packet> Packets;
|
std::vector<Packet> Packets;
|
||||||
|
|
||||||
|
int face_table_computed;
|
||||||
|
std::vector<std::vector<std::pair<int,int> > > face_table ;
|
||||||
|
|
||||||
#define SEND_IMMEDIATE
|
#define SEND_IMMEDIATE
|
||||||
#define SERIAL_SENDS
|
#define SERIAL_SENDS
|
||||||
|
|
||||||
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
|
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
|
||||||
comms_bytes+=2.0*bytes;
|
|
||||||
#ifdef SEND_IMMEDIATE
|
#ifdef SEND_IMMEDIATE
|
||||||
commtime-=usecond();
|
commtime-=usecond();
|
||||||
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
|
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
|
||||||
@ -256,7 +319,8 @@
|
|||||||
if( _entries[i]._is_local ) {
|
if( _entries[i]._is_local ) {
|
||||||
_entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj);
|
_entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj);
|
||||||
} else {
|
} else {
|
||||||
_entries[i]._byte_offset =(uint64_t)&comm_buf[0]+ _entries[i]._offset*sizeof(cobj);
|
// PrecomputeByteOffsets [5] 16384/32768 140735768678528 140735781261056 2581581952
|
||||||
|
_entries[i]._byte_offset = _entries[i]._offset*sizeof(cobj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -265,17 +329,21 @@
|
|||||||
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
|
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
|
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
|
||||||
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
|
uint64_t cbase = (uint64_t)&comm_buf[0];
|
||||||
local = _entries[ent]._is_local;
|
local = _entries[ent]._is_local;
|
||||||
perm = _entries[ent]._permute;
|
perm = _entries[ent]._permute;
|
||||||
if (perm) ptype = _permute_type[point];
|
if (perm) ptype = _permute_type[point];
|
||||||
if (local) return base + _entries[ent]._byte_offset;
|
if (local) {
|
||||||
else return _entries[ent]._byte_offset;
|
return base + _entries[ent]._byte_offset;
|
||||||
|
} else {
|
||||||
|
return cbase + _entries[ent]._byte_offset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
inline uint64_t GetPFInfo(int ent,uint64_t base) {
|
inline uint64_t GetPFInfo(int ent,uint64_t base) {
|
||||||
|
uint64_t cbase = (uint64_t)&comm_buf[0];
|
||||||
int local = _entries[ent]._is_local;
|
int local = _entries[ent]._is_local;
|
||||||
if (local) return base + _entries[ent]._byte_offset;
|
if (local) return base + _entries[ent]._byte_offset;
|
||||||
else return _entries[ent]._byte_offset;
|
else return cbase + _entries[ent]._byte_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comms buffers
|
// Comms buffers
|
||||||
@ -301,6 +369,48 @@
|
|||||||
double gathermtime;
|
double gathermtime;
|
||||||
double splicetime;
|
double splicetime;
|
||||||
double nosplicetime;
|
double nosplicetime;
|
||||||
|
double t_data;
|
||||||
|
double t_table;
|
||||||
|
double calls;
|
||||||
|
|
||||||
|
// Reset every timing accumulator, the byte counter and the call counter to zero.
void ZeroCounters(void) {
  // halo gather / merge phases
  halogtime   = 0.;
  gathertime  = 0.;
  gathermtime = 0.;
  mergetime   = 0.;
  // communication
  commtime    = 0.;
  comms_bytes = 0.;
  // remaining phases
  jointime     = 0.;
  spintime     = 0.;
  splicetime   = 0.;
  nosplicetime = 0.;
  // table construction versus data movement
  t_table = 0.0;
  t_data  = 0.0;
  // number of calls counted so far
  calls = 0.;
}
|
||||||
|
|
||||||
|
void Report(void) {
|
||||||
|
#define PRINTIT(A) \
|
||||||
|
std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
|
||||||
|
if ( calls > 0. ) {
|
||||||
|
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
|
||||||
|
PRINTIT(halogtime);
|
||||||
|
PRINTIT(gathertime);
|
||||||
|
PRINTIT(gathermtime);
|
||||||
|
PRINTIT(mergetime);
|
||||||
|
if(comms_bytes>1.0){
|
||||||
|
PRINTIT(comms_bytes);
|
||||||
|
PRINTIT(commtime);
|
||||||
|
std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s "<<std::endl;
|
||||||
|
}
|
||||||
|
PRINTIT(jointime);
|
||||||
|
PRINTIT(spintime);
|
||||||
|
PRINTIT(splicetime);
|
||||||
|
PRINTIT(nosplicetime);
|
||||||
|
PRINTIT(t_table);
|
||||||
|
PRINTIT(t_data);
|
||||||
|
}
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CartesianStencil(GridBase *grid,
|
CartesianStencil(GridBase *grid,
|
||||||
@ -310,18 +420,7 @@
|
|||||||
const std::vector<int> &distances)
|
const std::vector<int> &distances)
|
||||||
: _permute_type(npoints), _comm_buf_size(npoints)
|
: _permute_type(npoints), _comm_buf_size(npoints)
|
||||||
{
|
{
|
||||||
#ifdef TIMING_HACK
|
face_table_computed=0;
|
||||||
gathertime=0;
|
|
||||||
jointime=0;
|
|
||||||
commtime=0;
|
|
||||||
halogtime=0;
|
|
||||||
mergetime=0;
|
|
||||||
spintime=0;
|
|
||||||
gathermtime=0;
|
|
||||||
splicetime=0;
|
|
||||||
nosplicetime=0;
|
|
||||||
comms_bytes=0;
|
|
||||||
#endif
|
|
||||||
_npoints = npoints;
|
_npoints = npoints;
|
||||||
_grid = grid;
|
_grid = grid;
|
||||||
_directions = directions;
|
_directions = directions;
|
||||||
@ -623,6 +722,7 @@
|
|||||||
template<class compressor>
|
template<class compressor>
|
||||||
void HaloExchange(const Lattice<vobj> &source,compressor &compress)
|
void HaloExchange(const Lattice<vobj> &source,compressor &compress)
|
||||||
{
|
{
|
||||||
|
calls++;
|
||||||
Mergers.resize(0);
|
Mergers.resize(0);
|
||||||
Packets.resize(0);
|
Packets.resize(0);
|
||||||
HaloGather(source,compress);
|
HaloGather(source,compress);
|
||||||
@ -648,7 +748,7 @@
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
template<class compressor>
|
template<class compressor>
|
||||||
void HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point)
|
void HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point,int & face_idx)
|
||||||
{
|
{
|
||||||
int dimension = _directions[point];
|
int dimension = _directions[point];
|
||||||
int displacement = _distances[point];
|
int displacement = _distances[point];
|
||||||
@ -676,23 +776,23 @@
|
|||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
if (splice_dim) {
|
if (splice_dim) {
|
||||||
splicetime-=usecond();
|
splicetime-=usecond();
|
||||||
GatherSimd(source,dimension,shift,0x3,compress);
|
GatherSimd(source,dimension,shift,0x3,compress,face_idx);
|
||||||
splicetime+=usecond();
|
splicetime+=usecond();
|
||||||
} else {
|
} else {
|
||||||
nosplicetime-=usecond();
|
nosplicetime-=usecond();
|
||||||
Gather(source,dimension,shift,0x3,compress);
|
Gather(source,dimension,shift,0x3,compress,face_idx);
|
||||||
nosplicetime+=usecond();
|
nosplicetime+=usecond();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if(splice_dim){
|
if(splice_dim){
|
||||||
splicetime-=usecond();
|
splicetime-=usecond();
|
||||||
GatherSimd(source,dimension,shift,0x1,compress);// if checkerboard is unfavourable take two passes
|
GatherSimd(source,dimension,shift,0x1,compress,face_idx);// if checkerboard is unfavourable take two passes
|
||||||
GatherSimd(source,dimension,shift,0x2,compress);// both with block stride loop iteration
|
GatherSimd(source,dimension,shift,0x2,compress,face_idx);// both with block stride loop iteration
|
||||||
splicetime+=usecond();
|
splicetime+=usecond();
|
||||||
} else {
|
} else {
|
||||||
nosplicetime-=usecond();
|
nosplicetime-=usecond();
|
||||||
Gather(source,dimension,shift,0x1,compress);
|
Gather(source,dimension,shift,0x1,compress,face_idx);
|
||||||
Gather(source,dimension,shift,0x2,compress);
|
Gather(source,dimension,shift,0x2,compress,face_idx);
|
||||||
nosplicetime+=usecond();
|
nosplicetime+=usecond();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -710,17 +810,19 @@
|
|||||||
u_comm_offset=0;
|
u_comm_offset=0;
|
||||||
|
|
||||||
// Gather all comms buffers
|
// Gather all comms buffers
|
||||||
|
int face_idx=0;
|
||||||
for(int point = 0 ; point < _npoints; point++) {
|
for(int point = 0 ; point < _npoints; point++) {
|
||||||
compress.Point(point);
|
compress.Point(point);
|
||||||
HaloGatherDir(source,compress,point);
|
HaloGatherDir(source,compress,point,face_idx);
|
||||||
}
|
}
|
||||||
|
face_table_computed=1;
|
||||||
|
|
||||||
assert(u_comm_offset==_unified_buffer_size);
|
assert(u_comm_offset==_unified_buffer_size);
|
||||||
halogtime+=usecond();
|
halogtime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class compressor>
|
template<class compressor>
|
||||||
void Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress)
|
void Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx)
|
||||||
{
|
{
|
||||||
typedef typename cobj::vector_type vector_type;
|
typedef typename cobj::vector_type vector_type;
|
||||||
typedef typename cobj::scalar_type scalar_type;
|
typedef typename cobj::scalar_type scalar_type;
|
||||||
@ -757,8 +859,20 @@
|
|||||||
int bytes = words * sizeof(cobj);
|
int bytes = words * sizeof(cobj);
|
||||||
|
|
||||||
gathertime-=usecond();
|
gathertime-=usecond();
|
||||||
Gather_plane_simple (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset);
|
int so = sx*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||||
|
if ( !face_table_computed ) {
|
||||||
|
t_table-=usecond();
|
||||||
|
face_table.resize(face_idx+1);
|
||||||
|
Gather_plane_simple_table_compute (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset,face_table[face_idx]);
|
||||||
|
t_table+=usecond();
|
||||||
|
}
|
||||||
|
t_data-=usecond();
|
||||||
|
Gather_plane_simple_table (face_table[face_idx],rhs,u_send_buf,compress,u_comm_offset,so);
|
||||||
|
face_idx++;
|
||||||
|
t_data+=usecond();
|
||||||
gathertime+=usecond();
|
gathertime+=usecond();
|
||||||
|
|
||||||
|
// Gather_plane_simple_stencil (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset,t_table,t_data);
|
||||||
|
|
||||||
int rank = _grid->_processor;
|
int rank = _grid->_processor;
|
||||||
int recv_from_rank;
|
int recv_from_rank;
|
||||||
@ -781,7 +895,7 @@
|
|||||||
|
|
||||||
|
|
||||||
template<class compressor>
|
template<class compressor>
|
||||||
void GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress)
|
void GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx)
|
||||||
{
|
{
|
||||||
const int Nsimd = _grid->Nsimd();
|
const int Nsimd = _grid->Nsimd();
|
||||||
|
|
||||||
|
@ -30,22 +30,22 @@ Author: neo <cossu@post.kek.jp>
|
|||||||
#ifndef GRID_MATH_H
|
#ifndef GRID_MATH_H
|
||||||
#define GRID_MATH_H
|
#define GRID_MATH_H
|
||||||
|
|
||||||
#include <tensors/Tensor_traits.h>
|
#include <Grid/tensors/Tensor_traits.h>
|
||||||
#include <tensors/Tensor_class.h>
|
#include <Grid/tensors/Tensor_class.h>
|
||||||
#include <tensors/Tensor_arith.h>
|
#include <Grid/tensors/Tensor_arith.h>
|
||||||
#include <tensors/Tensor_inner.h>
|
#include <Grid/tensors/Tensor_inner.h>
|
||||||
#include <tensors/Tensor_outer.h>
|
#include <Grid/tensors/Tensor_outer.h>
|
||||||
#include <tensors/Tensor_transpose.h>
|
#include <Grid/tensors/Tensor_transpose.h>
|
||||||
#include <tensors/Tensor_trace.h>
|
#include <Grid/tensors/Tensor_trace.h>
|
||||||
#include <tensors/Tensor_index.h>
|
#include <Grid/tensors/Tensor_index.h>
|
||||||
#include <tensors/Tensor_Ta.h>
|
#include <Grid/tensors/Tensor_Ta.h>
|
||||||
#include <tensors/Tensor_determinant.h>
|
#include <Grid/tensors/Tensor_determinant.h>
|
||||||
#include <tensors/Tensor_exp.h>
|
#include <Grid/tensors/Tensor_exp.h>
|
||||||
//#include <tensors/Tensor_peek.h>
|
//#include <Grid/tensors/Tensor_peek.h>
|
||||||
//#include <tensors/Tensor_poke.h>
|
//#include <Grid/tensors/Tensor_poke.h>
|
||||||
#include <tensors/Tensor_reality.h>
|
#include <Grid/tensors/Tensor_reality.h>
|
||||||
#include <tensors/Tensor_unary.h>
|
#include <Grid/tensors/Tensor_unary.h>
|
||||||
#include <tensors/Tensor_extract_merge.h>
|
#include <Grid/tensors/Tensor_extract_merge.h>
|
||||||
#include <tensors/Tensor_logical.h>
|
#include <Grid/tensors/Tensor_logical.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -37,7 +37,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for ")
|
#ifdef GRID_NUMA
|
||||||
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||||
|
#else
|
||||||
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||||
|
#endif
|
||||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||||
#else
|
#else
|
||||||
#define PARALLEL_FOR_LOOP
|
#define PARALLEL_FOR_LOOP
|
||||||
|
@ -31,7 +31,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -28,7 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||||
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,8 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_CHEBYSHEV_H
|
#ifndef GRID_CHEBYSHEV_H
|
||||||
#define GRID_CHEBYSHEV_H
|
#define GRID_CHEBYSHEV_H
|
||||||
|
|
||||||
#include<Grid.h>
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
#include<algorithms/LinearOperator.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -18,10 +18,10 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <Config.h>
|
#include <Config.h>
|
||||||
|
|
||||||
#ifdef HAVE_GMP_H
|
#ifdef HAVE_LIBGMP
|
||||||
#include <algorithms/approx/bigfloat.h>
|
#include "bigfloat.h"
|
||||||
#else
|
#else
|
||||||
#include <algorithms/approx/bigfloat_double.h>
|
#include "bigfloat_double.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||||
|
@ -1,150 +1,168 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CONJUGATE_GRADIENT_H
|
#ifndef GRID_CONJUGATE_GRADIENT_H
|
||||||
#define GRID_CONJUGATE_GRADIENT_H
|
#define GRID_CONJUGATE_GRADIENT_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for iterative processes based on operators
|
// Base classes for iterative processes based on operators
|
||||||
// single input vec, single output vec.
|
// single input vec, single output vec.
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class Field>
|
template <class Field>
|
||||||
class ConjugateGradient : public OperatorFunction<Field> {
|
class ConjugateGradient : public OperatorFunction<Field> {
|
||||||
public:
|
public:
|
||||||
RealD Tolerance;
|
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||||
Integer MaxIterations;
|
// Defaults true.
|
||||||
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
RealD Tolerance;
|
||||||
};
|
Integer MaxIterations;
|
||||||
|
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||||
|
: Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
ErrorOnNoConverge(err_on_no_conv){};
|
||||||
|
|
||||||
|
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
|
||||||
|
Field &psi) {
|
||||||
|
psi.checkerboard = src.checkerboard;
|
||||||
|
conformable(psi, src);
|
||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||||
|
|
||||||
psi.checkerboard = src.checkerboard;
|
Field p(src);
|
||||||
conformable(psi,src);
|
Field mmp(src);
|
||||||
|
Field r(src);
|
||||||
|
|
||||||
RealD cp,c,a,d,b,ssq,qq,b_pred;
|
// Initial residual computation & set up
|
||||||
|
RealD guess = norm2(psi);
|
||||||
Field p(src);
|
assert(std::isnan(guess) == 0);
|
||||||
Field mmp(src);
|
|
||||||
Field r(src);
|
|
||||||
|
|
||||||
//Initial residual computation & set up
|
|
||||||
RealD guess = norm2(psi);
|
|
||||||
assert(std::isnan(guess)==0);
|
|
||||||
|
|
||||||
Linop.HermOpAndNorm(psi,mmp,d,b);
|
|
||||||
|
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||||
r= src-mmp;
|
|
||||||
p= r;
|
|
||||||
|
|
||||||
a =norm2(p);
|
|
||||||
cp =a;
|
|
||||||
ssq=norm2(src);
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
r = src - mmp;
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
p = r;
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
|
||||||
|
|
||||||
RealD rsq = Tolerance* Tolerance*ssq;
|
a = norm2(p);
|
||||||
|
cp = a;
|
||||||
//Check if guess is really REALLY good :)
|
ssq = norm2(src);
|
||||||
if ( cp <= rsq ) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
|
|
||||||
|
|
||||||
GridStopWatch LinalgTimer;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
GridStopWatch MatrixTimer;
|
<< "ConjugateGradient: guess " << guess << std::endl;
|
||||||
GridStopWatch SolverTimer;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: src " << ssq << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mp " << d << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mmp " << b << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: cp,r " << cp << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: p " << a << std::endl;
|
||||||
|
|
||||||
SolverTimer.Start();
|
RealD rsq = Tolerance * Tolerance * ssq;
|
||||||
int k;
|
|
||||||
for (k=1;k<=MaxIterations;k++){
|
|
||||||
|
|
||||||
c=cp;
|
|
||||||
|
|
||||||
MatrixTimer.Start();
|
// Check if guess is really REALLY good :)
|
||||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
if (cp <= rsq) {
|
||||||
MatrixTimer.Stop();
|
return;
|
||||||
|
|
||||||
LinalgTimer.Start();
|
|
||||||
// RealD qqck = norm2(mmp);
|
|
||||||
// ComplexD dck = innerProduct(p,mmp);
|
|
||||||
|
|
||||||
a = c/d;
|
|
||||||
b_pred = a*(a*qq-d)/c;
|
|
||||||
|
|
||||||
cp = axpy_norm(r,-a,mmp,r);
|
|
||||||
b = cp/c;
|
|
||||||
|
|
||||||
// Fuse these loops ; should be really easy
|
|
||||||
psi= a*p+psi;
|
|
||||||
p = p*b+r;
|
|
||||||
|
|
||||||
LinalgTimer.Stop();
|
|
||||||
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
|
|
||||||
|
|
||||||
// Stopping condition
|
|
||||||
if ( cp <= rsq ) {
|
|
||||||
|
|
||||||
SolverTimer.Stop();
|
|
||||||
Linop.HermOpAndNorm(psi,mmp,d,qq);
|
|
||||||
p=mmp-src;
|
|
||||||
|
|
||||||
RealD mmpnorm = sqrt(norm2(mmp));
|
|
||||||
RealD psinorm = sqrt(norm2(psi));
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
|
||||||
RealD resnorm = sqrt(norm2(p));
|
|
||||||
RealD true_residual = resnorm/srcnorm;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
|
|
||||||
<<" computed residual "<<sqrt(cp/ssq)
|
|
||||||
<<" true residual " <<true_residual
|
|
||||||
<<" target "<<Tolerance<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
|
|
||||||
std::cout<<std::endl;
|
|
||||||
|
|
||||||
assert(true_residual/Tolerance < 1000.0);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
GridStopWatch LinalgTimer;
|
||||||
|
GridStopWatch MatrixTimer;
|
||||||
|
GridStopWatch SolverTimer;
|
||||||
|
|
||||||
|
SolverTimer.Start();
|
||||||
|
int k;
|
||||||
|
for (k = 1; k <= MaxIterations; k++) {
|
||||||
|
c = cp;
|
||||||
|
|
||||||
|
MatrixTimer.Start();
|
||||||
|
Linop.HermOpAndNorm(p, mmp, d, qq);
|
||||||
|
MatrixTimer.Stop();
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
// RealD qqck = norm2(mmp);
|
||||||
|
// ComplexD dck = innerProduct(p,mmp);
|
||||||
|
|
||||||
|
a = c / d;
|
||||||
|
b_pred = a * (a * qq - d) / c;
|
||||||
|
|
||||||
|
cp = axpy_norm(r, -a, mmp, r);
|
||||||
|
b = cp / c;
|
||||||
|
|
||||||
|
// Fuse these loops ; should be really easy
|
||||||
|
psi = a * p + psi;
|
||||||
|
p = p * b + r;
|
||||||
|
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||||
|
<< " residual " << cp << " target " << rsq << std::endl;
|
||||||
|
|
||||||
|
// Stopping condition
|
||||||
|
if (cp <= rsq) {
|
||||||
|
SolverTimer.Stop();
|
||||||
|
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
|
p = mmp - src;
|
||||||
|
|
||||||
|
RealD mmpnorm = sqrt(norm2(mmp));
|
||||||
|
RealD psinorm = sqrt(norm2(psi));
|
||||||
|
RealD srcnorm = sqrt(norm2(src));
|
||||||
|
RealD resnorm = sqrt(norm2(p));
|
||||||
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
|
||||||
|
<< " true residual " << true_residual << " target "
|
||||||
|
<< Tolerance << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Time elapsed: Iterations "
|
||||||
|
<< SolverTimer.Elapsed() << " Matrix "
|
||||||
|
<< MatrixTimer.Elapsed() << " Linalg "
|
||||||
|
<< LinalgTimer.Elapsed();
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||||
|
<< std::endl;
|
||||||
|
if (ErrorOnNoConverge) assert(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
142
lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
142
lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||||
|
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
//Mixed precision restarted defect correction CG
|
||||||
|
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||||
|
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||||
|
public:
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxInnerIterations;
|
||||||
|
Integer MaxOuterIterations;
|
||||||
|
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||||
|
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||||
|
LinearOperatorBase<FieldF> &Linop_f;
|
||||||
|
LinearOperatorBase<FieldD> &Linop_d;
|
||||||
|
|
||||||
|
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||||
|
LinearFunction<FieldF> *guesser;
|
||||||
|
|
||||||
|
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
|
||||||
|
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||||
|
Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||||
|
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||||
|
|
||||||
|
void useGuesser(LinearFunction<FieldF> &g){
|
||||||
|
guesser = &g;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||||
|
GridStopWatch TotalTimer;
|
||||||
|
TotalTimer.Start();
|
||||||
|
|
||||||
|
int cb = src_d_in.checkerboard;
|
||||||
|
sol_d.checkerboard = cb;
|
||||||
|
|
||||||
|
RealD src_norm = norm2(src_d_in);
|
||||||
|
RealD stop = src_norm * Tolerance*Tolerance;
|
||||||
|
|
||||||
|
GridBase* DoublePrecGrid = src_d_in._grid;
|
||||||
|
FieldD tmp_d(DoublePrecGrid);
|
||||||
|
tmp_d.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldD tmp2_d(DoublePrecGrid);
|
||||||
|
tmp2_d.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldD src_d(DoublePrecGrid);
|
||||||
|
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||||
|
|
||||||
|
RealD inner_tol = Tolerance;
|
||||||
|
|
||||||
|
FieldF src_f(SinglePrecGrid);
|
||||||
|
src_f.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldF sol_f(SinglePrecGrid);
|
||||||
|
sol_f.checkerboard = cb;
|
||||||
|
|
||||||
|
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||||
|
CG_f.ErrorOnNoConverge = false;
|
||||||
|
|
||||||
|
GridStopWatch InnerCGtimer;
|
||||||
|
|
||||||
|
GridStopWatch PrecChangeTimer;
|
||||||
|
|
||||||
|
for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||||
|
//Compute double precision rsd and also new RHS vector.
|
||||||
|
Linop_d.HermOp(sol_d, tmp_d);
|
||||||
|
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||||
|
|
||||||
|
if(norm < OuterLoopNormMult * stop){
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||||
|
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(src_f, src_d);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
zeroit(sol_f);
|
||||||
|
|
||||||
|
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||||
|
if(guesser != NULL)
|
||||||
|
(*guesser)(src_f, sol_f);
|
||||||
|
|
||||||
|
//Inner CG
|
||||||
|
CG_f.Tolerance = inner_tol;
|
||||||
|
InnerCGtimer.Start();
|
||||||
|
CG_f(Linop_f, src_f, sol_f);
|
||||||
|
InnerCGtimer.Stop();
|
||||||
|
|
||||||
|
//Convert sol back to double and add to double prec solution
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(tmp_d, sol_f);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Final trial CG
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||||
|
|
||||||
|
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||||
|
CG_d(Linop_d, src_d_in, sol_d);
|
||||||
|
|
||||||
|
TotalTimer.Stop();
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -130,8 +130,8 @@ DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#include <algorithms/iterative/Householder.h>
|
#include "Householder.h"
|
||||||
#include <algorithms/iterative/Francis.h>
|
#include "Francis.h"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -33,8 +33,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifdef USE_LAPACK
|
#ifdef USE_LAPACK
|
||||||
#include <lapacke.h>
|
#include <lapacke.h>
|
||||||
#endif
|
#endif
|
||||||
#include <algorithms/iterative/DenseMatrix.h>
|
#include "DenseMatrix.h"
|
||||||
#include <algorithms/iterative/EigenSort.h>
|
#include "EigenSort.h"
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_CARTESIAN_BASE_H
|
#ifndef GRID_CARTESIAN_BASE_H
|
||||||
#define GRID_CARTESIAN_BASE_H
|
#define GRID_CARTESIAN_BASE_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid{
|
||||||
|
|
||||||
@ -82,11 +81,8 @@ public:
|
|||||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
||||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
||||||
int CheckerBoardFromOindex (int Oindex){
|
virtual int CheckerBoardFromOindex (int Oindex)=0;
|
||||||
std::vector<int> ocoor;
|
virtual int CheckerBoardFromOindexTable (int Oindex)=0;
|
||||||
oCoorFromOindex(ocoor,Oindex);
|
|
||||||
return CheckerBoard(ocoor);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Local layout calculations
|
// Local layout calculations
|
||||||
@ -107,6 +103,12 @@ public:
|
|||||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
virtual int iIndex(std::vector<int> &lcoor)
|
||||||
|
{
|
||||||
|
int idx=0;
|
||||||
|
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
inline int oIndexReduced(std::vector<int> &ocoor)
|
inline int oIndexReduced(std::vector<int> &ocoor)
|
||||||
{
|
{
|
||||||
int idx=0;
|
int idx=0;
|
||||||
@ -123,12 +125,6 @@ public:
|
|||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
// SIMD lane addressing
|
// SIMD lane addressing
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
inline int iIndex(std::vector<int> &lcoor)
|
|
||||||
{
|
|
||||||
int idx=0;
|
|
||||||
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
||||||
{
|
{
|
||||||
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
||||||
@ -220,7 +216,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
|
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
|
||||||
o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim
|
o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim
|
||||||
}
|
}
|
||||||
|
|
||||||
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
||||||
|
@ -39,6 +39,13 @@ class GridCartesian: public GridBase {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -32,23 +32,18 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
static const int CbRed =0;
|
static const int CbRed =0;
|
||||||
static const int CbBlack=1;
|
static const int CbBlack=1;
|
||||||
static const int Even =CbRed;
|
static const int Even =CbRed;
|
||||||
static const int Odd =CbBlack;
|
static const int Odd =CbBlack;
|
||||||
|
|
||||||
// Perhaps these are misplaced and
|
|
||||||
// should be in sparse matrix.
|
|
||||||
// Also should make these a named enum type
|
|
||||||
static const int DaggerNo=0;
|
|
||||||
static const int DaggerYes=1;
|
|
||||||
|
|
||||||
// Specialise this for red black grids storing half the data like a chess board.
|
// Specialise this for red black grids storing half the data like a chess board.
|
||||||
class GridRedBlackCartesian : public GridBase
|
class GridRedBlackCartesian : public GridBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
std::vector<int> _checker_dim_mask;
|
std::vector<int> _checker_dim_mask;
|
||||||
int _checker_dim;
|
int _checker_dim;
|
||||||
|
std::vector<int> _checker_board;
|
||||||
|
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
if( dim==_checker_dim) return 1;
|
if( dim==_checker_dim) return 1;
|
||||||
@ -78,12 +73,20 @@ public:
|
|||||||
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
||||||
|
|
||||||
if ( (source_cb+ocb)&1 ) {
|
if ( (source_cb+ocb)&1 ) {
|
||||||
|
|
||||||
return (shift)/2;
|
return (shift)/2;
|
||||||
} else {
|
} else {
|
||||||
return (shift+1)/2;
|
return (shift+1)/2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return _checker_board[Oindex];
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
std::vector<int> ocoor;
|
||||||
|
oCoorFromOindex(ocoor,Oindex);
|
||||||
|
return CheckerBoard(ocoor);
|
||||||
|
}
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
||||||
|
|
||||||
if(dim != _checker_dim) return shift;
|
if(dim != _checker_dim) return shift;
|
||||||
@ -175,7 +178,7 @@ public:
|
|||||||
// all elements of a simd vector must have same checkerboard.
|
// all elements of a simd vector must have same checkerboard.
|
||||||
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
||||||
if ( _simd_layout[d]>1 ) {
|
if ( _simd_layout[d]>1 ) {
|
||||||
if ( d != _checker_dim ) {
|
if ( checker_dim_mask[d] ) {
|
||||||
assert( (_rdimensions[d]&0x1) == 0 );
|
assert( (_rdimensions[d]&0x1) == 0 );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -191,6 +194,8 @@ public:
|
|||||||
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
||||||
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -211,6 +216,18 @@ public:
|
|||||||
_slice_nblock[d]=nblock;
|
_slice_nblock[d]=nblock;
|
||||||
block = block*_rdimensions[d];
|
block = block*_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
// Create a checkerboard lookup table
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
int rvol = 1;
|
||||||
|
for(int d=0;d<_ndimension;d++){
|
||||||
|
rvol=rvol * _rdimensions[d];
|
||||||
|
}
|
||||||
|
_checker_board.resize(rvol);
|
||||||
|
for(int osite=0;osite<_osites;osite++){
|
||||||
|
_checker_board[osite] = CheckerBoardFromOindex (osite);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
protected:
|
protected:
|
||||||
@ -224,9 +241,21 @@ protected:
|
|||||||
idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return idx;
|
return idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
virtual int iIndex(std::vector<int> &lcoor)
|
||||||
|
{
|
||||||
|
int idx=0;
|
||||||
|
for(int d=0;d<_ndimension;d++) {
|
||||||
|
if( d==_checker_dim ) {
|
||||||
|
idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d]));
|
||||||
|
} else {
|
||||||
|
idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
0
lib/communicator/.dirstamp
Normal file
0
lib/communicator/.dirstamp
Normal file
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
int stride=rhs._grid->_slice_stride[dimension];
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int bo=0;
|
int bo=0;
|
||||||
|
std::vector<std::pair<int,int> > table;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*stride;
|
int o = n*stride;
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
|
||||||
if ( ocb &cbmask ) {
|
if ( ocb &cbmask ) {
|
||||||
buffer[off+bo++]=compress(rhs._odata[so+o+b]);
|
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int i=0;i<table.size();i++){
|
||||||
|
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
412
lib/fftw/fftw3.h
Normal file
412
lib/fftw/fftw3.h
Normal file
@ -0,0 +1,412 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||||
|
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||||
|
*
|
||||||
|
* The following statement of license applies *only* to this header file,
|
||||||
|
* and *not* to the other files distributed with FFTW or derived therefrom:
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||||
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||||
|
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/***************************** NOTE TO USERS *********************************
|
||||||
|
*
|
||||||
|
* THIS IS A HEADER FILE, NOT A MANUAL
|
||||||
|
*
|
||||||
|
* If you want to know how to use FFTW, please read the manual,
|
||||||
|
* online at http://www.fftw.org/doc/ and also included with FFTW.
|
||||||
|
* For a quick start, see the manual's tutorial section.
|
||||||
|
*
|
||||||
|
* (Reading header files to learn how to use a library is a habit
|
||||||
|
* stemming from code lacking a proper manual. Arguably, it's a
|
||||||
|
* *bad* habit in most cases, because header files can contain
|
||||||
|
* interfaces that are not part of the public, stable API.)
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef FFTW3_H
|
||||||
|
#define FFTW3_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
/* If <complex.h> is included, use the C99 complex type. Otherwise
|
||||||
|
define a type bit-compatible with C99 complex */
|
||||||
|
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
|
||||||
|
# define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C
|
||||||
|
#else
|
||||||
|
# define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define FFTW_CONCAT(prefix, name) prefix ## name
|
||||||
|
#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name)
|
||||||
|
#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name)
|
||||||
|
#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name)
|
||||||
|
#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name)
|
||||||
|
|
||||||
|
/* IMPORTANT: for Windows compilers, you should add a line
|
||||||
|
#define FFTW_DLL
|
||||||
|
here and in kernel/ifftw.h if you are compiling/using FFTW as a
|
||||||
|
DLL, in order to do the proper importing/exporting, or
|
||||||
|
alternatively compile with -DFFTW_DLL or the equivalent
|
||||||
|
command-line flag. This is not necessary under MinGW/Cygwin, where
|
||||||
|
libtool does the imports/exports automatically. */
|
||||||
|
#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__))
|
||||||
|
/* annoying Windows syntax for shared-library declarations */
|
||||||
|
# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */
|
||||||
|
# define FFTW_EXTERN extern __declspec(dllexport)
|
||||||
|
# else /* user is calling FFTW; import symbol */
|
||||||
|
# define FFTW_EXTERN extern __declspec(dllimport)
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define FFTW_EXTERN extern
|
||||||
|
#endif
|
||||||
|
|
||||||
|
enum fftw_r2r_kind_do_not_use_me {
|
||||||
|
FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
|
||||||
|
FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
|
||||||
|
FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10
|
||||||
|
};
|
||||||
|
|
||||||
|
struct fftw_iodim_do_not_use_me {
|
||||||
|
int n; /* dimension size */
|
||||||
|
int is; /* input stride */
|
||||||
|
int os; /* output stride */
|
||||||
|
};
|
||||||
|
|
||||||
|
#include <stddef.h> /* for ptrdiff_t */
|
||||||
|
struct fftw_iodim64_do_not_use_me {
|
||||||
|
ptrdiff_t n; /* dimension size */
|
||||||
|
ptrdiff_t is; /* input stride */
|
||||||
|
ptrdiff_t os; /* output stride */
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *);
|
||||||
|
typedef int (*fftw_read_char_func_do_not_use_me)(void *);
|
||||||
|
|
||||||
|
/*
|
||||||
|
huge second-order macro that defines prototypes for all API
|
||||||
|
functions. We expand this macro for each supported precision
|
||||||
|
|
||||||
|
X: name-mangling macro
|
||||||
|
R: real data type
|
||||||
|
C: complex data type
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define FFTW_DEFINE_API(X, R, C) \
|
||||||
|
\
|
||||||
|
FFTW_DEFINE_COMPLEX(R, C); \
|
||||||
|
\
|
||||||
|
typedef struct X(plan_s) *X(plan); \
|
||||||
|
\
|
||||||
|
typedef struct fftw_iodim_do_not_use_me X(iodim); \
|
||||||
|
typedef struct fftw_iodim64_do_not_use_me X(iodim64); \
|
||||||
|
\
|
||||||
|
typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \
|
||||||
|
\
|
||||||
|
typedef fftw_write_char_func_do_not_use_me X(write_char_func); \
|
||||||
|
typedef fftw_read_char_func_do_not_use_me X(read_char_func); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(execute)(const X(plan) p); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n, \
|
||||||
|
C *in, C *out, int sign, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, \
|
||||||
|
unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1, \
|
||||||
|
C *in, C *out, int sign, unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2, \
|
||||||
|
C *in, C *out, int sign, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n, \
|
||||||
|
int howmany, \
|
||||||
|
C *in, const int *inembed, \
|
||||||
|
int istride, int idist, \
|
||||||
|
C *out, const int *onembed, \
|
||||||
|
int ostride, int odist, \
|
||||||
|
int sign, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
C *in, C *out, \
|
||||||
|
int sign, unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
R *ri, R *ii, R *ro, R *io, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank, \
|
||||||
|
const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
C *in, C *out, \
|
||||||
|
int sign, unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank, \
|
||||||
|
const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
R *ri, R *ii, R *ro, R *io, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out); \
|
||||||
|
FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \
|
||||||
|
R *ro, R *io); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n, \
|
||||||
|
int howmany, \
|
||||||
|
R *in, const int *inembed, \
|
||||||
|
int istride, int idist, \
|
||||||
|
C *out, const int *onembed, \
|
||||||
|
int ostride, int odist, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n, \
|
||||||
|
R *in, C *out, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1, \
|
||||||
|
R *in, C *out, unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1, \
|
||||||
|
int n2, \
|
||||||
|
R *in, C *out, unsigned flags); \
|
||||||
|
\
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n, \
|
||||||
|
int howmany, \
|
||||||
|
C *in, const int *inembed, \
|
||||||
|
int istride, int idist, \
|
||||||
|
R *out, const int *onembed, \
|
||||||
|
int ostride, int odist, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n, \
|
||||||
|
C *in, R *out, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1, \
|
||||||
|
C *in, R *out, unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1, \
|
||||||
|
int n2, \
|
||||||
|
C *in, R *out, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
R *in, C *out, \
|
||||||
|
unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
C *in, R *out, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)( \
|
||||||
|
int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
R *in, R *ro, R *io, \
|
||||||
|
unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)( \
|
||||||
|
int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
R *ri, R *ii, R *out, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank, \
|
||||||
|
const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
R *in, C *out, \
|
||||||
|
unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank, \
|
||||||
|
const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
C *in, R *out, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)( \
|
||||||
|
int rank, const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
R *in, R *ro, R *io, \
|
||||||
|
unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)( \
|
||||||
|
int rank, const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
R *ri, R *ii, R *out, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \
|
||||||
|
FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p, \
|
||||||
|
R *in, R *ro, R *io); \
|
||||||
|
FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p, \
|
||||||
|
R *ri, R *ii, R *out); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n, \
|
||||||
|
int howmany, \
|
||||||
|
R *in, const int *inembed, \
|
||||||
|
int istride, int idist, \
|
||||||
|
R *out, const int *onembed, \
|
||||||
|
int ostride, int odist, \
|
||||||
|
const X(r2r_kind) *kind, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, \
|
||||||
|
const X(r2r_kind) *kind, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out, \
|
||||||
|
X(r2r_kind) kind, unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \
|
||||||
|
X(r2r_kind) kind0, X(r2r_kind) kind1, \
|
||||||
|
unsigned flags); \
|
||||||
|
FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2, \
|
||||||
|
R *in, R *out, X(r2r_kind) kind0, \
|
||||||
|
X(r2r_kind) kind1, X(r2r_kind) kind2, \
|
||||||
|
unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim) *howmany_dims, \
|
||||||
|
R *in, R *out, \
|
||||||
|
const X(r2r_kind) *kind, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \
|
||||||
|
int howmany_rank, \
|
||||||
|
const X(iodim64) *howmany_dims, \
|
||||||
|
R *in, R *out, \
|
||||||
|
const X(r2r_kind) *kind, unsigned flags); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(destroy_plan)(X(plan) p); \
|
||||||
|
FFTW_EXTERN void X(forget_wisdom)(void); \
|
||||||
|
FFTW_EXTERN void X(cleanup)(void); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(set_timelimit)(double t); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(plan_with_nthreads)(int nthreads); \
|
||||||
|
FFTW_EXTERN int X(init_threads)(void); \
|
||||||
|
FFTW_EXTERN void X(cleanup_threads)(void); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename); \
|
||||||
|
FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file); \
|
||||||
|
FFTW_EXTERN char *X(export_wisdom_to_string)(void); \
|
||||||
|
FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char, \
|
||||||
|
void *data); \
|
||||||
|
FFTW_EXTERN int X(import_system_wisdom)(void); \
|
||||||
|
FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename); \
|
||||||
|
FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file); \
|
||||||
|
FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string); \
|
||||||
|
FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file); \
|
||||||
|
FFTW_EXTERN void X(print_plan)(const X(plan) p); \
|
||||||
|
FFTW_EXTERN char *X(sprint_plan)(const X(plan) p); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void *X(malloc)(size_t n); \
|
||||||
|
FFTW_EXTERN R *X(alloc_real)(size_t n); \
|
||||||
|
FFTW_EXTERN C *X(alloc_complex)(size_t n); \
|
||||||
|
FFTW_EXTERN void X(free)(void *p); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN void X(flops)(const X(plan) p, \
|
||||||
|
double *add, double *mul, double *fmas); \
|
||||||
|
FFTW_EXTERN double X(estimate_cost)(const X(plan) p); \
|
||||||
|
FFTW_EXTERN double X(cost)(const X(plan) p); \
|
||||||
|
\
|
||||||
|
FFTW_EXTERN int X(alignment_of)(R *p); \
|
||||||
|
FFTW_EXTERN const char X(version)[]; \
|
||||||
|
FFTW_EXTERN const char X(cc)[]; \
|
||||||
|
FFTW_EXTERN const char X(codelet_optim)[];
|
||||||
|
|
||||||
|
|
||||||
|
/* end of FFTW_DEFINE_API macro */
|
||||||
|
|
||||||
|
FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex)
|
||||||
|
FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex)
|
||||||
|
FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex)
|
||||||
|
|
||||||
|
/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64
|
||||||
|
for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */
|
||||||
|
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \
|
||||||
|
&& !(defined(__ICC) || defined(__INTEL_COMPILER)) \
|
||||||
|
&& (defined(__i386__) || defined(__x86_64__) || defined(__ia64__))
|
||||||
|
# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
|
||||||
|
/* note: __float128 is a typedef, which is not supported with the _Complex
|
||||||
|
keyword in gcc, so instead we use this ugly __attribute__ version.
|
||||||
|
However, we can't simply pass the __attribute__ version to
|
||||||
|
FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer
|
||||||
|
types. Hence redefining FFTW_DEFINE_COMPLEX. Ugh. */
|
||||||
|
# undef FFTW_DEFINE_COMPLEX
|
||||||
|
# define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C
|
||||||
|
# endif
|
||||||
|
FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define FFTW_FORWARD (-1)
|
||||||
|
#define FFTW_BACKWARD (+1)
|
||||||
|
|
||||||
|
#define FFTW_NO_TIMELIMIT (-1.0)
|
||||||
|
|
||||||
|
/* documented flags */
|
||||||
|
#define FFTW_MEASURE (0U)
|
||||||
|
#define FFTW_DESTROY_INPUT (1U << 0)
|
||||||
|
#define FFTW_UNALIGNED (1U << 1)
|
||||||
|
#define FFTW_CONSERVE_MEMORY (1U << 2)
|
||||||
|
#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */
|
||||||
|
#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */
|
||||||
|
#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */
|
||||||
|
#define FFTW_ESTIMATE (1U << 6)
|
||||||
|
#define FFTW_WISDOM_ONLY (1U << 21)
|
||||||
|
|
||||||
|
/* undocumented beyond-guru flags */
|
||||||
|
#define FFTW_ESTIMATE_PATIENT (1U << 7)
|
||||||
|
#define FFTW_BELIEVE_PCOST (1U << 8)
|
||||||
|
#define FFTW_NO_DFT_R2HC (1U << 9)
|
||||||
|
#define FFTW_NO_NONTHREADED (1U << 10)
|
||||||
|
#define FFTW_NO_BUFFERING (1U << 11)
|
||||||
|
#define FFTW_NO_INDIRECT_OP (1U << 12)
|
||||||
|
#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */
|
||||||
|
#define FFTW_NO_RANK_SPLITS (1U << 14)
|
||||||
|
#define FFTW_NO_VRANK_SPLITS (1U << 15)
|
||||||
|
#define FFTW_NO_VRECURSE (1U << 16)
|
||||||
|
#define FFTW_NO_SIMD (1U << 17)
|
||||||
|
#define FFTW_NO_SLOW (1U << 18)
|
||||||
|
#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19)
|
||||||
|
#define FFTW_ALLOW_PRUNING (1U << 20)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif /* FFTW3_H */
|
@ -1,73 +1,74 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/lattice/Lattice_ET.h
|
Source file: ./lib/lattice/Lattice_ET.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_ET_H
|
#ifndef GRID_LATTICE_ET_H
|
||||||
#define GRID_LATTICE_ET_H
|
#define GRID_LATTICE_ET_H
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// Predicated where support
|
// Predicated where support
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
template<class iobj,class vobj,class robj>
|
template <class iobj, class vobj, class robj>
|
||||||
inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) {
|
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||||
|
const robj &iffalse) {
|
||||||
|
typename std::remove_const<vobj>::type ret;
|
||||||
|
|
||||||
typename std::remove_const<vobj>::type ret;
|
typedef typename vobj::scalar_object scalar_object;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
typedef typename vobj::scalar_object scalar_object;
|
const int Nsimd = vobj::vector_type::Nsimd();
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
const int words = sizeof(vobj) / sizeof(vector_type);
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
|
|
||||||
const int Nsimd = vobj::vector_type::Nsimd();
|
std::vector<Integer> mask(Nsimd);
|
||||||
const int words = sizeof(vobj)/sizeof(vector_type);
|
std::vector<scalar_object> truevals(Nsimd);
|
||||||
|
std::vector<scalar_object> falsevals(Nsimd);
|
||||||
|
|
||||||
std::vector<Integer> mask(Nsimd);
|
extract(iftrue, truevals);
|
||||||
std::vector<scalar_object> truevals (Nsimd);
|
extract(iffalse, falsevals);
|
||||||
std::vector<scalar_object> falsevals(Nsimd);
|
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||||
|
|
||||||
extract(iftrue ,truevals);
|
for (int s = 0; s < Nsimd; s++) {
|
||||||
extract(iffalse ,falsevals);
|
if (mask[s]) falsevals[s] = truevals[s];
|
||||||
extract<vInteger,Integer>(TensorRemove(predicate),mask);
|
|
||||||
|
|
||||||
for(int s=0;s<Nsimd;s++){
|
|
||||||
if (mask[s]) falsevals[s]=truevals[s];
|
|
||||||
}
|
|
||||||
|
|
||||||
merge(ret,falsevals);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
merge(ret, falsevals);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// recursive evaluation of expressions; Could
|
// recursive evaluation of expressions; Could
|
||||||
// switch to generic approach with variadics, a la
|
// switch to generic approach with variadics, a la
|
||||||
@ -75,303 +76,351 @@ namespace Grid {
|
|||||||
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
|
// leaf eval of lattice ; should enable if protect using traits
|
||||||
|
|
||||||
//leaf eval of lattice ; should enable if protect using traits
|
template <typename T>
|
||||||
|
using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||||
|
|
||||||
template <typename T> using is_lattice = std::is_base_of<LatticeBase,T >;
|
template <typename T>
|
||||||
|
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
|
||||||
|
|
||||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||||
|
|
||||||
|
//Specialization of getVectorType for lattices
|
||||||
|
template<typename T>
|
||||||
|
struct getVectorType<Lattice<T> >{
|
||||||
|
typedef typename Lattice<T>::vector_object type;
|
||||||
|
};
|
||||||
|
|
||||||
template<class sobj>
|
template<class sobj>
|
||||||
inline sobj eval(const unsigned int ss, const sobj &arg)
|
inline sobj eval(const unsigned int ss, const sobj &arg)
|
||||||
{
|
{
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
template<class lobj>
|
template <class lobj>
|
||||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg)
|
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {
|
||||||
{
|
return arg._odata[ss];
|
||||||
return arg._odata[ss];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle nodes in syntax tree
|
// handle nodes in syntax tree
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
auto inline eval(const unsigned int ss, const LatticeUnaryExpression<Op,T1 > &expr) // eval one operand
|
auto inline eval(
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second))))
|
const unsigned int ss,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) // eval one operand
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)));
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
auto inline eval(const unsigned int ss, const LatticeBinaryExpression<Op,T1,T2> &expr) // eval two operands
|
auto inline eval(
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))))
|
const unsigned int ss,
|
||||||
{
|
const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)));
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
auto inline eval(const unsigned int ss, const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) // eval three operands
|
auto inline eval(const unsigned int ss,
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second))))
|
const LatticeTrinaryExpression<Op, T1, T2, T3>
|
||||||
{
|
&expr) // eval three operands
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) );
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)),
|
||||||
|
eval(ss, std::get<2>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)),
|
||||||
|
eval(ss, std::get<2>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion
|
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||||
|
// tree recursion
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
template <class T1,
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf
|
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
{
|
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
|
||||||
if ( grid ) {
|
|
||||||
conformable(grid,lat._grid);
|
|
||||||
}
|
|
||||||
grid=lat._grid;
|
|
||||||
}
|
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
|
|
||||||
{
|
{
|
||||||
|
if (grid) {
|
||||||
|
conformable(grid, lat._grid);
|
||||||
|
}
|
||||||
|
grid = lat._grid;
|
||||||
}
|
}
|
||||||
|
template <class T1,
|
||||||
|
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void GridFromExpression(GridBase *&grid,
|
||||||
|
const T1 &notlat) // non-lattice leaf
|
||||||
|
{}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void GridFromExpression(GridBase *&grid,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
inline void GridFromExpression(
|
||||||
{
|
GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid, std::get<1>(expr.second));
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void GridFromExpression(
|
||||||
{
|
GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid, std::get<1>(expr.second));
|
||||||
GridFromExpression(grid,std::get<2>(expr.second));
|
GridFromExpression(grid, std::get<2>(expr.second));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
|
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||||
|
// tree recursion
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
template <class T1,
|
||||||
inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
|
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
|
||||||
{
|
{
|
||||||
if ( (cb==Odd) || (cb==Even) ) {
|
if ((cb == Odd) || (cb == Even)) {
|
||||||
assert(cb==lat.checkerboard);
|
assert(cb == lat.checkerboard);
|
||||||
}
|
}
|
||||||
cb=lat.checkerboard;
|
cb = lat.checkerboard;
|
||||||
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
template <class T1,
|
||||||
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
|
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void CBFromExpression(int &cb, const T1 &notlat) // non-lattice leaf
|
||||||
{
|
{
|
||||||
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void CBFromExpression(int &cb,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
inline void CBFromExpression(int &cb,
|
||||||
{
|
const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb, std::get<1>(expr.second));
|
||||||
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void CBFromExpression(
|
||||||
{
|
int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb, std::get<1>(expr.second));
|
||||||
CBFromExpression(cb,std::get<2>(expr.second));
|
CBFromExpression(cb, std::get<2>(expr.second));
|
||||||
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Unary operators and funcs
|
// Unary operators and funcs
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#define GridUnopClass(name,ret)\
|
#define GridUnopClass(name, ret) \
|
||||||
template <class arg> struct name\
|
template <class arg> \
|
||||||
{\
|
struct name { \
|
||||||
static auto inline func(const arg a)-> decltype(ret) { return ret; } \
|
static auto inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||||
};
|
};
|
||||||
|
|
||||||
GridUnopClass(UnarySub,-a);
|
GridUnopClass(UnarySub, -a);
|
||||||
GridUnopClass(UnaryNot,Not(a));
|
GridUnopClass(UnaryNot, Not(a));
|
||||||
GridUnopClass(UnaryAdj,adj(a));
|
GridUnopClass(UnaryAdj, adj(a));
|
||||||
GridUnopClass(UnaryConj,conjugate(a));
|
GridUnopClass(UnaryConj, conjugate(a));
|
||||||
GridUnopClass(UnaryTrace,trace(a));
|
GridUnopClass(UnaryTrace, trace(a));
|
||||||
GridUnopClass(UnaryTranspose,transpose(a));
|
GridUnopClass(UnaryTranspose, transpose(a));
|
||||||
GridUnopClass(UnaryTa,Ta(a));
|
GridUnopClass(UnaryTa, Ta(a));
|
||||||
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
|
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||||
GridUnopClass(UnaryReal,real(a));
|
GridUnopClass(UnaryReal, real(a));
|
||||||
GridUnopClass(UnaryImag,imag(a));
|
GridUnopClass(UnaryImag, imag(a));
|
||||||
GridUnopClass(UnaryToReal,toReal(a));
|
GridUnopClass(UnaryToReal, toReal(a));
|
||||||
GridUnopClass(UnaryToComplex,toComplex(a));
|
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||||
GridUnopClass(UnaryAbs,abs(a));
|
GridUnopClass(UnaryTimesI, timesI(a));
|
||||||
GridUnopClass(UnarySqrt,sqrt(a));
|
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||||
GridUnopClass(UnaryRsqrt,rsqrt(a));
|
GridUnopClass(UnaryAbs, abs(a));
|
||||||
GridUnopClass(UnarySin,sin(a));
|
GridUnopClass(UnarySqrt, sqrt(a));
|
||||||
GridUnopClass(UnaryCos,cos(a));
|
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||||
GridUnopClass(UnaryLog,log(a));
|
GridUnopClass(UnarySin, sin(a));
|
||||||
GridUnopClass(UnaryExp,exp(a));
|
GridUnopClass(UnaryCos, cos(a));
|
||||||
|
GridUnopClass(UnaryAsin, asin(a));
|
||||||
|
GridUnopClass(UnaryAcos, acos(a));
|
||||||
|
GridUnopClass(UnaryLog, log(a));
|
||||||
|
GridUnopClass(UnaryExp, exp(a));
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Binary operators
|
// Binary operators
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#define GridBinOpClass(name,combination)\
|
#define GridBinOpClass(name, combination) \
|
||||||
template <class left,class right>\
|
template <class left, class right> \
|
||||||
struct name\
|
struct name { \
|
||||||
{\
|
static auto inline func(const left &lhs, const right &rhs) \
|
||||||
static auto inline func(const left &lhs,const right &rhs)-> decltype(combination) const \
|
-> decltype(combination) const { \
|
||||||
{\
|
return combination; \
|
||||||
return combination;\
|
} \
|
||||||
}\
|
}
|
||||||
}
|
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||||
GridBinOpClass(BinaryAdd,lhs+rhs);
|
GridBinOpClass(BinarySub, lhs - rhs);
|
||||||
GridBinOpClass(BinarySub,lhs-rhs);
|
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||||
GridBinOpClass(BinaryMul,lhs*rhs);
|
|
||||||
|
|
||||||
GridBinOpClass(BinaryAnd ,lhs&rhs);
|
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||||
GridBinOpClass(BinaryOr ,lhs|rhs);
|
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||||
GridBinOpClass(BinaryAndAnd,lhs&&rhs);
|
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||||
GridBinOpClass(BinaryOrOr ,lhs||rhs);
|
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// Trinary conditional op
|
// Trinary conditional op
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
#define GridTrinOpClass(name,combination)\
|
#define GridTrinOpClass(name, combination) \
|
||||||
template <class predicate,class left, class right> \
|
template <class predicate, class left, class right> \
|
||||||
struct name\
|
struct name { \
|
||||||
{\
|
static auto inline func(const predicate &pred, const left &lhs, \
|
||||||
static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \
|
const right &rhs) -> decltype(combination) const { \
|
||||||
{\
|
return combination; \
|
||||||
return combination;\
|
} \
|
||||||
}\
|
}
|
||||||
}
|
|
||||||
|
|
||||||
GridTrinOpClass(TrinaryWhere,(predicatedWhere<predicate, \
|
GridTrinOpClass(
|
||||||
typename std::remove_reference<left>::type, \
|
TrinaryWhere,
|
||||||
typename std::remove_reference<right>::type> (pred,lhs,rhs)));
|
(predicatedWhere<predicate, typename std::remove_reference<left>::type,
|
||||||
|
typename std::remove_reference<right>::type>(pred, lhs,
|
||||||
|
rhs)));
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Operator syntactical glue
|
// Operator syntactical glue
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
|
||||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
|
||||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
|
||||||
|
|
||||||
#define GRID_DEF_UNOP(op, name)\
|
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||||
template <typename T1,\
|
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr> inline auto op(const T1 &arg) \
|
#define GRID_TRINOP(name) \
|
||||||
-> decltype(LatticeUnaryExpression<GRID_UNOP(name),const T1&>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \
|
name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||||
{ return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); }
|
|
||||||
|
|
||||||
#define GRID_BINOP_LEFT(op, name)\
|
#define GRID_DEF_UNOP(op, name) \
|
||||||
template <typename T1,typename T2,\
|
template <typename T1, \
|
||||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr>\
|
typename std::enable_if<is_lattice<T1>::value || \
|
||||||
inline auto op(const T1 &lhs,const T2&rhs) \
|
is_lattice_expr<T1>::value, \
|
||||||
-> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
T1>::type * = nullptr> \
|
||||||
std::forward_as_tuple(lhs, rhs)))) \
|
inline auto op(const T1 &arg) \
|
||||||
{\
|
->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
|
||||||
std::forward_as_tuple(lhs, rhs))); \
|
return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||||
}
|
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_BINOP_RIGHT(op, name)\
|
#define GRID_BINOP_LEFT(op, name) \
|
||||||
template <typename T1,typename T2,\
|
template <typename T1, typename T2, \
|
||||||
typename std::enable_if<!is_lattice<T1>::value && !is_lattice_expr<T1>::value, T1>::type* = nullptr,\
|
typename std::enable_if<is_lattice<T1>::value || \
|
||||||
typename std::enable_if< is_lattice<T2>::value || is_lattice_expr<T2>::value, T2>::type* = nullptr> \
|
is_lattice_expr<T1>::value, \
|
||||||
inline auto op(const T1 &lhs,const T2&rhs) \
|
T1>::type * = nullptr> \
|
||||||
-> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||||
std::forward_as_tuple(lhs, rhs)))) \
|
->decltype( \
|
||||||
{\
|
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
std::make_pair(GRID_BINOP(name)(), \
|
||||||
std::forward_as_tuple(lhs, rhs))); \
|
std::forward_as_tuple(lhs, rhs)))) { \
|
||||||
}
|
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_DEF_BINOP(op, name)\
|
#define GRID_BINOP_RIGHT(op, name) \
|
||||||
GRID_BINOP_LEFT(op,name);\
|
template <typename T1, typename T2, \
|
||||||
GRID_BINOP_RIGHT(op,name);
|
typename std::enable_if<!is_lattice<T1>::value && \
|
||||||
|
!is_lattice_expr<T1>::value, \
|
||||||
|
T1>::type * = nullptr, \
|
||||||
|
typename std::enable_if<is_lattice<T2>::value || \
|
||||||
|
is_lattice_expr<T2>::value, \
|
||||||
|
T2>::type * = nullptr> \
|
||||||
|
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||||
|
->decltype( \
|
||||||
|
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), \
|
||||||
|
std::forward_as_tuple(lhs, rhs)))) { \
|
||||||
|
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define GRID_DEF_BINOP(op, name) \
|
||||||
|
GRID_BINOP_LEFT(op, name); \
|
||||||
|
GRID_BINOP_RIGHT(op, name);
|
||||||
|
|
||||||
#define GRID_DEF_TRINOP(op, name)\
|
#define GRID_DEF_TRINOP(op, name) \
|
||||||
template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \
|
template <typename T1, typename T2, typename T3> \
|
||||||
-> decltype(LatticeTrinaryExpression<GRID_TRINOP(name),const T1&,const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(),\
|
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||||
std::forward_as_tuple(pred,lhs,rhs)))) \
|
->decltype( \
|
||||||
{\
|
LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||||
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(), \
|
const T3 &>(std::make_pair( \
|
||||||
std::forward_as_tuple(pred,lhs, rhs))); \
|
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \
|
||||||
}
|
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||||
|
const T3 &>(std::make_pair( \
|
||||||
|
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
|
||||||
|
}
|
||||||
////////////////////////
|
////////////////////////
|
||||||
//Operator definitions
|
// Operator definitions
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
GRID_DEF_UNOP(operator -,UnarySub);
|
GRID_DEF_UNOP(operator-, UnarySub);
|
||||||
GRID_DEF_UNOP(Not,UnaryNot);
|
GRID_DEF_UNOP(Not, UnaryNot);
|
||||||
GRID_DEF_UNOP(operator !,UnaryNot);
|
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||||
GRID_DEF_UNOP(adj,UnaryAdj);
|
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||||
GRID_DEF_UNOP(conjugate,UnaryConj);
|
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||||
GRID_DEF_UNOP(trace,UnaryTrace);
|
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
|
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||||
GRID_DEF_UNOP(Ta,UnaryTa);
|
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||||
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
|
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||||
GRID_DEF_UNOP(real,UnaryReal);
|
GRID_DEF_UNOP(real, UnaryReal);
|
||||||
GRID_DEF_UNOP(imag,UnaryImag);
|
GRID_DEF_UNOP(imag, UnaryImag);
|
||||||
GRID_DEF_UNOP(toReal,UnaryToReal);
|
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||||
GRID_DEF_UNOP(toComplex,UnaryToComplex);
|
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||||
GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
|
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||||
GRID_DEF_UNOP(sqrt ,UnarySqrt);
|
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||||
GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
|
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||||
GRID_DEF_UNOP(sin ,UnarySin);
|
// abs-fabs-dabs-labs thing
|
||||||
GRID_DEF_UNOP(cos ,UnaryCos);
|
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||||
GRID_DEF_UNOP(log ,UnaryLog);
|
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||||
GRID_DEF_UNOP(exp ,UnaryExp);
|
GRID_DEF_UNOP(sin, UnarySin);
|
||||||
|
GRID_DEF_UNOP(cos, UnaryCos);
|
||||||
|
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||||
|
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||||
|
GRID_DEF_UNOP(log, UnaryLog);
|
||||||
|
GRID_DEF_UNOP(exp, UnaryExp);
|
||||||
|
|
||||||
GRID_DEF_BINOP(operator+,BinaryAdd);
|
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||||
GRID_DEF_BINOP(operator-,BinarySub);
|
GRID_DEF_BINOP(operator-, BinarySub);
|
||||||
GRID_DEF_BINOP(operator*,BinaryMul);
|
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||||
|
|
||||||
GRID_DEF_BINOP(operator&,BinaryAnd);
|
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||||
GRID_DEF_BINOP(operator|,BinaryOr);
|
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||||
GRID_DEF_BINOP(operator&&,BinaryAndAnd);
|
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||||
GRID_DEF_BINOP(operator||,BinaryOrOr);
|
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||||
|
|
||||||
GRID_DEF_TRINOP(where,TrinaryWhere);
|
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Closure convenience to force expression to evaluate
|
// Closure convenience to force expression to evaluate
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
template<class Op,class T1>
|
template <class Op, class T1>
|
||||||
auto closure(const LatticeUnaryExpression<Op,T1> & expr)
|
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))>
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> ret(expr);
|
expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class Op,class T1, class T2>
|
template <class Op, class T1, class T2>
|
||||||
auto closure(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second))))>
|
eval(0, std::get<1>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
eval(0, std::get<1>(expr.second))))>
|
||||||
eval(0,std::get<1>(expr.second))))> ret(expr);
|
ret(expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class Op,class T1, class T2, class T3>
|
template <class Op, class T1, class T2, class T3>
|
||||||
auto closure(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second)),
|
eval(0, std::get<1>(expr.second)),
|
||||||
eval(0,std::get<2>(expr.second))))>
|
eval(0, std::get<2>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
eval(0, std::get<1>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second)),
|
eval(0, std::get<2>(expr.second))))>
|
||||||
eval(0,std::get<2>(expr.second))))> ret(expr);
|
ret(expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -382,12 +431,11 @@ template<class Op,class T1, class T2, class T3>
|
|||||||
#undef GRID_DEF_UNOP
|
#undef GRID_DEF_UNOP
|
||||||
#undef GRID_DEF_BINOP
|
#undef GRID_DEF_BINOP
|
||||||
#undef GRID_DEF_TRINOP
|
#undef GRID_DEF_TRINOP
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
int main(int argc,char **argv){
|
int main(int argc,char **argv){
|
||||||
|
|
||||||
Lattice<double> v1(16);
|
Lattice<double> v1(16);
|
||||||
@ -397,7 +445,7 @@ using namespace Grid;
|
|||||||
BinaryAdd<double,double> tmp;
|
BinaryAdd<double,double> tmp;
|
||||||
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
||||||
expr(std::make_pair(tmp,
|
expr(std::make_pair(tmp,
|
||||||
std::forward_as_tuple(v1,v2)));
|
std::forward_as_tuple(v1,v2)));
|
||||||
tmp.func(eval(0,v1),eval(0,v2));
|
tmp.func(eval(0,v1),eval(0,v2));
|
||||||
|
|
||||||
auto var = v1+v2;
|
auto var = v1+v2;
|
||||||
|
@ -1,32 +1,33 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/lattice/Lattice_base.h
|
Source file: ./lib/lattice/Lattice_base.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_BASE_H
|
#ifndef GRID_LATTICE_BASE_H
|
||||||
#define GRID_LATTICE_BASE_H
|
#define GRID_LATTICE_BASE_H
|
||||||
|
|
||||||
@ -101,6 +102,7 @@ public:
|
|||||||
int begin(void) { return 0;};
|
int begin(void) { return 0;};
|
||||||
int end(void) { return _odata.size(); }
|
int end(void) { return _odata.size(); }
|
||||||
vobj & operator[](int i) { return _odata[i]; };
|
vobj & operator[](int i) { return _odata[i]; };
|
||||||
|
const vobj & operator[](int i) const { return _odata[i]; };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
@ -255,6 +257,18 @@ PARALLEL_FOR_LOOP
|
|||||||
checkerboard=0;
|
checkerboard=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Lattice(const Lattice& r){ // copy constructor
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata.resize(_grid->oSites());// essential
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
_odata[ss]=r._odata[ss];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
virtual ~Lattice(void) = default;
|
virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
@ -267,7 +281,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
this->checkerboard = r.checkerboard;
|
this->checkerboard = r.checkerboard;
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
this->_odata[ss]=r._odata[ss];
|
this->_odata[ss]=r._odata[ss];
|
||||||
@ -324,27 +338,27 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <lattice/Lattice_conformable.h>
|
#include "Lattice_conformable.h"
|
||||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#include <lattice/Lattice_ET.h>
|
#include "Lattice_ET.h"
|
||||||
#else
|
#else
|
||||||
#include <lattice/Lattice_overload.h>
|
#include "Lattice_overload.h"
|
||||||
#endif
|
#endif
|
||||||
#include <lattice/Lattice_arith.h>
|
#include "Lattice_arith.h"
|
||||||
#include <lattice/Lattice_trace.h>
|
#include "Lattice_trace.h"
|
||||||
#include <lattice/Lattice_transpose.h>
|
#include "Lattice_transpose.h"
|
||||||
#include <lattice/Lattice_local.h>
|
#include "Lattice_local.h"
|
||||||
#include <lattice/Lattice_reduction.h>
|
#include "Lattice_reduction.h"
|
||||||
#include <lattice/Lattice_peekpoke.h>
|
#include "Lattice_peekpoke.h"
|
||||||
#include <lattice/Lattice_reality.h>
|
#include "Lattice_reality.h"
|
||||||
#include <lattice/Lattice_comparison_utils.h>
|
#include "Lattice_comparison_utils.h"
|
||||||
#include <lattice/Lattice_comparison.h>
|
#include "Lattice_comparison.h"
|
||||||
#include <lattice/Lattice_coordinate.h>
|
#include "Lattice_coordinate.h"
|
||||||
#include <lattice/Lattice_where.h>
|
#include "Lattice_where.h"
|
||||||
#include <lattice/Lattice_rng.h>
|
#include "Lattice_rng.h"
|
||||||
#include <lattice/Lattice_unary.h>
|
#include "Lattice_unary.h"
|
||||||
#include <lattice/Lattice_transfer.h>
|
#include "Lattice_transfer.h"
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -40,7 +40,7 @@ namespace Grid {
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||||
ComplexD nrm = innerProduct(arg,arg);
|
ComplexD nrm = innerProduct(arg,arg);
|
||||||
return real(nrm);
|
return std::real(nrm);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
|
@ -31,6 +31,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
|
// RNG_SPRNG_SHA256 is not enabled by default yet.
|
||||||
|
// Uncomment the following line to see the effect of the new RNG.
|
||||||
|
// #define RNG_SPRNG_SHA256
|
||||||
|
|
||||||
|
#ifdef RNG_SPRNG_SHA256
|
||||||
|
#include "rng/sprng-sha256.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
@ -110,7 +118,11 @@ namespace Grid {
|
|||||||
int _seeded;
|
int _seeded;
|
||||||
// One generator per site.
|
// One generator per site.
|
||||||
// Uniform and Gaussian distributions from these generators.
|
// Uniform and Gaussian distributions from these generators.
|
||||||
#ifdef RNG_RANLUX
|
#ifdef RNG_SPRNG_SHA256
|
||||||
|
typedef uint32_t RngStateType;
|
||||||
|
typedef SprngSha256 RngEngine;
|
||||||
|
static const int RngStateCount = 22;
|
||||||
|
#elif defined RNG_RANLUX
|
||||||
typedef uint64_t RngStateType;
|
typedef uint64_t RngStateType;
|
||||||
typedef std::ranlux48 RngEngine;
|
typedef std::ranlux48 RngEngine;
|
||||||
static const int RngStateCount = 15;
|
static const int RngStateCount = 15;
|
||||||
@ -273,6 +285,34 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef RNG_SPRNG_SHA256
|
||||||
|
template<class source> void Seed(source &src)
|
||||||
|
{
|
||||||
|
std::vector<int> gcoor;
|
||||||
|
|
||||||
|
long gsites = _grid->_gsites;
|
||||||
|
|
||||||
|
RngState rs;
|
||||||
|
for (int i = 0; i < 8; ++i) {
|
||||||
|
splitRngState(rs, rs, src());
|
||||||
|
}
|
||||||
|
|
||||||
|
for(long gidx=0;gidx<gsites;gidx++){
|
||||||
|
|
||||||
|
int rank,o_idx,i_idx;
|
||||||
|
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||||
|
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||||
|
|
||||||
|
int l_idx=generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
|
if( rank == _grid->ThisRank() ){
|
||||||
|
splitRngState(_generators[l_idx].rs, rs, gidx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_seeded=1;
|
||||||
|
|
||||||
|
}
|
||||||
|
#else
|
||||||
// This loop could be made faster to avoid the Ahmdahl by
|
// This loop could be made faster to avoid the Ahmdahl by
|
||||||
// i) seed generators on each timeslice, for x=y=z=0;
|
// i) seed generators on each timeslice, for x=y=z=0;
|
||||||
// ii) seed generators on each z for x=y=0
|
// ii) seed generators on each z for x=y=0
|
||||||
@ -312,6 +352,7 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
_seeded=1;
|
_seeded=1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//FIXME implement generic IO and create state save/restore
|
//FIXME implement generic IO and create state save/restore
|
||||||
//void SaveState(const std::string<char> &file);
|
//void SaveState(const std::string<char> &file);
|
||||||
|
@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
|||||||
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<ig->lSites();idx++){
|
for(int idx=0;idx<ig->lSites();idx++){
|
||||||
std::vector<int> lcoor(ni);
|
std::vector<int> lcoor(ni);
|
||||||
ig->LocalIndexToLocalCoor(idx,lcoor);
|
ig->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
@ -386,7 +386,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<lg->lSites();idx++){
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
std::vector<int> lcoor(nl);
|
std::vector<int> lcoor(nl);
|
||||||
std::vector<int> hcoor(nh);
|
std::vector<int> hcoor(nh);
|
||||||
@ -420,15 +420,15 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
|
|||||||
assert(hg->_processors[orthog]==1);
|
assert(hg->_processors[orthog]==1);
|
||||||
|
|
||||||
int dl; dl = 0;
|
int dl; dl = 0;
|
||||||
for(int d=0;d<nh;d++){
|
for(int d=0;d<nh;d++){
|
||||||
if ( d != orthog) {
|
if ( d != orthog) {
|
||||||
assert(lg->_processors[dl] == hg->_processors[d]);
|
assert(lg->_processors[dl] == hg->_processors[d]);
|
||||||
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
|
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
|
||||||
dl++;
|
dl++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<lg->lSites();idx++){
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
std::vector<int> lcoor(nl);
|
std::vector<int> lcoor(nl);
|
||||||
std::vector<int> hcoor(nh);
|
std::vector<int> hcoor(nh);
|
||||||
@ -446,6 +446,79 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
GridBase *lg = lowDim._grid;
|
||||||
|
GridBase *hg = higherDim._grid;
|
||||||
|
int nl = lg->_ndimension;
|
||||||
|
int nh = hg->_ndimension;
|
||||||
|
|
||||||
|
assert(nl == nh);
|
||||||
|
assert(orthog<nh);
|
||||||
|
assert(orthog>=0);
|
||||||
|
|
||||||
|
for(int d=0;d<nh;d++){
|
||||||
|
assert(lg->_processors[d] == hg->_processors[d]);
|
||||||
|
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
//PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(nl);
|
||||||
|
std::vector<int> hcoor(nh);
|
||||||
|
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
|
hcoor=lcoor;
|
||||||
|
hcoor[orthog] = slice_hi;
|
||||||
|
peekLocalSite(s,lowDim,lcoor);
|
||||||
|
pokeLocalSite(s,higherDim,hcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
GridBase *lg = lowDim._grid;
|
||||||
|
GridBase *hg = higherDim._grid;
|
||||||
|
int nl = lg->_ndimension;
|
||||||
|
int nh = hg->_ndimension;
|
||||||
|
|
||||||
|
assert(nl == nh);
|
||||||
|
assert(orthog<nh);
|
||||||
|
assert(orthog>=0);
|
||||||
|
|
||||||
|
for(int d=0;d<nh;d++){
|
||||||
|
assert(lg->_processors[d] == hg->_processors[d]);
|
||||||
|
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
//PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(nl);
|
||||||
|
std::vector<int> hcoor(nh);
|
||||||
|
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
|
hcoor=lcoor;
|
||||||
|
hcoor[orthog] = slice_hi;
|
||||||
|
peekLocalSite(s,higherDim,hcoor);
|
||||||
|
pokeLocalSite(s,lowDim,lcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||||
{
|
{
|
||||||
@ -482,6 +555,96 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||||
|
template<typename vobj, typename sobj>
|
||||||
|
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){
|
||||||
|
typedef typename vobj::vector_type vtype;
|
||||||
|
|
||||||
|
GridBase* in_grid = in._grid;
|
||||||
|
out.resize(in_grid->lSites());
|
||||||
|
|
||||||
|
int ndim = in_grid->Nd();
|
||||||
|
int in_nsimd = vtype::Nsimd();
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > in_icoor(in_nsimd);
|
||||||
|
|
||||||
|
for(int lane=0; lane < in_nsimd; lane++){
|
||||||
|
in_icoor[lane].resize(ndim);
|
||||||
|
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||||
|
}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||||
|
//Assemble vector of pointers to output elements
|
||||||
|
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||||
|
|
||||||
|
std::vector<int> in_ocoor(ndim);
|
||||||
|
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||||
|
|
||||||
|
std::vector<int> lcoor(in_grid->Nd());
|
||||||
|
|
||||||
|
for(int lane=0; lane < in_nsimd; lane++){
|
||||||
|
for(int mu=0;mu<ndim;mu++)
|
||||||
|
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||||
|
|
||||||
|
int lex;
|
||||||
|
Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions);
|
||||||
|
out_ptrs[lane] = &out[lex];
|
||||||
|
}
|
||||||
|
|
||||||
|
//Unpack into those ptrs
|
||||||
|
const vobj & in_vobj = in._odata[in_oidx];
|
||||||
|
extract1(in_vobj, out_ptrs, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Convert a Lattice from one precision to another
|
||||||
|
template<class VobjOut, class VobjIn>
|
||||||
|
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||||
|
assert(out._grid->Nd() == in._grid->Nd());
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
GridBase *in_grid=in._grid;
|
||||||
|
GridBase *out_grid = out._grid;
|
||||||
|
|
||||||
|
typedef typename VobjOut::scalar_object SobjOut;
|
||||||
|
typedef typename VobjIn::scalar_object SobjIn;
|
||||||
|
|
||||||
|
int ndim = out._grid->Nd();
|
||||||
|
int out_nsimd = out_grid->Nsimd();
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > out_icoor(out_nsimd);
|
||||||
|
|
||||||
|
for(int lane=0; lane < out_nsimd; lane++){
|
||||||
|
out_icoor[lane].resize(ndim);
|
||||||
|
out_grid->iCoorFromIindex(out_icoor[lane], lane);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<SobjOut> in_slex_conv(in_grid->lSites());
|
||||||
|
unvectorizeToLexOrdArray(in_slex_conv, in);
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
|
||||||
|
std::vector<int> out_ocoor(ndim);
|
||||||
|
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
|
||||||
|
|
||||||
|
std::vector<SobjOut*> ptrs(out_nsimd);
|
||||||
|
|
||||||
|
std::vector<int> lcoor(out_grid->Nd());
|
||||||
|
|
||||||
|
for(int lane=0; lane < out_nsimd; lane++){
|
||||||
|
for(int mu=0;mu<ndim;mu++)
|
||||||
|
lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu];
|
||||||
|
|
||||||
|
int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
|
||||||
|
ptrs[lane] = &in_slex_conv[llex];
|
||||||
|
}
|
||||||
|
merge(out._odata[out_oidx], ptrs, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
353
lib/lattice/rng/rng-state.h
Normal file
353
lib/lattice/rng/rng-state.h
Normal file
@ -0,0 +1,353 @@
|
|||||||
|
// vim: set ts=2 sw=2 expandtab:
|
||||||
|
|
||||||
|
// Copyright (c) 2016 Luchang Jin
|
||||||
|
// All rights reserved.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef INCLUDE_RNG_STATE_H
|
||||||
|
#define INCLUDE_RNG_STATE_H
|
||||||
|
|
||||||
|
#include "show.h"
|
||||||
|
|
||||||
|
#ifndef USE_OPENSSL
|
||||||
|
#include "sha256.h"
|
||||||
|
#else
|
||||||
|
#include <openssl/sha.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <endian.h>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cassert>
|
||||||
|
#include <string>
|
||||||
|
#include <ostream>
|
||||||
|
#include <istream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
namespace CURRENT_DEFAULT_NAMESPACE_NAME {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct RngState;
|
||||||
|
|
||||||
|
inline void reset(RngState& rs);
|
||||||
|
|
||||||
|
inline void reset(RngState& rs, const std::string& seed);
|
||||||
|
|
||||||
|
// Re-seed ``rs'' from an integer: the seed is rendered as text with show()
// and handed to the string overload of reset().
inline void reset(RngState& rs, const long seed)
{
  const std::string seedText = show(seed);
  reset(rs, seedText);
}
|
||||||
|
|
||||||
|
inline void splitRngState(RngState& rs, const RngState& rs0, const std::string& sindex);
|
||||||
|
|
||||||
|
// Integer-index convenience overload: the index is rendered as text with
// show() and forwarded to the string overload of splitRngState().
inline void splitRngState(RngState& rs, const RngState& rs0, const long sindex = 0)
{
  const std::string indexText = show(sindex);
  splitRngState(rs, rs0, indexText);
}
|
||||||
|
|
||||||
|
inline uint64_t randGen(RngState& rs);
|
||||||
|
|
||||||
|
inline double uRandGen(RngState& rs, const double upper = 1.0, const double lower = 0.0);
|
||||||
|
|
||||||
|
inline double gRandGen(RngState& rs, const double sigma = 1.0, const double center = 0.0);
|
||||||
|
|
||||||
|
inline void computeHashWithInput(uint32_t hash[8], const RngState& rs, const std::string& input);
|
||||||
|
|
||||||
|
struct RngState
|
||||||
|
{
|
||||||
|
uint64_t numBytes;
|
||||||
|
uint32_t hash[8];
|
||||||
|
unsigned long index;
|
||||||
|
//
|
||||||
|
uint64_t cache[3];
|
||||||
|
double gaussian;
|
||||||
|
int cacheAvail;
|
||||||
|
bool gaussianAvail;
|
||||||
|
//
|
||||||
|
inline void init()
|
||||||
|
{
|
||||||
|
reset(*this);
|
||||||
|
}
|
||||||
|
//
|
||||||
|
RngState()
|
||||||
|
{
|
||||||
|
init();
|
||||||
|
}
|
||||||
|
RngState(const std::string& seed)
|
||||||
|
{
|
||||||
|
reset(*this, seed);
|
||||||
|
}
|
||||||
|
RngState(const long seed)
|
||||||
|
{
|
||||||
|
reset(*this, seed);
|
||||||
|
}
|
||||||
|
RngState(const RngState& rs0, const std::string& sindex)
|
||||||
|
{
|
||||||
|
splitRngState(*this, rs0, sindex);
|
||||||
|
}
|
||||||
|
RngState(const RngState& rs0, const long sindex)
|
||||||
|
{
|
||||||
|
splitRngState(*this, rs0, sindex);
|
||||||
|
}
|
||||||
|
//
|
||||||
|
RngState split(const std::string& sindex)
|
||||||
|
{
|
||||||
|
RngState rs(*this, sindex);
|
||||||
|
return rs;
|
||||||
|
}
|
||||||
|
RngState split(const long sindex)
|
||||||
|
{
|
||||||
|
RngState rs(*this, sindex);
|
||||||
|
return rs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const size_t RNG_STATE_NUM_OF_INT32 = 2 + 8 + 2 + 3 * 2 + 2 + 1 + 1;
|
||||||
|
|
||||||
|
// Combine two 32-bit words into one 64-bit value: ``a'' becomes the high
// half and ``b'' the low half.
inline uint64_t patchTwoUint32(const uint32_t a, const uint32_t b)
{
  const uint64_t upper = static_cast<uint64_t>(a) << 32;
  const uint64_t lower = static_cast<uint64_t>(b);
  return upper | lower;
}
|
||||||
|
|
||||||
|
inline void splitTwoUint32(uint32_t& a, uint32_t& b, const uint64_t x)
|
||||||
|
{
|
||||||
|
b = (uint32_t)x;
|
||||||
|
a = (uint32_t)(x >> 32);
|
||||||
|
assert(x == patchTwoUint32(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void exportRngState(uint32_t* v, const RngState& rs)
|
||||||
|
{
|
||||||
|
assert(22 == RNG_STATE_NUM_OF_INT32);
|
||||||
|
splitTwoUint32(v[0], v[1], rs.numBytes);
|
||||||
|
for (int i = 0; i < 8; ++i) {
|
||||||
|
v[2 + i] = rs.hash[i];
|
||||||
|
}
|
||||||
|
splitTwoUint32(v[10], v[11], rs.index);
|
||||||
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
splitTwoUint32(v[12 + i * 2], v[12 + i * 2 + 1], rs.cache[i]);
|
||||||
|
}
|
||||||
|
union {
|
||||||
|
double d;
|
||||||
|
uint64_t l;
|
||||||
|
} g;
|
||||||
|
g.d = rs.gaussian;
|
||||||
|
splitTwoUint32(v[18], v[19], g.l);
|
||||||
|
v[20] = rs.cacheAvail;
|
||||||
|
v[21] = rs.gaussianAvail;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rebuild ``rs'' from 22 consecutive 32-bit words at ``v'', using the layout
// written by exportRngState: [0,1] numBytes, [2..9] hash, [10,11] index,
// [12..17] cache, [18,19] bit pattern of gaussian, [20] cacheAvail,
// [21] gaussianAvail. No validation of the values is done here.
inline void importRngState(RngState& rs, const uint32_t* v)
{
  static_assert(22 == RNG_STATE_NUM_OF_INT32, "importRngState layout expects 22 words");
  static_assert(sizeof(double) == sizeof(uint64_t), "gaussian is imported from one 64-bit pattern");
  rs.numBytes = patchTwoUint32(v[0], v[1]);
  for (int i = 0; i < 8; ++i) {
    rs.hash[i] = v[2 + i];
  }
  rs.index = patchTwoUint32(v[10], v[11]);
  for (int i = 0; i < 3; ++i) {
    rs.cache[i] = patchTwoUint32(v[12 + i * 2], v[12 + i * 2 + 1]);
  }
  // Copy the bit pattern back into the double; reading the inactive member
  // of a union for this is undefined behaviour in C++.
  const uint64_t gaussianBits = patchTwoUint32(v[18], v[19]);
  std::memcpy(&rs.gaussian, &gaussianBits, sizeof(gaussianBits));
  rs.cacheAvail = v[20];
  rs.gaussianAvail = v[21];
}
|
||||||
|
|
||||||
|
inline void exportRngState(std::vector<uint32_t>& v, const RngState& rs)
|
||||||
|
{
|
||||||
|
v.resize(RNG_STATE_NUM_OF_INT32);
|
||||||
|
exportRngState(v.data(), rs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vector overload: ``v'' must hold exactly RNG_STATE_NUM_OF_INT32 words
// (asserted); the words are decoded through the pointer overload.
inline void importRngState(RngState& rs, const std::vector<uint32_t>& v)
{
  assert(v.size() == RNG_STATE_NUM_OF_INT32);
  const uint32_t* const words = v.data();
  importRngState(rs, words);
}
|
||||||
|
|
||||||
|
inline std::ostream& operator<<(std::ostream& os, const RngState& rs)
|
||||||
|
{
|
||||||
|
std::vector<uint32_t> v(RNG_STATE_NUM_OF_INT32);
|
||||||
|
exportRngState(v, rs);
|
||||||
|
for (size_t i = 0; i < v.size() - 1; ++i) {
|
||||||
|
os << v[i] << " ";
|
||||||
|
}
|
||||||
|
os << v.back();
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::istream& operator>>(std::istream& is, RngState& rs)
|
||||||
|
{
|
||||||
|
std::vector<uint32_t> v(RNG_STATE_NUM_OF_INT32);
|
||||||
|
for (size_t i = 0; i < v.size(); ++i) {
|
||||||
|
is >> v[i];
|
||||||
|
}
|
||||||
|
importRngState(rs, v);
|
||||||
|
return is;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string show(const RngState& rs)
|
||||||
|
{
|
||||||
|
return shows(rs);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator==(const RngState& rs1, const RngState& rs2)
|
||||||
|
{
|
||||||
|
return 0 == memcmp(&rs1, &rs2, sizeof(RngState));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put ``rs'' into the all-zero state.
// The raw storage is cleared first (which also zeroes any padding bytes),
// then every member is assigned explicitly.
inline void reset(RngState& rs)
{
  std::memset(&rs, 0, sizeof(RngState));
  rs.numBytes = 0;
  for (int word = 0; word < 8; ++word) {
    rs.hash[word] = 0;
  }
  rs.index = 0;
  for (int slot = 0; slot < 3; ++slot) {
    rs.cache[slot] = 0;
  }
  rs.gaussian = 0.0;
  rs.cacheAvail = 0;
  rs.gaussianAvail = false;
}
|
||||||
|
|
||||||
|
// Seed ``rs'' from a string: start from the all-zero state and split it,
// in place, with the seed as the label.
inline void reset(RngState& rs, const std::string& seed)
{
  reset(rs);
  const RngState& zeroed = rs;
  splitRngState(rs, zeroed, seed);
}
|
||||||
|
|
||||||
|
inline void computeHashWithInput(uint32_t hash[8], const RngState& rs, const std::string& input)
|
||||||
|
{
|
||||||
|
std::string data(32, ' ');
|
||||||
|
for (int i = 0; i < 8; ++i) {
|
||||||
|
data[i*4 + 0] = (rs.hash[i] >> 24) & 0xFF;
|
||||||
|
data[i*4 + 1] = (rs.hash[i] >> 16) & 0xFF;
|
||||||
|
data[i*4 + 2] = (rs.hash[i] >> 8) & 0xFF;
|
||||||
|
data[i*4 + 3] = rs.hash[i] & 0xFF;
|
||||||
|
}
|
||||||
|
data += input;
|
||||||
|
#ifndef USE_OPENSSL
|
||||||
|
sha256::computeHash(hash, (const uint8_t*)data.c_str(), data.length());
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
uint8_t rawHash[32];
|
||||||
|
SHA256((unsigned char*)data.c_str(), data.length(), rawHash);
|
||||||
|
for (int i = 0; i < 8; ++i) {
|
||||||
|
hash[i] = (((uint32_t)rawHash[i*4 + 0]) << 24)
|
||||||
|
+ (((uint32_t)rawHash[i*4 + 1]) << 16)
|
||||||
|
+ (((uint32_t)rawHash[i*4 + 2]) << 8)
|
||||||
|
+ ( (uint32_t)rawHash[i*4 + 3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void splitRngState(RngState& rs, const RngState& rs0, const std::string& sindex)
// Derive the state ``rs'' from the parent ``rs0'' and the label ``sindex''.
// The parent is only read, and every value taken from it is consumed
// before the corresponding member of ``rs'' is written, so the call is
// valid when ``rs'' and ``rs0'' are the same object.
{
  const std::string input = ssprintf("[%lu] {%s}", rs0.index, sindex.c_str());
  // 32 hash bytes + input + 1 + 8, rounded up to whole 64-byte blocks
  // (presumably the padded SHA-256 message size -- confirm against sha256.h).
  const size_t blocks = (32 + input.length() + 1 + 8 - 1) / 64 + 1;
  rs.numBytes = rs0.numBytes + 64 * blocks;
  computeHashWithInput(rs.hash, rs0, input);
  // A freshly split stream has produced nothing yet.
  rs.index = 0;
  for (int slot = 0; slot < 3; ++slot) {
    rs.cache[slot] = 0;
  }
  rs.gaussian = 0.0;
  rs.cacheAvail = 0;
  rs.gaussianAvail = false;
}
|
||||||
|
|
||||||
|
// Return the next 64-bit output of the stream and advance rs.index.
// Each hash evaluation yields four 64-bit values: the last pair of words is
// returned immediately and the other three are cached, to be handed out
// (highest slot first) by the following three calls.
inline uint64_t randGen(RngState& rs)
{
  assert(0 <= rs.cacheAvail && rs.cacheAvail <= 3);
  rs.index += 1;
  if (rs.cacheAvail <= 0) {
    // Cache empty: hash the state together with the current index.
    uint32_t hash[8];
    computeHashWithInput(hash, rs, ssprintf("[%lu]", rs.index));
    for (int slot = 0; slot < 3; ++slot) {
      rs.cache[slot] = patchTwoUint32(hash[2 * slot], hash[2 * slot + 1]);
    }
    rs.cacheAvail = 3;
    return patchTwoUint32(hash[6], hash[7]);
  }
  // Serve a cached value and clear its slot.
  rs.cacheAvail -= 1;
  const int slot = rs.cacheAvail;
  const uint64_t value = rs.cache[slot];
  rs.cache[slot] = 0;
  return value;
}
|
||||||
|
|
||||||
|
// Uniform deviate between ``lower'' and ``upper'': one 64-bit output of
// randGen() is scaled by 2^-64 and mapped linearly onto the interval.
// NOTE(review): the integer-to-double conversion rounds, so outputs very
// close to 2^64 presumably map to exactly ``upper'' -- confirm whether
// callers rely on a half-open range.
inline double uRandGen(RngState& rs, const double upper, const double lower)
{
  const double two32 = 256.0 * 256.0 * 256.0 * 256.0;
  const double fac = 1.0 / two32 / two32;
  const uint64_t u = randGen(rs);
  const double unit = u * fac;
  return unit * (upper - lower) + lower;
}
|
||||||
|
|
||||||
|
// Gaussian deviate with standard deviation ``sigma'' and mean ``center''.
// Deviates are produced in pairs by the polar rejection method: one is
// returned at once, the other is stored in rs.gaussian and returned by the
// next call. rs.index is advanced on every call (and again by each uniform
// draw made through uRandGen).
// If no acceptable pair is found within the attempt budget, a warning is
// printed and 1e+10 is returned.
inline double gRandGen(RngState& rs, const double sigma, const double center)
{
  rs.index += 1;
  if (rs.gaussianAvail) {
    rs.gaussianAvail = false;
    return rs.gaussian * sigma + center;
  } else {
    // pick 2 uniform numbers in the square extending from
    // -1 to 1 in each direction, see if they are in the
    // unit circle, and try again if they are not.
    int num_try = 1;
    double v1, v2, rsq;
    do {
      v1 = uRandGen(rs, 1.0, -1.0);
      v2 = uRandGen(rs, 1.0, -1.0);
      if ((num_try % 1000)==0) {
        printf("gRandGen : WARNING num_try=%d v1=%e v2=%e\n",num_try,v1,v2);
      }
      rsq = v1*v1 + v2*v2;
      num_try++;
    } while ((num_try < 10000) && (rsq >= 1.0 || rsq == 0));
    // Decide on the pair itself, not on the counter: a pair accepted on the
    // very last permitted attempt is valid and must not be reported as a
    // failure.
    if (rsq >= 1.0 || rsq == 0) {
      printf("gRandGen : WARNING failed after 10000 tries (corrupted RNG?), returning ridiculous numbers (1e+10)\n");
      return 1e+10;
    }
    double fac = std::sqrt(-2.0 * std::log(rsq)/rsq);
    rs.gaussian = v1 * fac;
    rs.gaussianAvail = true;
    return v2 * fac * sigma + center;
  }
}
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
348
lib/lattice/rng/sha256.h
Normal file
348
lib/lattice/rng/sha256.h
Normal file
@ -0,0 +1,348 @@
|
|||||||
|
// vim: set ts=2 sw=2 expandtab:
|
||||||
|
|
||||||
|
// Copyright (c) 2016 Luchang Jin
|
||||||
|
// All rights reserved.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// The code within namespace sha256 is originally from Stephan Brumme.
|
||||||
|
// see http://create.stephan-brumme.com/disclaimer.html
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <endian.h>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cassert>
|
||||||
|
#include <string>
|
||||||
|
#include <ostream>
|
||||||
|
#include <istream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
namespace CURRENT_DEFAULT_NAMESPACE_NAME {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace sha256 {
|
||||||
|
|
||||||
|
const size_t BlockSize = 512 / 8;
|
||||||
|
|
||||||
|
const size_t HashBytes = 32;
|
||||||
|
|
||||||
|
const size_t HashValues = HashBytes / 4;
|
||||||
|
|
||||||
|
// Rotate the 32-bit word ``a'' right by ``c'' bit positions.
// The count is reduced modulo 32 and both shift amounts are masked, so a
// count of 0 (or any multiple of 32) returns ``a'' unchanged instead of
// performing an undefined 32-bit shift by 32.
inline uint32_t rotate(uint32_t a, uint32_t c)
{
  const uint32_t right = c & 31u;
  const uint32_t left  = (32u - right) & 31u;
  return (a >> right) | (a << left);
}
|
||||||
|
|
||||||
|
// Reverse the byte order of a 32-bit word.
inline uint32_t swap(uint32_t x)
{
  const uint32_t byte0 =  x        & 0xFFu;  // lowest byte
  const uint32_t byte1 = (x >>  8) & 0xFFu;
  const uint32_t byte2 = (x >> 16) & 0xFFu;
  const uint32_t byte3 =  x >> 24;           // highest byte
  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
|
||||||
|
|
||||||
|
inline uint32_t f1(uint32_t e, uint32_t f, uint32_t g)
|
||||||
|
// mix functions for processBlock()
|
||||||
|
{
|
||||||
|
uint32_t term1 = rotate(e, 6) ^ rotate(e, 11) ^ rotate(e, 25);
|
||||||
|
uint32_t term2 = (e & f) ^ (~e & g); //(g ^ (e & (f ^ g)))
|
||||||
|
return term1 + term2;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint32_t f2(uint32_t a, uint32_t b, uint32_t c)
|
||||||
|
// mix functions for processBlock()
|
||||||
|
{
|
||||||
|
uint32_t term1 = rotate(a, 2) ^ rotate(a, 13) ^ rotate(a, 22);
|
||||||
|
uint32_t term2 = ((a | b) & c) | (a & b); //(a & (b ^ c)) ^ (b & c);
|
||||||
|
return term1 + term2;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void processBlock(uint32_t newHash[8], const uint32_t oldHash[8], const uint8_t data[64])
|
||||||
|
// process 64 bytes of data
|
||||||
|
// newHash and oldHash may be the same
|
||||||
|
{
|
||||||
|
// get last hash
|
||||||
|
uint32_t a = oldHash[0];
|
||||||
|
uint32_t b = oldHash[1];
|
||||||
|
uint32_t c = oldHash[2];
|
||||||
|
uint32_t d = oldHash[3];
|
||||||
|
uint32_t e = oldHash[4];
|
||||||
|
uint32_t f = oldHash[5];
|
||||||
|
uint32_t g = oldHash[6];
|
||||||
|
uint32_t h = oldHash[7];
|
||||||
|
// data represented as 16x 32-bit words
|
||||||
|
const uint32_t* input = (uint32_t*) data;
|
||||||
|
// convert to big endian
|
||||||
|
uint32_t words[64];
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
#if defined(__BYTE_ORDER) && (__BYTE_ORDER != 0) && (__BYTE_ORDER == __BIG_ENDIAN)
|
||||||
|
words[i] = input[i];
|
||||||
|
#else
|
||||||
|
words[i] = swap(input[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
uint32_t x,y; // temporaries
|
||||||
|
// first round
|
||||||
|
x = h + f1(e,f,g) + 0x428a2f98 + words[ 0]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0x71374491 + words[ 1]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0xb5c0fbcf + words[ 2]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0xe9b5dba5 + words[ 3]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0x3956c25b + words[ 4]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0x59f111f1 + words[ 5]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0x923f82a4 + words[ 6]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0xab1c5ed5 + words[ 7]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// second round
|
||||||
|
x = h + f1(e,f,g) + 0xd807aa98 + words[ 8]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0x12835b01 + words[ 9]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0x243185be + words[10]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0x550c7dc3 + words[11]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0x72be5d74 + words[12]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0x80deb1fe + words[13]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0x9bdc06a7 + words[14]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0xc19bf174 + words[15]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// extend to 24 words
|
||||||
|
for (; i < 24; i++)
|
||||||
|
words[i] = words[i-16] +
|
||||||
|
(rotate(words[i-15], 7) ^ rotate(words[i-15], 18) ^ (words[i-15] >> 3)) +
|
||||||
|
words[i-7] +
|
||||||
|
(rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
|
||||||
|
// third round
|
||||||
|
x = h + f1(e,f,g) + 0xe49b69c1 + words[16]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0xefbe4786 + words[17]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0x0fc19dc6 + words[18]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0x240ca1cc + words[19]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0x2de92c6f + words[20]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0x4a7484aa + words[21]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0x5cb0a9dc + words[22]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0x76f988da + words[23]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// extend to 32 words
|
||||||
|
for (; i < 32; i++)
|
||||||
|
words[i] = words[i-16] +
|
||||||
|
(rotate(words[i-15], 7) ^ rotate(words[i-15], 18) ^ (words[i-15] >> 3)) +
|
||||||
|
words[i-7] +
|
||||||
|
(rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
|
||||||
|
// fourth round
|
||||||
|
x = h + f1(e,f,g) + 0x983e5152 + words[24]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0xa831c66d + words[25]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0xb00327c8 + words[26]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0xbf597fc7 + words[27]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0xc6e00bf3 + words[28]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0xd5a79147 + words[29]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0x06ca6351 + words[30]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0x14292967 + words[31]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// extend to 40 words
|
||||||
|
for (; i < 40; i++)
|
||||||
|
words[i] = words[i-16] +
|
||||||
|
(rotate(words[i-15], 7) ^ rotate(words[i-15], 18) ^ (words[i-15] >> 3)) +
|
||||||
|
words[i-7] +
|
||||||
|
(rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
|
||||||
|
// fifth round
|
||||||
|
x = h + f1(e,f,g) + 0x27b70a85 + words[32]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0x2e1b2138 + words[33]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0x4d2c6dfc + words[34]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0x53380d13 + words[35]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0x650a7354 + words[36]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0x766a0abb + words[37]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0x81c2c92e + words[38]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0x92722c85 + words[39]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// extend to 48 words
|
||||||
|
for (; i < 48; i++)
|
||||||
|
words[i] = words[i-16] +
|
||||||
|
(rotate(words[i-15], 7) ^ rotate(words[i-15], 18) ^ (words[i-15] >> 3)) +
|
||||||
|
words[i-7] +
|
||||||
|
(rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
|
||||||
|
// sixth round
|
||||||
|
x = h + f1(e,f,g) + 0xa2bfe8a1 + words[40]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0xa81a664b + words[41]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0xc24b8b70 + words[42]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0xc76c51a3 + words[43]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0xd192e819 + words[44]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0xd6990624 + words[45]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0xf40e3585 + words[46]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0x106aa070 + words[47]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// extend to 56 words
|
||||||
|
for (; i < 56; i++)
|
||||||
|
words[i] = words[i-16] +
|
||||||
|
(rotate(words[i-15], 7) ^ rotate(words[i-15], 18) ^ (words[i-15] >> 3)) +
|
||||||
|
words[i-7] +
|
||||||
|
(rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
|
||||||
|
// seventh round
|
||||||
|
x = h + f1(e,f,g) + 0x19a4c116 + words[48]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0x1e376c08 + words[49]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0x2748774c + words[50]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0x34b0bcb5 + words[51]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0x391c0cb3 + words[52]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0x4ed8aa4a + words[53]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0x5b9cca4f + words[54]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0x682e6ff3 + words[55]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// extend to 64 words
|
||||||
|
for (; i < 64; i++)
|
||||||
|
words[i] = words[i-16] +
|
||||||
|
(rotate(words[i-15], 7) ^ rotate(words[i-15], 18) ^ (words[i-15] >> 3)) +
|
||||||
|
words[i-7] +
|
||||||
|
(rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
|
||||||
|
// eighth round
|
||||||
|
x = h + f1(e,f,g) + 0x748f82ee + words[56]; y = f2(a,b,c); d += x; h = x + y;
|
||||||
|
x = g + f1(d,e,f) + 0x78a5636f + words[57]; y = f2(h,a,b); c += x; g = x + y;
|
||||||
|
x = f + f1(c,d,e) + 0x84c87814 + words[58]; y = f2(g,h,a); b += x; f = x + y;
|
||||||
|
x = e + f1(b,c,d) + 0x8cc70208 + words[59]; y = f2(f,g,h); a += x; e = x + y;
|
||||||
|
x = d + f1(a,b,c) + 0x90befffa + words[60]; y = f2(e,f,g); h += x; d = x + y;
|
||||||
|
x = c + f1(h,a,b) + 0xa4506ceb + words[61]; y = f2(d,e,f); g += x; c = x + y;
|
||||||
|
x = b + f1(g,h,a) + 0xbef9a3f7 + words[62]; y = f2(c,d,e); f += x; b = x + y;
|
||||||
|
x = a + f1(f,g,h) + 0xc67178f2 + words[63]; y = f2(b,c,d); e += x; a = x + y;
|
||||||
|
// update hash
|
||||||
|
newHash[0] = a + oldHash[0];
|
||||||
|
newHash[1] = b + oldHash[1];
|
||||||
|
newHash[2] = c + oldHash[2];
|
||||||
|
newHash[3] = d + oldHash[3];
|
||||||
|
newHash[4] = e + oldHash[4];
|
||||||
|
newHash[5] = f + oldHash[5];
|
||||||
|
newHash[6] = g + oldHash[6];
|
||||||
|
newHash[7] = h + oldHash[7];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void processInput(
|
||||||
|
uint32_t hash[8],
|
||||||
|
const uint32_t oldHash[8], const uint64_t numBytes,
|
||||||
|
const uint8_t* input, const size_t inputSize)
|
||||||
|
// process final block, less than 64 bytes
|
||||||
|
// newHash and oldHash and be the same
|
||||||
|
{
|
||||||
|
// the input bytes are considered as bits strings, where the first bit is the most significant bit of the byte
|
||||||
|
// - append "1" bit to message
|
||||||
|
// - append "0" bits until message length in bit mod 512 is 448
|
||||||
|
// - append length as 64 bit integer
|
||||||
|
// process initial parts of input
|
||||||
|
std::memmove(hash, oldHash, 32);
|
||||||
|
const int nBlocks = inputSize / 64;
|
||||||
|
for (int i = 0; i < nBlocks; ++i) {
|
||||||
|
processBlock(hash, hash, input + i * 64);
|
||||||
|
}
|
||||||
|
// initialize buffer from input
|
||||||
|
const size_t bufferSize = inputSize - nBlocks * 64;
|
||||||
|
unsigned char buffer[BlockSize];
|
||||||
|
std::memcpy(buffer, input + nBlocks * 64, bufferSize);
|
||||||
|
// number of bits
|
||||||
|
size_t paddedLength = bufferSize * 8;
|
||||||
|
// plus one bit set to 1 (always appended)
|
||||||
|
paddedLength++;
|
||||||
|
// number of bits must be (numBits % 512) = 448
|
||||||
|
size_t lower11Bits = paddedLength & 511;
|
||||||
|
if (lower11Bits <= 448) {
|
||||||
|
paddedLength += 448 - lower11Bits;
|
||||||
|
} else {
|
||||||
|
paddedLength += 512 + 448 - lower11Bits;
|
||||||
|
}
|
||||||
|
// convert from bits to bytes
|
||||||
|
paddedLength /= 8;
|
||||||
|
// only needed if additional data flows over into a second block
|
||||||
|
unsigned char extra[BlockSize];
|
||||||
|
// append a "1" bit, 128 => binary 10000000
|
||||||
|
if (bufferSize < BlockSize) {
|
||||||
|
buffer[bufferSize] = 128;
|
||||||
|
} else {
|
||||||
|
extra[0] = 128;
|
||||||
|
}
|
||||||
|
size_t i;
|
||||||
|
for (i = bufferSize + 1; i < BlockSize; i++) {
|
||||||
|
buffer[i] = 0;
|
||||||
|
}
|
||||||
|
for (; i < paddedLength; i++) {
|
||||||
|
extra[i - BlockSize] = 0;
|
||||||
|
}
|
||||||
|
// add message length in bits as 64 bit number
|
||||||
|
uint64_t msgBits = 8 * (numBytes + inputSize);
|
||||||
|
// find right position
|
||||||
|
unsigned char* addLength;
|
||||||
|
if (paddedLength < BlockSize) {
|
||||||
|
addLength = buffer + paddedLength;
|
||||||
|
} else {
|
||||||
|
addLength = extra + paddedLength - BlockSize;
|
||||||
|
}
|
||||||
|
// must be big endian
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 56) & 0xFF);
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 48) & 0xFF);
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 40) & 0xFF);
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 32) & 0xFF);
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 24) & 0xFF);
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 16) & 0xFF);
|
||||||
|
*addLength++ = (unsigned char)((msgBits >> 8) & 0xFF);
|
||||||
|
*addLength = (unsigned char)( msgBits & 0xFF);
|
||||||
|
// process blocks
|
||||||
|
processBlock(hash, hash, buffer);
|
||||||
|
// flowed over into a second block ?
|
||||||
|
if (paddedLength > BlockSize) {
|
||||||
|
processBlock(hash, hash, extra);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill hash[0..7] with the eight 32-bit words a SHA-256 computation starts
// from (the initial hash value listed in FIPS 180-4).
inline void setInitialHash(uint32_t hash[8])
{
  static const uint32_t initialWords[8] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
  };
  for (int k = 0; k < 8; ++k) {
    hash[k] = initialWords[k];
  }
}
|
||||||
|
|
||||||
|
inline void computeHash(uint32_t hash[8], const void* data, const size_t size)
|
||||||
|
{
|
||||||
|
uint32_t initHash[8];
|
||||||
|
setInitialHash(initHash);
|
||||||
|
processInput(hash, initHash, 0, (const uint8_t*)data, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialise the hash words into bytes: every 32-bit word is written as four
// consecutive bytes, most significant byte first.
inline void rawHashFromHash(uint8_t rawHash[HashBytes], const uint32_t hash[HashValues])
{
  for (size_t word = 0; word < HashValues; word++) {
    const uint32_t value = hash[word];
    uint8_t* out = rawHash + 4 * word;
    out[0] = (value >> 24) & 0xFF;
    out[1] = (value >> 16) & 0xFF;
    out[2] = (value >> 8) & 0xFF;
    out[3] = value & 0xFF;
  }
}
|
||||||
|
|
||||||
|
inline std::string showRawHash(const uint8_t rawHash[HashBytes])
|
||||||
|
{
|
||||||
|
std::string result;
|
||||||
|
result.reserve(2 * HashBytes);
|
||||||
|
for (size_t i = 0; i < HashBytes; i++) {
|
||||||
|
static const char dec2hex[16+1] = "0123456789abcdef";
|
||||||
|
result += dec2hex[(rawHash[i] >> 4) & 15];
|
||||||
|
result += dec2hex[ rawHash[i] & 15];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string showHash(const uint32_t hash[8])
|
||||||
|
{
|
||||||
|
unsigned char rawHash[HashBytes];
|
||||||
|
rawHashFromHash(rawHash, hash);
|
||||||
|
return showRawHash(rawHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
}
|
||||||
|
#endif
|
125
lib/lattice/rng/show.h
Normal file
125
lib/lattice/rng/show.h
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
// vim: set ts=2 sw=2 expandtab:
|
||||||
|
|
||||||
|
// Copyright (c) 2014 Luchang Jin
|
||||||
|
// All rights reserved.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef INCLUDE_SHOW_H
|
||||||
|
#define INCLUDE_SHOW_H
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <cstdarg>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
namespace CURRENT_DEFAULT_NAMESPACE_NAME {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// printf-style formatting into a std::string, taking the arguments as a
// va_list.
//
//   fmt  : printf format string
//   args : arguments for fmt; consumed by this call, the caller remains
//          responsible for va_end
//
// Returns the formatted text, or an empty string when formatting fails.
// Uses the standard vsnprintf (measure first, then format) rather than the
// non-standard vasprintf, whose result was previously used without checking
// for failure.
inline std::string vssprintf(const char* fmt, va_list args)
{
  // first pass works on a copy of the argument list and only measures
  va_list measureArgs;
  va_copy(measureArgs, args);
  const int length = std::vsnprintf(nullptr, 0, fmt, measureArgs);
  va_end(measureArgs);
  if (length < 0) {
    // encoding error: nothing sensible to return
    return std::string();
  }
  // one extra character for the terminating NUL written by vsnprintf
  std::string str(static_cast<std::string::size_type>(length) + 1, '\0');
  std::vsnprintf(&str[0], str.size(), fmt, args);
  str.resize(static_cast<std::string::size_type>(length));
  return str;
}
|
||||||
|
|
||||||
|
inline std::string ssprintf(const char* fmt, ...)
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
va_start(args, fmt);
|
||||||
|
return vssprintf(fmt, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
// show with no argument: the empty string.
inline std::string show()
{
  return std::string();
}
|
||||||
|
|
||||||
|
// Decimal text of an int.
// std::to_string is specified to produce the same characters as printf's
// "%d", without going through a varargs call and a temporary C buffer.
inline std::string show(const int& x)
{
  return std::to_string(x);
}
|
||||||
|
|
||||||
|
// Decimal text of an unsigned int.
// std::to_string is specified to produce the same characters as printf's
// "%u", without going through a varargs call and a temporary C buffer.
inline std::string show(const unsigned int& x)
{
  return std::to_string(x);
}
|
||||||
|
|
||||||
|
// Decimal text of a long.
// std::to_string is specified to produce the same characters as printf's
// "%ld", without going through a varargs call and a temporary C buffer.
inline std::string show(const long& x)
{
  return std::to_string(x);
}
|
||||||
|
|
||||||
|
// Decimal text of an unsigned long.
// std::to_string is specified to produce the same characters as printf's
// "%lu", without going through a varargs call and a temporary C buffer.
inline std::string show(const unsigned long& x)
{
  return std::to_string(x);
}
|
||||||
|
|
||||||
|
inline std::string show(const double& x)
|
||||||
|
{
|
||||||
|
return ssprintf("%24.17E", x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text of a bool: "true" or "false".
inline std::string show(const bool& x)
{
  if (x) {
    return std::string("true");
  }
  return std::string("false");
}
|
||||||
|
|
||||||
|
// Text of a string: the string itself.
// Inserting a std::string into a fresh ostringstream and reading it back
// yields exactly the same characters, so the copy is returned directly.
inline std::string show(const std::string& x)
{
  return x;
}
|
||||||
|
|
||||||
|
// Text of any value that can be inserted into a std::ostream: the value is
// written to a string stream with operator<< and the stream contents are
// returned.
template <class T>
std::string shows(const T& x)
{
  std::ostringstream buffer;
  buffer << x;
  std::string text = buffer.str();
  return text;
}
|
||||||
|
|
||||||
|
// Parse a value from text: str is wrapped in a string stream and extracted
// into x with operator>>. Returns a reference to x. The stream state is not
// inspected, so a failed extraction is not reported to the caller.
template <class T>
T& reads(T& x, const std::string& str)
{
  std::istringstream source(str);
  source >> x;
  T& result = x;
  return result;
}
|
||||||
|
|
||||||
|
// Write str to the C stream fp, without a trailing newline.
// The text is passed as a C string, so output stops at the first NUL
// character contained in str.
inline void fdisplay(FILE* fp, const std::string& str)
{
  const char* text = str.c_str();
  std::fputs(text, fp);
}
|
||||||
|
|
||||||
|
// Write str followed by a newline to the C stream fp, in a single fprintf
// call. The text is passed as a C string, so it stops at the first NUL
// character contained in str.
inline void fdisplayln(FILE* fp, const std::string& str)
{
  const char* text = str.c_str();
  std::fprintf(fp, "%s\n", text);
}
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
115
lib/lattice/rng/sprng-sha256.h
Normal file
115
lib/lattice/rng/sprng-sha256.h
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
// vim: set ts=2 sw=2 expandtab:
|
||||||
|
|
||||||
|
// Copyright (c) 2016 Luchang Jin
|
||||||
|
// All rights reserved.
|
||||||
|
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef INCLUDE_SPRNG_SHA256_H
|
||||||
|
#define INCLUDE_SPRNG_SHA256_H
|
||||||
|
|
||||||
|
#include "rng-state.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstring>
|
||||||
|
#include <ostream>
|
||||||
|
#include <istream>
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
namespace CURRENT_DEFAULT_NAMESPACE_NAME {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct SprngSha256
|
||||||
|
{
|
||||||
|
RngState rs;
|
||||||
|
//
|
||||||
|
using result_type = uint64_t;
|
||||||
|
//
|
||||||
|
static constexpr result_type default_seed = 0;
|
||||||
|
//
|
||||||
|
explicit SprngSha256(result_type val = default_seed)
|
||||||
|
{
|
||||||
|
seed(val);
|
||||||
|
}
|
||||||
|
template<typename Sseq, typename = typename
|
||||||
|
std::enable_if<!std::is_same<Sseq, SprngSha256>::value>
|
||||||
|
::type>
|
||||||
|
explicit SprngSha256(Sseq& q)
|
||||||
|
{
|
||||||
|
seed(q);
|
||||||
|
}
|
||||||
|
//
|
||||||
|
static constexpr result_type min()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
//
|
||||||
|
static constexpr result_type max()
|
||||||
|
{
|
||||||
|
return UINT64_MAX;
|
||||||
|
}
|
||||||
|
//
|
||||||
|
void seed(result_type val = default_seed)
|
||||||
|
{
|
||||||
|
reset(rs, (long)val);
|
||||||
|
}
|
||||||
|
template <class Sseq>
|
||||||
|
typename std::enable_if<std::is_class<Sseq>::value>::type
|
||||||
|
seed(Sseq& q)
|
||||||
|
{
|
||||||
|
std::array<uint32_t, 8> seq;
|
||||||
|
q.generate(seq.begin(), seq.end());
|
||||||
|
reset(rs);
|
||||||
|
for (size_t i = 0; i < seq.size(); ++i) {
|
||||||
|
splitRngState(rs, rs, seq[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//
|
||||||
|
result_type operator()()
|
||||||
|
{
|
||||||
|
return randGen(rs);
|
||||||
|
}
|
||||||
|
//
|
||||||
|
void discard(unsigned long long z)
|
||||||
|
{
|
||||||
|
for (unsigned long long i = 0; i < z; ++i) {
|
||||||
|
randGen(rs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream& operator<<(std::ostream& os, const SprngSha256& ss)
|
||||||
|
{
|
||||||
|
os << ss.rs;
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::istream& operator>>(std::istream& is, SprngSha256& ss)
|
||||||
|
{
|
||||||
|
is >> ss.rs;
|
||||||
|
return is;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool operator==(const SprngSha256& ss1, const SprngSha256& ss2)
|
||||||
|
{
|
||||||
|
return ss1.rs == ss2.rs;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CURRENT_DEFAULT_NAMESPACE_NAME
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
@ -194,22 +194,22 @@ class BinaryIO {
|
|||||||
|
|
||||||
std::vector<int> site({x,y,z,t});
|
std::vector<int> site({x,y,z,t});
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if (grid->IsBoss()) {
|
||||||
fin.read((char *)&file_object,sizeof(file_object));
|
fin.read((char *)&file_object, sizeof(file_object));
|
||||||
bytes += sizeof(file_object);
|
bytes += sizeof(file_object);
|
||||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
|
||||||
|
|
||||||
munge(file_object,munged,csum);
|
munge(file_object, munged, csum);
|
||||||
}
|
}
|
||||||
// The boss who read the file has their value poked
|
// The boss who read the file has their value poked
|
||||||
pokeSite(munged,Umu,site);
|
pokeSite(munged,Umu,site);
|
||||||
}}}}
|
}}}}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -254,20 +254,20 @@ class BinaryIO {
|
|||||||
|
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
|
|
||||||
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
||||||
|
|
||||||
// NB could gather an xstrip as an optimisation.
|
// NB could gather an xstrip as an optimisation.
|
||||||
fout.write((char *)&file_object,sizeof(file_object));
|
fout.write((char *)&file_object,sizeof(file_object));
|
||||||
bytes+=sizeof(file_object);
|
bytes+=sizeof(file_object);
|
||||||
}
|
}
|
||||||
}}}}
|
}}}}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -305,15 +305,15 @@ class BinaryIO {
|
|||||||
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
if( rank == grid->ThisRank() ){
|
if( rank == grid->ThisRank() ){
|
||||||
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
|
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
|
||||||
parallel.GetState(saved,l_idx);
|
parallel.GetState(saved,l_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Broadcast(rank,(void *)&saved[0],bytes);
|
grid->Broadcast(rank,(void *)&saved[0],bytes);
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
||||||
fout.write((char *)&saved[0],bytes);
|
fout.write((char *)&saved[0],bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -355,14 +355,14 @@ class BinaryIO {
|
|||||||
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
fin.read((char *)&saved[0],bytes);
|
fin.read((char *)&saved[0],bytes);
|
||||||
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Broadcast(0,(void *)&saved[0],bytes);
|
grid->Broadcast(0,(void *)&saved[0],bytes);
|
||||||
|
|
||||||
if( rank == grid->ThisRank() ){
|
if( rank == grid->ThisRank() ){
|
||||||
parallel.SetState(saved,l_idx);
|
parallel.SetState(saved,l_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -415,15 +415,15 @@ class BinaryIO {
|
|||||||
|
|
||||||
if ( d == 0 ) parallel[d] = 0;
|
if ( d == 0 ) parallel[d] = 0;
|
||||||
if (parallel[d]) {
|
if (parallel[d]) {
|
||||||
range[d] = grid->_ldimensions[d];
|
range[d] = grid->_ldimensions[d];
|
||||||
start[d] = grid->_processor_coor[d]*range[d];
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
ioproc[d]= grid->_processor_coor[d];
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
} else {
|
} else {
|
||||||
range[d] = grid->_gdimensions[d];
|
range[d] = grid->_gdimensions[d];
|
||||||
start[d] = 0;
|
start[d] = 0;
|
||||||
ioproc[d]= 0;
|
ioproc[d]= 0;
|
||||||
|
|
||||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
}
|
}
|
||||||
slice_vol = slice_vol * range[d];
|
slice_vol = slice_vol * range[d];
|
||||||
}
|
}
|
||||||
@ -434,9 +434,9 @@ class BinaryIO {
|
|||||||
std::cout<< std::dec ;
|
std::cout<< std::dec ;
|
||||||
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
for(int d=0;d<grid->_ndimension;d++){
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
std::cout<< range[d];
|
std::cout<< range[d];
|
||||||
if( d< grid->_ndimension-1 )
|
if( d< grid->_ndimension-1 )
|
||||||
std::cout<< " x ";
|
std::cout<< " x ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
@ -463,7 +463,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
// need to implement these loops in Nd independent way with a lexico conversion
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
std::vector<int> tsite(nd); // temporary mixed up site
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
std::vector<int> gsite(nd);
|
std::vector<int> gsite(nd);
|
||||||
std::vector<int> lsite(nd);
|
std::vector<int> lsite(nd);
|
||||||
@ -472,8 +472,8 @@ class BinaryIO {
|
|||||||
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
gsite[d] = tsite[d]+start[d]; // global site
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
@ -487,29 +487,29 @@ class BinaryIO {
|
|||||||
// iorank reads from the seek
|
// iorank reads from the seek
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (myrank == iorank) {
|
if (myrank == iorank) {
|
||||||
|
|
||||||
fin.seekg(offset+g_idx*sizeof(fileObj));
|
fin.seekg(offset+g_idx*sizeof(fileObj));
|
||||||
fin.read((char *)&fileObj,sizeof(fileObj));
|
fin.read((char *)&fileObj,sizeof(fileObj));
|
||||||
bytes+=sizeof(fileObj);
|
bytes+=sizeof(fileObj);
|
||||||
|
|
||||||
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
munge(fileObj,siteObj,csum);
|
munge(fileObj,siteObj,csum);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Possibly do transport through pt2pt
|
// Possibly do transport through pt2pt
|
||||||
if ( rank != iorank ) {
|
if ( rank != iorank ) {
|
||||||
if ( (myrank == rank) || (myrank==iorank) ) {
|
if ( (myrank == rank) || (myrank==iorank) ) {
|
||||||
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
|
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Poke at destination
|
// Poke at destination
|
||||||
if ( myrank == rank ) {
|
if ( myrank == rank ) {
|
||||||
pokeLocalSite(siteObj,Umu,lsite);
|
pokeLocalSite(siteObj,Umu,lsite);
|
||||||
}
|
}
|
||||||
grid->Barrier(); // necessary?
|
grid->Barrier(); // necessary?
|
||||||
}
|
}
|
||||||
@ -520,7 +520,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -558,15 +558,15 @@ class BinaryIO {
|
|||||||
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
|
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
|
||||||
|
|
||||||
if (parallel[d]) {
|
if (parallel[d]) {
|
||||||
range[d] = grid->_ldimensions[d];
|
range[d] = grid->_ldimensions[d];
|
||||||
start[d] = grid->_processor_coor[d]*range[d];
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
ioproc[d]= grid->_processor_coor[d];
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
} else {
|
} else {
|
||||||
range[d] = grid->_gdimensions[d];
|
range[d] = grid->_gdimensions[d];
|
||||||
start[d] = 0;
|
start[d] = 0;
|
||||||
ioproc[d]= 0;
|
ioproc[d]= 0;
|
||||||
|
|
||||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
slice_vol = slice_vol * range[d];
|
slice_vol = slice_vol * range[d];
|
||||||
@ -577,9 +577,9 @@ class BinaryIO {
|
|||||||
grid->GlobalSum(tmp);
|
grid->GlobalSum(tmp);
|
||||||
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
for(int d=0;d<grid->_ndimension;d++){
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
std::cout<< range[d];
|
std::cout<< range[d];
|
||||||
if( d< grid->_ndimension-1 )
|
if( d< grid->_ndimension-1 )
|
||||||
std::cout<< " x ";
|
std::cout<< " x ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
@ -610,7 +610,7 @@ class BinaryIO {
|
|||||||
// should aggregate a whole chunk and then write.
|
// should aggregate a whole chunk and then write.
|
||||||
// need to implement these loops in Nd independent way with a lexico conversion
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
std::vector<int> tsite(nd); // temporary mixed up site
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
std::vector<int> gsite(nd);
|
std::vector<int> gsite(nd);
|
||||||
std::vector<int> lsite(nd);
|
std::vector<int> lsite(nd);
|
||||||
@ -619,8 +619,8 @@ class BinaryIO {
|
|||||||
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
gsite[d] = tsite[d]+start[d]; // global site
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -640,26 +640,26 @@ class BinaryIO {
|
|||||||
|
|
||||||
// Pair of nodes may need to do pt2pt send
|
// Pair of nodes may need to do pt2pt send
|
||||||
if ( rank != iorank ) { // comms is necessary
|
if ( rank != iorank ) { // comms is necessary
|
||||||
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
|
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
|
||||||
// Send to IOrank
|
// Send to IOrank
|
||||||
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
|
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Barrier(); // necessary?
|
grid->Barrier(); // necessary?
|
||||||
|
|
||||||
if (myrank == iorank) {
|
if (myrank == iorank) {
|
||||||
|
|
||||||
munge(siteObj,fileObj,csum);
|
munge(siteObj,fileObj,csum);
|
||||||
|
|
||||||
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
fout.seekp(offset+g_idx*sizeof(fileObj));
|
fout.seekp(offset+g_idx*sizeof(fileObj));
|
||||||
fout.write((char *)&fileObj,sizeof(fileObj));
|
fout.write((char *)&fileObj,sizeof(fileObj));
|
||||||
bytes+=sizeof(fileObj);
|
bytes+=sizeof(fileObj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -668,7 +668,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Include user configuration file (this can define various configuration macros)
|
// Include user configuration file (this can define various configuration macros)
|
||||||
#include <pugixml/pugiconfig.hpp>
|
#include "pugiconfig.hpp"
|
||||||
|
|
||||||
#ifndef HEADER_PUGIXML_HPP
|
#ifndef HEADER_PUGIXML_HPP
|
||||||
#define HEADER_PUGIXML_HPP
|
#define HEADER_PUGIXML_HPP
|
||||||
|
@ -55,10 +55,19 @@ namespace QCD {
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// QCD iMatrix types
|
// QCD iMatrix types
|
||||||
// Index conventions: Lorentz x Spin x Colour
|
// Index conventions: Lorentz x Spin x Colour
|
||||||
|
// note: static const int or constexpr will work for type deductions
|
||||||
|
// with the intel compiler (up to version 17)
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
static const int ColourIndex = 2;
|
#define ColourIndex 2
|
||||||
static const int SpinIndex = 1;
|
#define SpinIndex 1
|
||||||
static const int LorentzIndex= 0;
|
#define LorentzIndex 0
|
||||||
|
|
||||||
|
|
||||||
|
// Also should make these a named enum type
|
||||||
|
static const int DaggerNo=0;
|
||||||
|
static const int DaggerYes=1;
|
||||||
|
static const int InverseNo=0;
|
||||||
|
static const int InverseYes=1;
|
||||||
|
|
||||||
// Useful traits is this a spin index
|
// Useful traits is this a spin index
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
@ -484,16 +493,27 @@ namespace QCD {
|
|||||||
} //namespace QCD
|
} //namespace QCD
|
||||||
} // Grid
|
} // Grid
|
||||||
|
|
||||||
#include <qcd/utils/SpaceTimeGrid.h>
|
|
||||||
#include <qcd/spin/Dirac.h>
|
#include <Grid/qcd/utils/SpaceTimeGrid.h>
|
||||||
#include <qcd/spin/TwoSpinor.h>
|
#include <Grid/qcd/spin/Dirac.h>
|
||||||
#include <qcd/utils/LinalgUtils.h>
|
#include <Grid/qcd/spin/TwoSpinor.h>
|
||||||
#include <qcd/utils/CovariantCshift.h>
|
#include <Grid/qcd/utils/LinalgUtils.h>
|
||||||
#include <qcd/utils/SUn.h>
|
#include <Grid/qcd/utils/CovariantCshift.h>
|
||||||
#include <qcd/action/Actions.h>
|
|
||||||
#include <qcd/hmc/integrators/Integrator.h>
|
// Include representations
|
||||||
#include <qcd/hmc/integrators/Integrator_algorithm.h>
|
#include <Grid/qcd/utils/SUn.h>
|
||||||
#include <qcd/hmc/HMC.h>
|
#include <Grid/qcd/utils/SUnAdjoint.h>
|
||||||
|
#include <Grid/qcd/utils/SUnTwoIndex.h>
|
||||||
|
#include <Grid/qcd/representations/hmc_types.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/action/Actions.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/smearing/Smearing.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/hmc/integrators/Integrator.h>
|
||||||
|
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
|
||||||
|
#include <Grid/qcd/hmc/HMC.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,86 +1,153 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/ActionBase.h
|
Source file: ./lib/qcd/action/ActionBase.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef QCD_ACTION_BASE
|
#ifndef QCD_ACTION_BASE
|
||||||
#define QCD_ACTION_BASE
|
#define QCD_ACTION_BASE
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD{
|
namespace QCD {
|
||||||
|
|
||||||
template<class GaugeField>
|
|
||||||
class Action {
|
|
||||||
|
|
||||||
|
template <class GaugeField>
|
||||||
|
class Action {
|
||||||
public:
|
public:
|
||||||
|
bool is_smeared = false;
|
||||||
// Boundary conditions? // Heatbath?
|
// Boundary conditions? // Heatbath?
|
||||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
|
virtual void refresh(const GaugeField& U,
|
||||||
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
|
GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
|
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||||
virtual ~Action() {};
|
virtual void deriv(const GaugeField& U,
|
||||||
|
GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||||
|
virtual ~Action(){};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Indexing of tuple types
|
||||||
|
template <class T, class Tuple>
|
||||||
|
struct Index;
|
||||||
|
|
||||||
|
template <class T, class... Types>
|
||||||
|
struct Index<T, std::tuple<T, Types...>> {
|
||||||
|
static const std::size_t value = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T, class U, class... Types>
|
||||||
|
struct Index<T, std::tuple<U, Types...>> {
|
||||||
|
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
|
|
||||||
/*
|
/*
|
||||||
template<class GaugeField, class FermionField>
|
template <class GaugeField>
|
||||||
class PseudoFermionAction : public Action<GaugeField> {
|
struct ActionLevel {
|
||||||
public:
|
public:
|
||||||
FermionField Phi;
|
typedef Action<GaugeField>*
|
||||||
GridParallelRNG &pRNG;
|
ActPtr; // now force the same colours as the rest of the code
|
||||||
GridBase &Grid;
|
|
||||||
|
|
||||||
PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) {
|
//Add supported representations here
|
||||||
};
|
|
||||||
|
|
||||||
virtual void refresh(const GaugeField &gauge) {
|
|
||||||
gaussian(Phi,pRNG);
|
|
||||||
};
|
|
||||||
|
|
||||||
};
|
unsigned int multiplier;
|
||||||
*/
|
|
||||||
|
|
||||||
template<class GaugeField> struct ActionLevel{
|
|
||||||
public:
|
|
||||||
|
|
||||||
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
|
|
||||||
|
|
||||||
int multiplier;
|
|
||||||
|
|
||||||
std::vector<ActPtr> actions;
|
std::vector<ActPtr> actions;
|
||||||
|
|
||||||
ActionLevel(int mul = 1) : multiplier(mul) {
|
ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
|
||||||
assert (mul > 0);
|
assert(mul >= 1);
|
||||||
};
|
};
|
||||||
|
|
||||||
void push_back(ActPtr ptr){
|
void push_back(ActPtr ptr) { actions.push_back(ptr); }
|
||||||
actions.push_back(ptr);
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <class GaugeField, class Repr = NoHirep >
|
||||||
|
struct ActionLevel {
|
||||||
|
public:
|
||||||
|
unsigned int multiplier;
|
||||||
|
|
||||||
|
// Fundamental repr actions separated because of the smearing
|
||||||
|
typedef Action<GaugeField>* ActPtr;
|
||||||
|
|
||||||
|
// construct a tuple of vectors of the actions for the corresponding higher
|
||||||
|
// representation fields
|
||||||
|
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
|
||||||
|
action_collection actions_hirep;
|
||||||
|
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
|
||||||
|
|
||||||
|
std::vector<ActPtr>& actions;
|
||||||
|
|
||||||
|
// Temporary conversion between ActionLevel and ActionLevelHirep
|
||||||
|
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){}
|
||||||
|
|
||||||
|
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
|
||||||
|
// initialize the hirep vectors to zero.
|
||||||
|
//apply(this->resize, actions_hirep, 0); //need a working resize
|
||||||
|
assert(mul >= 1);
|
||||||
|
};
|
||||||
|
|
||||||
|
//void push_back(ActPtr ptr) { actions.push_back(ptr); }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template < class Field >
|
||||||
|
void push_back(Action<Field>* ptr) {
|
||||||
|
// insert only in the correct vector
|
||||||
|
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template < class ActPtr>
|
||||||
|
static void resize(ActPtr ap, unsigned int n){
|
||||||
|
ap->resize(n);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//template <std::size_t I>
|
||||||
|
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
|
||||||
|
|
||||||
|
// Loop on tuple for a callable function
|
||||||
|
template <std::size_t I = 1, typename Callable, typename ...Args>
|
||||||
|
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
|
||||||
|
Callable, Repr& R,Args&...) const {}
|
||||||
|
|
||||||
|
template <std::size_t I = 1, typename Callable, typename ...Args>
|
||||||
|
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
|
||||||
|
Callable fn, Repr& R, Args&... arguments) const {
|
||||||
|
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
|
||||||
|
apply<I + 1>(fn, R, arguments...);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
|
|
||||||
|
|
||||||
|
//template <class GaugeField>
|
||||||
|
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
|
||||||
|
|
||||||
}}
|
template <class GaugeField, class R>
|
||||||
|
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -40,25 +40,25 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Abstract base interface
|
// Abstract base interface
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/ActionBase.h>
|
#include <Grid/qcd/action/ActionBase.h>
|
||||||
#include <qcd/action/ActionParams.h>
|
#include <Grid/qcd/action/ActionParams.h>
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Utility functions
|
// Utility functions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/gauge/GaugeImpl.h>
|
#include <Grid/qcd/action/gauge/GaugeImpl.h>
|
||||||
#include <qcd/utils/WilsonLoops.h>
|
#include <Grid/qcd/utils/WilsonLoops.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
|
#include <Grid/qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
|
||||||
#include <qcd/action/fermion/FermionOperatorImpl.h>
|
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
|
||||||
#include <qcd/action/fermion/FermionOperator.h>
|
#include <Grid/qcd/action/fermion/FermionOperator.h>
|
||||||
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Gauge Actions
|
// Gauge Actions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
#include <Grid/qcd/action/gauge/WilsonGaugeAction.h>
|
||||||
#include <qcd/action/gauge/PlaqPlusRectangleAction.h>
|
#include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
@ -107,41 +107,64 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
|
|||||||
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
|
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define FermOpTemplateInstantiate(A) \
|
|
||||||
|
#define FermOp4dVecTemplateInstantiate(A) \
|
||||||
template class A<WilsonImplF>; \
|
template class A<WilsonImplF>; \
|
||||||
template class A<WilsonImplD>; \
|
template class A<WilsonImplD>; \
|
||||||
|
template class A<ZWilsonImplF>; \
|
||||||
|
template class A<ZWilsonImplD>; \
|
||||||
template class A<GparityWilsonImplF>; \
|
template class A<GparityWilsonImplF>; \
|
||||||
template class A<GparityWilsonImplD>;
|
template class A<GparityWilsonImplD>;
|
||||||
|
|
||||||
|
#define AdjointFermOpTemplateInstantiate(A) \
|
||||||
|
template class A<WilsonAdjImplF>; \
|
||||||
|
template class A<WilsonAdjImplD>;
|
||||||
|
|
||||||
|
#define TwoIndexFermOpTemplateInstantiate(A) \
|
||||||
|
template class A<WilsonTwoIndexSymmetricImplF>; \
|
||||||
|
template class A<WilsonTwoIndexSymmetricImplD>;
|
||||||
|
|
||||||
|
#define FermOp5dVecTemplateInstantiate(A) \
|
||||||
|
template class A<DomainWallVec5dImplF>; \
|
||||||
|
template class A<DomainWallVec5dImplD>; \
|
||||||
|
template class A<ZDomainWallVec5dImplF>; \
|
||||||
|
template class A<ZDomainWallVec5dImplD>;
|
||||||
|
|
||||||
|
#define FermOpTemplateInstantiate(A) \
|
||||||
|
FermOp4dVecTemplateInstantiate(A) \
|
||||||
|
FermOp5dVecTemplateInstantiate(A)
|
||||||
|
|
||||||
|
|
||||||
#define GparityFermOpTemplateInstantiate(A)
|
#define GparityFermOpTemplateInstantiate(A)
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Fermion operators / actions
|
// Fermion operators / actions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
#include <qcd/action/fermion/WilsonFermion.h> // 4d wilson like
|
#include <Grid/qcd/action/fermion/WilsonFermion.h> // 4d wilson like
|
||||||
#include <qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
|
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
|
||||||
#include <qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
||||||
|
|
||||||
//#include <qcd/action/fermion/CloverFermion.h>
|
//#include <Grid/qcd/action/fermion/CloverFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/CayleyFermion5D.h> // Cayley types
|
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
|
||||||
#include <qcd/action/fermion/DomainWallFermion.h>
|
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <qcd/action/fermion/DomainWallFermion.h>
|
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <qcd/action/fermion/MobiusFermion.h>
|
#include <Grid/qcd/action/fermion/MobiusFermion.h>
|
||||||
#include <qcd/action/fermion/ScaledShamirFermion.h>
|
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
|
||||||
#include <qcd/action/fermion/MobiusZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
|
||||||
#include <qcd/action/fermion/ShamirZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
|
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
|
||||||
|
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
|
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
|
||||||
@ -157,6 +180,14 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
|
|||||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
|
||||||
|
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
|
||||||
|
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonTwoIndexSymmetricImplR> WilsonTwoIndexSymmetricFermionR;
|
||||||
|
typedef WilsonFermion<WilsonTwoIndexSymmetricImplF> WilsonTwoIndexSymmetricFermionF;
|
||||||
|
typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermionD;
|
||||||
|
|
||||||
typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR;
|
typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR;
|
||||||
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
|
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
|
||||||
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
|
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
|
||||||
@ -167,6 +198,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
|||||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||||
|
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
||||||
|
|
||||||
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
||||||
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
||||||
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
||||||
@ -222,21 +258,21 @@ typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
#include <qcd/action/fermion/g5HermitianLinop.h>
|
#include <Grid/qcd/action/fermion/g5HermitianLinop.h>
|
||||||
|
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
// Pseudo fermion combinations for HMC
|
// Pseudo fermion combinations for HMC
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
#include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
|
#include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
|
||||||
|
|
||||||
#include <qcd/action/pseudofermion/TwoFlavour.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavour.h>
|
||||||
#include <qcd/action/pseudofermion/TwoFlavourRatio.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h>
|
||||||
#include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
|
||||||
#include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
|
||||||
|
|
||||||
#include <qcd/action/pseudofermion/OneFlavourRational.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourRational.h>
|
||||||
#include <qcd/action/pseudofermion/OneFlavourRationalRatio.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
|
||||||
#include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
|
||||||
#include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
0
lib/qcd/action/fermion/.dirstamp
Normal file
0
lib/qcd/action/fermion/.dirstamp
Normal file
@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
@ -45,486 +48,352 @@ namespace QCD {
|
|||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_M5,p),
|
FourDimRedBlackGrid,_M5,p),
|
||||||
mass(_mass)
|
mass(_mass)
|
||||||
{
|
{ }
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// Assemble Din
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
std::vector<Coeff_t> diag (Ls,1.0);
|
||||||
for(int s=0;s<Ls;s++){
|
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
|
||||||
if ( s==0 ) {
|
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
|
||||||
// Din = bs psi[s] + cs[s] psi[s+1}
|
M5D(psi,chi,chi,lower,diag,upper);
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
}
|
||||||
// Din+= -mass*cs[s] psi[s+1}
|
template<class Impl>
|
||||||
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
|
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
||||||
} else if ( s==(Ls-1)) {
|
{
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
|
int Ls=this->Ls;
|
||||||
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
|
std::vector<Coeff_t> diag = bs;
|
||||||
} else {
|
std::vector<Coeff_t> upper= cs;
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
std::vector<Coeff_t> lower= cs;
|
||||||
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
}
|
lower[0] =-mass*lower[0];
|
||||||
}
|
M5D(psi,psi,Din,lower,diag,upper);
|
||||||
|
}
|
||||||
|
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
std::vector<Coeff_t> diag = beo;
|
||||||
|
std::vector<Coeff_t> upper(Ls);
|
||||||
|
std::vector<Coeff_t> lower(Ls);
|
||||||
|
for(int i=0;i<Ls;i++) {
|
||||||
|
upper[i]=-ceo[i];
|
||||||
|
lower[i]=-ceo[i];
|
||||||
}
|
}
|
||||||
template<class Impl>
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
lower[0] =-mass*lower[0];
|
||||||
{
|
M5D(psi,psi,chi,lower,diag,upper);
|
||||||
int Ls=this->Ls;
|
}
|
||||||
for(int s=0;s<Ls;s++){
|
template<class Impl>
|
||||||
if ( s==0 ) {
|
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
||||||
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
|
{
|
||||||
axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
|
int Ls=this->Ls;
|
||||||
} else if ( s==(Ls-1)) {
|
std::vector<Coeff_t> diag = bee;
|
||||||
axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
|
std::vector<Coeff_t> upper(Ls);
|
||||||
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
|
std::vector<Coeff_t> lower(Ls);
|
||||||
} else {
|
for(int i=0;i<Ls;i++) {
|
||||||
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
|
upper[i]=-cee[i];
|
||||||
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
|
lower[i]=-cee[i];
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
|
lower[0] =-mass*lower[0];
|
||||||
|
M5D(psi,psi,chi,lower,diag,upper);
|
||||||
|
}
|
||||||
|
|
||||||
// override multiply
|
template<class Impl>
|
||||||
template<class Impl>
|
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
||||||
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
{
|
||||||
{
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
std::vector<Coeff_t> diag = bee;
|
||||||
|
std::vector<Coeff_t> upper(Ls);
|
||||||
|
std::vector<Coeff_t> lower(Ls);
|
||||||
|
|
||||||
FermionField Din(psi._grid);
|
for (int s=0;s<Ls;s++){
|
||||||
|
|
||||||
// Assemble Din
|
|
||||||
/*
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
if ( s==0 ) {
|
|
||||||
// Din = bs psi[s] + cs[s] psi[s+1}
|
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
|
||||||
// Din+= -mass*cs[s] psi[s+1}
|
|
||||||
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
|
||||||
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
Meooe5D(psi,Din);
|
|
||||||
|
|
||||||
this->DW(Din,chi,DaggerNo);
|
|
||||||
// ((b D_W + D_w hop terms +1) on s-diag
|
|
||||||
axpby(chi,1.0,1.0,chi,psi);
|
|
||||||
|
|
||||||
// Call Mooee??
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
if ( s==0 ){
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return norm2(chi);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
|
||||||
{
|
|
||||||
// Under adjoint
|
|
||||||
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
|
||||||
//D2- P+ D2+ P-D1-^dag D2+dag
|
|
||||||
|
|
||||||
FermionField Din(psi._grid);
|
|
||||||
// Apply Dw
|
|
||||||
this->DW(psi,Din,DaggerYes);
|
|
||||||
|
|
||||||
MeooeDag5D(Din,chi);
|
|
||||||
|
|
||||||
int Ls=this->Ls;
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
|
|
||||||
// Collect the terms in DW
|
|
||||||
// Chi = bs Din[s] + cs[s] Din[s+1}
|
|
||||||
// Chi+= -mass*cs[s] psi[s+1}
|
|
||||||
/*
|
|
||||||
if ( s==0 ) {
|
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// FIXME just call MooeeDag??
|
|
||||||
|
|
||||||
// Collect the terms indept of DW
|
|
||||||
if ( s==0 ){
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// ((b D_W + D_w hop terms +1) on s-diag
|
|
||||||
axpby (chi,1.0,1.0,chi,psi);
|
|
||||||
return norm2(chi);
|
|
||||||
}
|
|
||||||
|
|
||||||
// half checkerboard operations
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
|
||||||
{
|
|
||||||
int Ls=this->Ls;
|
|
||||||
|
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
Meooe5D(psi,tmp);
|
if ( s==0 ) {
|
||||||
#if 0
|
upper[s] = -cee[s+1] ;
|
||||||
std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
|
lower[s] = mass*cee[Ls-1];
|
||||||
for(int s=0;s<Ls;s++){
|
} else if ( s==(Ls-1)) {
|
||||||
if ( s==0 ) {
|
upper[s] = mass*cee[0];
|
||||||
// tmp = bs psi[s] + cs[s] psi[s+1}
|
lower[s] = -cee[s-1];
|
||||||
// tmp+= -mass*cs[s] psi[s+1}
|
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
|
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Apply 4d dslash
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
|
||||||
this->DhopEO(tmp,chi,DaggerNo);
|
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(tmp,chi,DaggerNo);
|
upper[s]=-cee[s+1];
|
||||||
|
lower[s]=-cee[s-1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
M5Ddag(psi,psi,chi,lower,diag,upper);
|
||||||
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
}
|
||||||
{
|
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
// Apply 4d dslash
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
|
||||||
this->DhopEO(psi,tmp,DaggerYes);
|
|
||||||
} else {
|
|
||||||
this->DhopOE(psi,tmp,DaggerYes);
|
|
||||||
}
|
|
||||||
|
|
||||||
MeooeDag5D(tmp,chi);
|
template<class Impl>
|
||||||
#if 0
|
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
|
||||||
std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
|
{
|
||||||
// Assemble the 5d matrix
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
std::vector<Coeff_t> diag(Ls,1.0);
|
||||||
for(int s=0;s<Ls;s++){
|
std::vector<Coeff_t> upper(Ls,-1.0);
|
||||||
if ( s==0 ) {
|
std::vector<Coeff_t> lower(Ls,-1.0);
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp, -ceo[s+1] ,tmp,s,s+1);
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
axpby_ssp_pminus(chi, 1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1);
|
lower[0] =-mass*lower[0];
|
||||||
} else if ( s==(Ls-1)) {
|
M5Ddag(psi,chi,chi,lower,diag,upper);
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0);
|
}
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0 ,chi,-ceo[s-1],tmp,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
std::vector<Coeff_t> diag =bs;
|
||||||
|
std::vector<Coeff_t> upper=cs;
|
||||||
|
std::vector<Coeff_t> lower=cs;
|
||||||
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
|
lower[0] =-mass*lower[0];
|
||||||
|
M5Ddag(psi,psi,Din,lower,diag,upper);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
FermionField Din(psi._grid);
|
||||||
|
|
||||||
|
// Assemble Din
|
||||||
|
Meooe5D(psi,Din);
|
||||||
|
|
||||||
|
this->DW(Din,chi,DaggerNo);
|
||||||
|
// ((b D_W + D_w hop terms +1) on s-diag
|
||||||
|
axpby(chi,1.0,1.0,chi,psi);
|
||||||
|
|
||||||
|
M5D(psi,chi);
|
||||||
|
return(norm2(chi));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
// Under adjoint
|
||||||
|
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
||||||
|
//D2- P+ D2+ P-D1-^dag D2+dag
|
||||||
|
|
||||||
|
FermionField Din(psi._grid);
|
||||||
|
// Apply Dw
|
||||||
|
this->DW(psi,Din,DaggerYes);
|
||||||
|
|
||||||
|
MeooeDag5D(Din,chi);
|
||||||
|
|
||||||
|
M5Ddag(psi,chi);
|
||||||
|
// ((b D_W + D_w hop terms +1) on s-diag
|
||||||
|
axpby (chi,1.0,1.0,chi,psi);
|
||||||
|
return norm2(chi);
|
||||||
|
}
|
||||||
|
|
||||||
|
// half checkerboard operations
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
FermionField tmp(psi._grid);
|
||||||
|
|
||||||
|
Meooe5D(psi,tmp);
|
||||||
|
|
||||||
|
if ( psi.checkerboard == Odd ) {
|
||||||
|
this->DhopEO(tmp,chi,DaggerNo);
|
||||||
|
} else {
|
||||||
|
this->DhopOE(tmp,chi,DaggerNo);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
FermionField tmp(psi._grid);
|
||||||
for (int s=0;s<Ls;s++){
|
// Apply 4d dslash
|
||||||
if ( s==0 ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
|
this->DhopEO(psi,tmp,DaggerYes);
|
||||||
axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1);
|
} else {
|
||||||
} else if ( s==(Ls-1)) {
|
this->DhopOE(psi,tmp,DaggerYes);
|
||||||
axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
MeooeDag5D(tmp,chi);
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
int Ls=this->Ls;
|
FermionField tmp(psi._grid);
|
||||||
FermionField tmp(psi._grid);
|
Meo5D(psi,tmp);
|
||||||
// Assemble the 5d matrix
|
// Apply 4d dslash fragment
|
||||||
for(int s=0;s<Ls;s++){
|
this->DhopDir(tmp,chi,dir,disp);
|
||||||
if ( s==0 ) {
|
}
|
||||||
// tmp = bs psi[s] + cs[s] psi[s+1}
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
// tmp+= -mass*cs[s] psi[s+1}
|
template<class Impl>
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
|
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
|
{
|
||||||
} else if ( s==(Ls-1)) {
|
FermionField Din(V._grid);
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
if ( dag == DaggerNo ) {
|
||||||
} else {
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
|
Meooe5D(V,Din);
|
||||||
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
this->DhopDeriv(mat,U,Din,dag);
|
||||||
}
|
} else {
|
||||||
}
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
// Apply 4d dslash fragment
|
Meooe5D(U,Din);
|
||||||
this->DhopDir(tmp,chi,dir,disp);
|
this->DhopDeriv(mat,Din,V,dag);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
FermionField Din(V._grid);
|
||||||
for (int s=0;s<Ls;s++){
|
|
||||||
// Assemble the 5d matrix
|
if ( dag == DaggerNo ) {
|
||||||
if ( s==0 ) {
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1] ,psi,s,s+1);
|
Meooe5D(V,Din);
|
||||||
axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1);
|
this->DhopDerivOE(mat,U,Din,dag);
|
||||||
} else if ( s==(Ls-1)) {
|
} else {
|
||||||
axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0);
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0 ,chi,-cee[s-1],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
|
||||||
{
|
|
||||||
int Ls=this->Ls;
|
|
||||||
// Apply (L^{\prime})^{-1}
|
|
||||||
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
|
||||||
for (int s=1;s<Ls;s++){
|
|
||||||
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
|
||||||
}
|
|
||||||
// L_m^{-1}
|
|
||||||
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
|
|
||||||
}
|
|
||||||
// U_m^{-1} D^{-1}
|
|
||||||
for (int s=0;s<Ls-1;s++){
|
|
||||||
// Chi[s] + 1/d chi[s]
|
|
||||||
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
|
|
||||||
}
|
|
||||||
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
|
||||||
|
|
||||||
// Apply U^{-1}
|
|
||||||
for (int s=Ls-2;s>=0;s--){
|
|
||||||
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
|
||||||
{
|
|
||||||
int Ls=this->Ls;
|
|
||||||
// Apply (U^{\prime})^{-dagger}
|
|
||||||
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
|
||||||
for (int s=1;s<Ls;s++){
|
|
||||||
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
|
|
||||||
}
|
|
||||||
// U_m^{-\dagger}
|
|
||||||
for (int s=0;s<Ls-1;s++){
|
|
||||||
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
|
|
||||||
}
|
|
||||||
// L_m^{-\dagger} D^{-dagger}
|
|
||||||
for (int s=0;s<Ls-1;s++){
|
|
||||||
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
|
|
||||||
}
|
|
||||||
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
|
||||||
|
|
||||||
// Apply L^{-dagger}
|
|
||||||
for (int s=Ls-2;s>=0;s--){
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// force terms; five routines; default to Dhop on diagonal
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
FermionField Din(V._grid);
|
|
||||||
|
|
||||||
if ( dag == DaggerNo ) {
|
|
||||||
// U d/du [D_w D5] V = U d/du DW D5 V
|
|
||||||
Meooe5D(V,Din);
|
|
||||||
this->DhopDeriv(mat,U,Din,dag);
|
|
||||||
} else {
|
|
||||||
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
|
||||||
Meooe5D(U,Din);
|
|
||||||
this->DhopDeriv(mat,Din,V,dag);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
FermionField Din(V._grid);
|
|
||||||
|
|
||||||
if ( dag == DaggerNo ) {
|
|
||||||
// U d/du [D_w D5] V = U d/du DW D5 V
|
|
||||||
Meooe5D(V,Din);
|
|
||||||
this->DhopDerivOE(mat,U,Din,dag);
|
|
||||||
} else {
|
|
||||||
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
|
||||||
Meooe5D(U,Din);
|
Meooe5D(U,Din);
|
||||||
this->DhopDerivOE(mat,Din,V,dag);
|
this->DhopDerivOE(mat,Din,V,dag);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
{
|
{
|
||||||
FermionField Din(V._grid);
|
FermionField Din(V._grid);
|
||||||
|
|
||||||
if ( dag == DaggerNo ) {
|
|
||||||
// U d/du [D_w D5] V = U d/du DW D5 V
|
|
||||||
Meooe5D(V,Din);
|
|
||||||
this->DhopDerivEO(mat,U,Din,dag);
|
|
||||||
} else {
|
|
||||||
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
|
||||||
Meooe5D(U,Din);
|
|
||||||
this->DhopDerivEO(mat,Din,V,dag);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Tanh
|
if ( dag == DaggerNo ) {
|
||||||
template<class Impl>
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
Meooe5D(V,Din);
|
||||||
{
|
this->DhopDerivEO(mat,U,Din,dag);
|
||||||
SetCoefficientsZolotarev(1.0,zdata,b,c);
|
} else {
|
||||||
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
|
Meooe5D(U,Din);
|
||||||
|
this->DhopDerivEO(mat,Din,V,dag);
|
||||||
}
|
}
|
||||||
//Zolo
|
};
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
// Tanh
|
||||||
{
|
template<class Impl>
|
||||||
int Ls=this->Ls;
|
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
std::vector<Coeff_t> gamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
|
||||||
|
SetCoefficientsInternal(1.0,gamma,b,c);
|
||||||
|
}
|
||||||
|
//Zolo
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
std::vector<Coeff_t> gamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
|
||||||
|
SetCoefficientsInternal(zolo_hi,gamma,b,c);
|
||||||
|
}
|
||||||
|
//Zolo
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// The Cayley coeffs (unprec)
|
// The Cayley coeffs (unprec)
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
omega.resize(Ls);
|
omega.resize(Ls);
|
||||||
bs.resize(Ls);
|
bs.resize(Ls);
|
||||||
cs.resize(Ls);
|
cs.resize(Ls);
|
||||||
as.resize(Ls);
|
as.resize(Ls);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
|
||||||
|
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
||||||
|
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
|
||||||
|
//
|
||||||
|
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
|
||||||
|
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
|
||||||
|
//
|
||||||
|
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
|
||||||
|
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
|
||||||
|
//
|
||||||
|
// So
|
||||||
|
//
|
||||||
|
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
|
||||||
|
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
||||||
|
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
||||||
|
//
|
||||||
|
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
|
||||||
|
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
|
||||||
|
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
||||||
|
//
|
||||||
|
|
||||||
//
|
double bpc = b+c;
|
||||||
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
|
double bmc = b-c;
|
||||||
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
for(int i=0; i < Ls; i++){
|
||||||
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
|
as[i] = 1.0;
|
||||||
//
|
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
|
||||||
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
|
bs[i] = 0.5*(bpc/omega[i] + bmc);
|
||||||
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
|
cs[i] = 0.5*(bpc/omega[i] - bmc);
|
||||||
//
|
}
|
||||||
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
|
|
||||||
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
|
////////////////////////////////////////////////////////
|
||||||
//
|
// Constants for the preconditioned matrix Cayley form
|
||||||
// So
|
////////////////////////////////////////////////////////
|
||||||
//
|
bee.resize(Ls);
|
||||||
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
|
cee.resize(Ls);
|
||||||
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
beo.resize(Ls);
|
||||||
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
ceo.resize(Ls);
|
||||||
//
|
|
||||||
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
|
for(int i=0;i<Ls;i++){
|
||||||
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
|
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
|
||||||
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
|
||||||
//
|
beo[i]=as[i]*bs[i];
|
||||||
|
ceo[i]=-as[i]*cs[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
aee.resize(Ls);
|
||||||
|
aeo.resize(Ls);
|
||||||
|
for(int i=0;i<Ls;i++){
|
||||||
|
aee[i]=cee[i];
|
||||||
|
aeo[i]=ceo[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// LDU decomposition of eeoo
|
||||||
|
//////////////////////////////////////////
|
||||||
|
dee.resize(Ls);
|
||||||
|
lee.resize(Ls);
|
||||||
|
leem.resize(Ls);
|
||||||
|
uee.resize(Ls);
|
||||||
|
ueem.resize(Ls);
|
||||||
|
|
||||||
|
for(int i=0;i<Ls;i++){
|
||||||
|
|
||||||
double bpc = b+c;
|
dee[i] = bee[i];
|
||||||
double bmc = b-c;
|
|
||||||
for(int i=0; i < Ls; i++){
|
|
||||||
as[i] = 1.0;
|
|
||||||
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
|
|
||||||
bs[i] = 0.5*(bpc/omega[i] + bmc);
|
|
||||||
cs[i] = 0.5*(bpc/omega[i] - bmc);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// Constants for the preconditioned matrix Cayley form
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
bee.resize(Ls);
|
|
||||||
cee.resize(Ls);
|
|
||||||
beo.resize(Ls);
|
|
||||||
ceo.resize(Ls);
|
|
||||||
|
|
||||||
for(int i=0;i<Ls;i++){
|
if ( i < Ls-1 ) {
|
||||||
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
|
|
||||||
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
|
|
||||||
beo[i]=as[i]*bs[i];
|
|
||||||
ceo[i]=-as[i]*cs[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
aee.resize(Ls);
|
|
||||||
aeo.resize(Ls);
|
|
||||||
for(int i=0;i<Ls;i++){
|
|
||||||
aee[i]=cee[i];
|
|
||||||
aeo[i]=ceo[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////
|
|
||||||
// LDU decomposition of eeoo
|
|
||||||
//////////////////////////////////////////
|
|
||||||
dee.resize(Ls);
|
|
||||||
lee.resize(Ls);
|
|
||||||
leem.resize(Ls);
|
|
||||||
uee.resize(Ls);
|
|
||||||
ueem.resize(Ls);
|
|
||||||
|
|
||||||
for(int i=0;i<Ls;i++){
|
|
||||||
|
|
||||||
dee[i] = bee[i];
|
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
|
||||||
|
|
||||||
if ( i < Ls-1 ) {
|
leem[i]=mass*cee[Ls-1]/bee[0];
|
||||||
|
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
|
||||||
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
|
|
||||||
|
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
|
||||||
leem[i]=mass*cee[Ls-1]/bee[0];
|
|
||||||
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
|
ueem[i]=mass;
|
||||||
|
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
|
||||||
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
|
ueem[i]*= aee[0]/bee[0];
|
||||||
|
|
||||||
ueem[i]=mass;
|
} else {
|
||||||
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
|
lee[i] =0.0;
|
||||||
ueem[i]*= aee[0]/bee[0];
|
leem[i]=0.0;
|
||||||
|
uee[i] =0.0;
|
||||||
} else {
|
ueem[i]=0.0;
|
||||||
lee[i] =0.0;
|
|
||||||
leem[i]=0.0;
|
|
||||||
uee[i] =0.0;
|
|
||||||
ueem[i]=0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
double delta_d=mass*cee[Ls-1];
|
|
||||||
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
|
||||||
dee[Ls-1] += delta_d;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Coeff_t delta_d=mass*cee[Ls-1];
|
||||||
|
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
||||||
|
dee[Ls-1] += delta_d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(CayleyFermion5D);
|
FermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
|
@ -51,6 +51,29 @@ namespace Grid {
|
|||||||
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||||
|
virtual void Meo5D (const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
virtual void M5D (const FermionField &psi, FermionField &chi);
|
||||||
|
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Instantiate different versions depending on Impl
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
void M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper);
|
||||||
|
|
||||||
|
void M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper);
|
||||||
|
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
|
||||||
|
|
||||||
virtual void Instantiatable(void)=0;
|
virtual void Instantiatable(void)=0;
|
||||||
|
|
||||||
// force terms; five routines; default to Dhop on diagonal
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
@ -68,23 +91,23 @@ namespace Grid {
|
|||||||
RealD mass;
|
RealD mass;
|
||||||
|
|
||||||
// Cayley form Moebius (tanh and zolotarev)
|
// Cayley form Moebius (tanh and zolotarev)
|
||||||
std::vector<RealD> omega;
|
std::vector<Coeff_t> omega;
|
||||||
std::vector<RealD> bs; // S dependent coeffs
|
std::vector<Coeff_t> bs; // S dependent coeffs
|
||||||
std::vector<RealD> cs;
|
std::vector<Coeff_t> cs;
|
||||||
std::vector<RealD> as;
|
std::vector<Coeff_t> as;
|
||||||
// For preconditioning Cayley form
|
// For preconditioning Cayley form
|
||||||
std::vector<RealD> bee;
|
std::vector<Coeff_t> bee;
|
||||||
std::vector<RealD> cee;
|
std::vector<Coeff_t> cee;
|
||||||
std::vector<RealD> aee;
|
std::vector<Coeff_t> aee;
|
||||||
std::vector<RealD> beo;
|
std::vector<Coeff_t> beo;
|
||||||
std::vector<RealD> ceo;
|
std::vector<Coeff_t> ceo;
|
||||||
std::vector<RealD> aeo;
|
std::vector<Coeff_t> aeo;
|
||||||
// LDU factorisation of the eeoo matrix
|
// LDU factorisation of the eeoo matrix
|
||||||
std::vector<RealD> lee;
|
std::vector<Coeff_t> lee;
|
||||||
std::vector<RealD> leem;
|
std::vector<Coeff_t> leem;
|
||||||
std::vector<RealD> uee;
|
std::vector<Coeff_t> uee;
|
||||||
std::vector<RealD> ueem;
|
std::vector<Coeff_t> ueem;
|
||||||
std::vector<RealD> dee;
|
std::vector<Coeff_t> dee;
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
CayleyFermion5D(GaugeField &_Umu,
|
CayleyFermion5D(GaugeField &_Umu,
|
||||||
@ -97,9 +120,20 @@ namespace Grid {
|
|||||||
protected:
|
protected:
|
||||||
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
|
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#define INSTANTIATE_DPERP(A)\
|
||||||
|
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
||||||
|
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
|
||||||
|
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
||||||
|
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
|
||||||
|
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
|
||||||
|
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
#define CAYLEY_DPERP_CACHE
|
||||||
|
#undef CAYLEY_DPERP_LINALG
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
211
lib/qcd/action/fermion/CayleyFermion5Dcache.cc
Normal file
211
lib/qcd/action/fermion/CayleyFermion5Dcache.cc
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||||
|
|
||||||
|
// Pminus forwards
|
||||||
|
// Pplus backwards..
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls =this->Ls;
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
if ( s==0 ) {
|
||||||
|
spProj5m(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5p(tmp,psi._odata[ss+Ls-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
spProj5m(tmp,psi._odata[ss+0]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5p(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else {
|
||||||
|
spProj5m(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5p(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls =this->Ls;
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
spProj5p(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5m(tmp,psi._odata[ss+Ls-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
spProj5p(tmp,psi._odata[ss+0]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5m(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else {
|
||||||
|
spProj5p(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5m(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
|
||||||
|
// Apply (L^{\prime})^{-1}
|
||||||
|
chi[ss]=psi[ss]; // chi[0]=psi[0]
|
||||||
|
for(int s=1;s<Ls;s++){
|
||||||
|
spProj5p(tmp,chi[ss+s-1]);
|
||||||
|
chi[ss+s] = psi[ss+s]-lee[s-1]*tmp;
|
||||||
|
}
|
||||||
|
// L_m^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||||
|
spProj5m(tmp,chi[ss+s]);
|
||||||
|
chi[ss+Ls-1] = chi[ss+Ls-1] - leem[s]*tmp;
|
||||||
|
}
|
||||||
|
// U_m^{-1} D^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
// Chi[s] + 1/d chi[s]
|
||||||
|
spProj5p(tmp,chi[ss+Ls-1]);
|
||||||
|
chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(ueem[s]/dee[Ls-1])*tmp;
|
||||||
|
}
|
||||||
|
chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
|
||||||
|
|
||||||
|
// Apply U^{-1}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
spProj5m(tmp,chi[ss+s+1]);
|
||||||
|
chi[ss+s] = chi[ss+s] - uee[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
assert(psi.checkerboard == psi.checkerboard);
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
|
||||||
|
// Apply (U^{\prime})^{-dagger}
|
||||||
|
chi[ss]=psi[ss];
|
||||||
|
for (int s=1;s<Ls;s++){
|
||||||
|
spProj5m(tmp,chi[ss+s-1]);
|
||||||
|
chi[ss+s] = psi[ss+s]-uee[s-1]*tmp;
|
||||||
|
}
|
||||||
|
// U_m^{-\dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
spProj5p(tmp,chi[ss+s]);
|
||||||
|
chi[ss+Ls-1] = chi[ss+Ls-1] - ueem[s]*tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// L_m^{-\dagger} D^{-dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
spProj5m(tmp,chi[ss+Ls-1]);
|
||||||
|
chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(leem[s]/dee[Ls-1])*tmp;
|
||||||
|
}
|
||||||
|
chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
|
||||||
|
|
||||||
|
// Apply L^{-dagger}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
spProj5p(tmp,chi[ss+s+1]);
|
||||||
|
chi[ss+s] = chi[ss+s] - lee[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiations for this variant, emitted only when CAYLEY_DPERP_CACHE is defined.
// INSTANTIATE_DPERP is defined outside this excerpt; presumably it explicitly
// instantiates the member templates above for each listed Impl type -- confirm.
#ifdef CAYLEY_DPERP_CACHE
|
||||||
|
INSTANTIATE_DPERP(WilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(WilsonImplD);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplD);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}}
|
133
lib/qcd/action/fermion/CayleyFermion5Ddense.cc
Normal file
133
lib/qcd/action/fermion/CayleyFermion5Ddense.cc
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Ddense.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid/Eigen/Dense>
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
/*
|
||||||
|
* Dense matrix versions of routines
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
assert(Ls==LLs);
|
||||||
|
|
||||||
|
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||||
|
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||||
|
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Pplus(s,s) = bee[s];
|
||||||
|
Pminus(s,s)= bee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pminus(s,s+1) = -cee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pplus(s+1,s) = -cee[s+1];
|
||||||
|
}
|
||||||
|
Pplus (0,Ls-1) = mass*cee[0];
|
||||||
|
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
||||||
|
|
||||||
|
Eigen::MatrixXd PplusMat ;
|
||||||
|
Eigen::MatrixXd PminusMat;
|
||||||
|
|
||||||
|
if ( inv ) {
|
||||||
|
PplusMat =Pplus.inverse();
|
||||||
|
PminusMat=Pminus.inverse();
|
||||||
|
} else {
|
||||||
|
PplusMat =Pplus;
|
||||||
|
PminusMat=Pminus;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(dag){
|
||||||
|
PplusMat.adjointInPlace();
|
||||||
|
PminusMat.adjointInPlace();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For the non-vectorised s-direction this is simple
|
||||||
|
|
||||||
|
for(auto site=0;site<vol;site++){
|
||||||
|
|
||||||
|
SiteSpinor SiteChi;
|
||||||
|
SiteHalfSpinor SitePplus;
|
||||||
|
SiteHalfSpinor SitePminus;
|
||||||
|
|
||||||
|
for(int s1=0;s1<Ls;s1++){
|
||||||
|
SiteChi =zero;
|
||||||
|
for(int s2=0;s2<Ls;s2++){
|
||||||
|
int lex2 = s2+Ls*site;
|
||||||
|
|
||||||
|
if ( PplusMat(s1,s2) != 0.0 ) {
|
||||||
|
spProj5p(SitePplus,psi[lex2]);
|
||||||
|
accumRecon5p(SiteChi,PplusMat (s1,s2)*SitePplus);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( PminusMat(s1,s2) != 0.0 ) {
|
||||||
|
spProj5m(SitePminus,psi[lex2]);
|
||||||
|
accumRecon5m(SiteChi,PminusMat(s1,s2)*SitePminus);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chi[s1+Ls*site] = SiteChi*0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicit instantiations of the dense MooeeInternal for the Wilson and
// G-parity Wilson implementations, in single (F) and double (D) variants.
template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
}}
|
149
lib/qcd/action/fermion/CayleyFermion5Dssp.cc
Normal file
149
lib/qcd/action/fermion/CayleyFermion5Dssp.cc
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Dssp.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||||
|
|
||||||
|
// Pminus forwards
|
||||||
|
// Pplus backwards
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,Ls-1);
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,0);
|
||||||
|
axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
} else {
|
||||||
|
axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pplus(chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,Ls-1);
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,0);
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
} else {
|
||||||
|
axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
// Apply (L^{\prime})^{-1}
|
||||||
|
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
||||||
|
for (int s=1;s<Ls;s++){
|
||||||
|
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||||
|
}
|
||||||
|
// L_m^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
|
||||||
|
}
|
||||||
|
// U_m^{-1} D^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
// Chi[s] + 1/d chi[s]
|
||||||
|
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
|
||||||
|
}
|
||||||
|
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
||||||
|
|
||||||
|
// Apply U^{-1}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
// Apply (U^{\prime})^{-dagger}
|
||||||
|
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
||||||
|
for (int s=1;s<Ls;s++){
|
||||||
|
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
|
||||||
|
}
|
||||||
|
// U_m^{-\dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
|
||||||
|
}
|
||||||
|
// L_m^{-\dagger} D^{-dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
|
||||||
|
}
|
||||||
|
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
||||||
|
|
||||||
|
// Apply L^{-dagger}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Instantiations for this variant, emitted only when CAYLEY_DPERP_LINALG is defined.
// INSTANTIATE is defined outside this excerpt; presumably it explicitly
// instantiates the member templates above for each listed Impl type -- confirm.
#ifdef CAYLEY_DPERP_LINALG
|
||||||
|
INSTANTIATE(WilsonImplF);
|
||||||
|
INSTANTIATE(WilsonImplD);
|
||||||
|
INSTANTIATE(GparityWilsonImplF);
|
||||||
|
INSTANTIATE(GparityWilsonImplD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
309
lib/qcd/action/fermion/CayleyFermion5Dvec.cc
Normal file
309
lib/qcd/action/fermion/CayleyFermion5Dvec.cc
Normal file
@ -0,0 +1,309 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Dvec.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid/Eigen/Dense>
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
/*
|
||||||
|
* Dense matrix versions of routines
|
||||||
|
*/
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
int LLs = grid->_rdimensions[0];
|
||||||
|
int nsimd= Simd::Nsimd();
|
||||||
|
|
||||||
|
Vector<iSinglet<Simd> > u(LLs);
|
||||||
|
Vector<iSinglet<Simd> > l(LLs);
|
||||||
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
|
assert(Ls/LLs==nsimd);
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
// just directly address via type pun
|
||||||
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
|
scalar_type * l_p = (scalar_type *)&l[0];
|
||||||
|
scalar_type * d_p = (scalar_type *)&d[0];
|
||||||
|
|
||||||
|
for(int o=0;o<LLs;o++){ // outer
|
||||||
|
for(int i=0;i<nsimd;i++){ //inner
|
||||||
|
int s = o+i*LLs;
|
||||||
|
int ss = o*nsimd+i;
|
||||||
|
u_p[ss] = upper[s];
|
||||||
|
l_p[ss] = lower[s];
|
||||||
|
d_p[ss] = diag[s];
|
||||||
|
}}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
||||||
|
|
||||||
|
alignas(64) SiteHalfSpinor hp;
|
||||||
|
alignas(64) SiteHalfSpinor hm;
|
||||||
|
alignas(64) SiteSpinor fp;
|
||||||
|
alignas(64) SiteSpinor fm;
|
||||||
|
|
||||||
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
|
int vp=(v+1)%LLs;
|
||||||
|
int vm=(v+LLs-1)%LLs;
|
||||||
|
|
||||||
|
spProj5m(hp,psi[ss+vp]);
|
||||||
|
spProj5p(hm,psi[ss+vm]);
|
||||||
|
|
||||||
|
if ( vp<=v ) rotate(hp,hp,1);
|
||||||
|
if ( vm>=v ) rotate(hm,hm,nsimd-1);
|
||||||
|
|
||||||
|
hp=hp*0.5;
|
||||||
|
hm=hm*0.5;
|
||||||
|
spRecon5m(fp,hp);
|
||||||
|
spRecon5p(fm,hm);
|
||||||
|
|
||||||
|
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||||
|
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
int LLs = grid->_rdimensions[0];
|
||||||
|
int nsimd= Simd::Nsimd();
|
||||||
|
|
||||||
|
Vector<iSinglet<Simd> > u(LLs);
|
||||||
|
Vector<iSinglet<Simd> > l(LLs);
|
||||||
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
|
assert(Ls/LLs==nsimd);
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
// just directly address via type pun
|
||||||
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
|
scalar_type * l_p = (scalar_type *)&l[0];
|
||||||
|
scalar_type * d_p = (scalar_type *)&d[0];
|
||||||
|
|
||||||
|
for(int o=0;o<LLs;o++){ // outer
|
||||||
|
for(int i=0;i<nsimd;i++){ //inner
|
||||||
|
int s = o+i*LLs;
|
||||||
|
int ss = o*nsimd+i;
|
||||||
|
u_p[ss] = upper[s];
|
||||||
|
l_p[ss] = lower[s];
|
||||||
|
d_p[ss] = diag[s];
|
||||||
|
}}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
||||||
|
|
||||||
|
alignas(64) SiteHalfSpinor hp;
|
||||||
|
alignas(64) SiteHalfSpinor hm;
|
||||||
|
alignas(64) SiteSpinor fp;
|
||||||
|
alignas(64) SiteSpinor fm;
|
||||||
|
|
||||||
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
|
int vp=(v+1)%LLs;
|
||||||
|
int vm=(v+LLs-1)%LLs;
|
||||||
|
|
||||||
|
spProj5p(hp,psi[ss+vp]);
|
||||||
|
spProj5m(hm,psi[ss+vm]);
|
||||||
|
|
||||||
|
if ( vp<=v ) rotate(hp,hp,1);
|
||||||
|
if ( vm>=v ) rotate(hm,hm,nsimd-1);
|
||||||
|
|
||||||
|
hp=hp*0.5;
|
||||||
|
hm=hm*0.5;
|
||||||
|
spRecon5p(fp,hp);
|
||||||
|
spRecon5m(fm,hm);
|
||||||
|
|
||||||
|
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||||
|
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Pplus(s,s) = bee[s];
|
||||||
|
Pminus(s,s)= bee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pminus(s,s+1) = -cee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pplus(s+1,s) = -cee[s+1];
|
||||||
|
}
|
||||||
|
Pplus (0,Ls-1) = mass*cee[0];
|
||||||
|
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
||||||
|
|
||||||
|
Eigen::MatrixXcd PplusMat ;
|
||||||
|
Eigen::MatrixXcd PminusMat;
|
||||||
|
|
||||||
|
if ( inv ) {
|
||||||
|
PplusMat =Pplus.inverse();
|
||||||
|
PminusMat=Pminus.inverse();
|
||||||
|
} else {
|
||||||
|
PplusMat =Pplus;
|
||||||
|
PminusMat=Pminus;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(dag){
|
||||||
|
PplusMat.adjointInPlace();
|
||||||
|
PminusMat.adjointInPlace();
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||||
|
const int Nsimd=Simd::Nsimd();
|
||||||
|
Vector<iSinglet<Simd> > Matp(Ls*LLs);
|
||||||
|
Vector<iSinglet<Simd> > Matm(Ls*LLs);
|
||||||
|
|
||||||
|
for(int s2=0;s2<Ls;s2++){
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
int istride = LLs;
|
||||||
|
int ostride = 1;
|
||||||
|
Simd Vp;
|
||||||
|
Simd Vm;
|
||||||
|
scalar_type *sp = (scalar_type *)&Vp;
|
||||||
|
scalar_type *sm = (scalar_type *)&Vm;
|
||||||
|
for(int l=0;l<Nsimd;l++){
|
||||||
|
sp[l] = PplusMat (l*istride+s1*ostride ,s2);
|
||||||
|
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||||
|
}
|
||||||
|
Matp[LLs*s2+s1] = Vp;
|
||||||
|
Matm[LLs*s2+s1] = Vm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dynamic allocate on stack to get per thread without serialised heap acces
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(auto site=0;site<vol;site++){
|
||||||
|
|
||||||
|
// SiteHalfSpinor *SitePplus =(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
|
||||||
|
// SiteHalfSpinor *SitePminus=(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
|
||||||
|
// SiteSpinor *SiteChi =(SiteSpinor *) alloca(LLs*sizeof(SiteSpinor));
|
||||||
|
|
||||||
|
Vector<SiteHalfSpinor> SitePplus(LLs);
|
||||||
|
Vector<SiteHalfSpinor> SitePminus(LLs);
|
||||||
|
Vector<SiteHalfSpinor> SiteChiP(LLs);
|
||||||
|
Vector<SiteHalfSpinor> SiteChiM(LLs);
|
||||||
|
Vector<SiteSpinor> SiteChi(LLs);
|
||||||
|
|
||||||
|
SiteHalfSpinor BcastP;
|
||||||
|
SiteHalfSpinor BcastM;
|
||||||
|
|
||||||
|
for(int s=0;s<LLs;s++){
|
||||||
|
int lex = s+LLs*site;
|
||||||
|
spProj5p(SitePplus[s] ,psi[lex]);
|
||||||
|
spProj5m(SitePminus[s],psi[lex]);
|
||||||
|
SiteChiP[s]=zero;
|
||||||
|
SiteChiM[s]=zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
int s=0;
|
||||||
|
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
||||||
|
for(int s2=0;s2<LLs;s2++){ // Column loop of right hand side
|
||||||
|
vbroadcast(BcastP,SitePplus [s2],l);
|
||||||
|
vbroadcast(BcastM,SitePminus[s2],l);
|
||||||
|
for(int s1=0;s1<LLs;s1++){ // Column loop of reduction variables
|
||||||
|
SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP;
|
||||||
|
SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM;
|
||||||
|
}
|
||||||
|
s++;
|
||||||
|
}}
|
||||||
|
|
||||||
|
for(int s=0;s<LLs;s++){
|
||||||
|
int lex = s+LLs*site;
|
||||||
|
spRecon5p(SiteChi[s],SiteChiP[s]);
|
||||||
|
accumRecon5m(SiteChi[s],SiteChiM[s]);
|
||||||
|
chi[lex] = SiteChi[s]*0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiations for the 5d-vectorised implementations. INSTANTIATE_DPERP is
// defined outside this excerpt (presumably it instantiates the other member
// templates of this file -- confirm); MooeeInternal is instantiated explicitly below.
INSTANTIATE_DPERP(DomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_DPERP(DomainWallVec5dImplF);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
|
||||||
|
|
||||||
|
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
}}
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
|
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
|
||||||
#define GRID_QCD_DOMAIN_WALL_FERMION_H
|
#define GRID_QCD_DOMAIN_WALL_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -1,35 +1,36 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
|
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||||
|
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -75,7 +76,7 @@ namespace Grid {
|
|||||||
//
|
//
|
||||||
//
|
//
|
||||||
// template<class Impl>
|
// template<class Impl>
|
||||||
// class MyOp : pubic<Impl> {
|
// class MyOp : public<Impl> {
|
||||||
// public:
|
// public:
|
||||||
//
|
//
|
||||||
// INHERIT_ALL_IMPL_TYPES(Impl);
|
// INHERIT_ALL_IMPL_TYPES(Impl);
|
||||||
@ -99,247 +100,281 @@ namespace Grid {
|
|||||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||||
typedef typename Impl::Compressor Compressor; \
|
typedef typename Impl::Compressor Compressor; \
|
||||||
typedef typename Impl::StencilImpl StencilImpl; \
|
typedef typename Impl::StencilImpl StencilImpl; \
|
||||||
typedef typename Impl::ImplParams ImplParams;
|
typedef typename Impl::ImplParams ImplParams; \
|
||||||
|
typedef typename Impl::Coeff_t Coeff_t;
|
||||||
|
|
||||||
#define INHERIT_IMPL_TYPES(Base) \
|
#define INHERIT_IMPL_TYPES(Base) \
|
||||||
INHERIT_GIMPL_TYPES(Base)\
|
INHERIT_GIMPL_TYPES(Base) \
|
||||||
INHERIT_FIMPL_TYPES(Base)
|
INHERIT_FIMPL_TYPES(Base)
|
||||||
|
|
||||||
///////
|
///////
|
||||||
// Single flavour four spinors with colour index
|
// Single flavour four spinors with colour index
|
||||||
///////
|
///////
|
||||||
template<class S,int Nrepresentation=Nc>
|
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
|
||||||
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
class WilsonImpl
|
||||||
|
: public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||||
public:
|
public:
|
||||||
|
static const int Dimension = Representation::Dimension;
|
||||||
|
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||||
|
|
||||||
|
//Necessary?
|
||||||
|
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||||
|
|
||||||
|
const bool LsVectorised=false;
|
||||||
|
typedef _Coeff_t Coeff_t;
|
||||||
|
|
||||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
|
||||||
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
|
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||||
|
|
||||||
typedef iImplSpinor <Simd> SiteSpinor;
|
typedef iImplSpinor<Simd> SiteSpinor;
|
||||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||||
|
|
||||||
typedef Lattice<SiteSpinor> FermionField;
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||||
typedef WilsonImplParams ImplParams;
|
typedef WilsonImplParams ImplParams;
|
||||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||||
|
|
||||||
ImplParams Params;
|
ImplParams Params;
|
||||||
|
|
||||||
WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||||
|
|
||||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
|
inline void multLink(SiteHalfSpinor &phi,
|
||||||
mult(&phi(),&U(mu),&chi());
|
const SiteDoubledGaugeField &U,
|
||||||
}
|
const SiteHalfSpinor &chi,
|
||||||
|
int mu,
|
||||||
template<class ref>
|
StencilEntry *SE,
|
||||||
inline void loadLinkElement(Simd & reg,ref &memory){
|
StencilImpl &St) {
|
||||||
reg = memory;
|
mult(&phi(), &U(mu), &chi());
|
||||||
}
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
|
||||||
{
|
|
||||||
conformable(Uds._grid,GaugeGrid);
|
|
||||||
conformable(Umu._grid,GaugeGrid);
|
|
||||||
GaugeLinkField U(GaugeGrid);
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
PokeIndex<LorentzIndex>(Uds,U,mu);
|
|
||||||
U = adj(Cshift(U,mu,-1));
|
|
||||||
PokeIndex<LorentzIndex>(Uds,U,mu+4);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class ref>
|
||||||
|
inline void loadLinkElement(Simd &reg,
|
||||||
|
ref &memory) {
|
||||||
|
reg = memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void DoubleStore(GridBase *GaugeGrid,
|
||||||
|
DoubledGaugeField &Uds,
|
||||||
|
const GaugeField &Umu) {
|
||||||
|
conformable(Uds._grid, GaugeGrid);
|
||||||
|
conformable(Umu._grid, GaugeGrid);
|
||||||
|
GaugeLinkField U(GaugeGrid);
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
|
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||||
|
U = adj(Cshift(U, mu, -1));
|
||||||
|
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||||
GaugeLinkField link(mat._grid);
|
GaugeLinkField link(mat._grid);
|
||||||
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
PokeIndex<LorentzIndex>(mat,link,mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
||||||
|
|
||||||
int Ls=Btilde._grid->_fdimensions[0];
|
int Ls=Btilde._grid->_fdimensions[0];
|
||||||
|
|
||||||
GaugeLinkField tmp(mat._grid);
|
GaugeLinkField tmp(mat._grid);
|
||||||
tmp = zero;
|
tmp = zero;
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
PARALLEL_FOR_LOOP
|
||||||
int sU=sss;
|
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
||||||
for(int s=0;s<Ls;s++){
|
int sU=sss;
|
||||||
int sF = s+Ls*sU;
|
for(int s=0;s<Ls;s++){
|
||||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
int sF = s+Ls*sU;
|
||||||
|
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
///////
|
///////
|
||||||
// Single flavour four spinors with colour index, 5d redblack
|
// Single flavour four spinors with colour index, 5d redblack
|
||||||
///////
|
///////
|
||||||
template<class S,int Nrepresentation=Nc>
|
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||||
class DomainWallRedBlack5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
static const int Dimension = Nrepresentation;
|
||||||
|
const bool LsVectorised=true;
|
||||||
|
typedef _Coeff_t Coeff_t;
|
||||||
|
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||||
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
|
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
|
||||||
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
|
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
|
||||||
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||||
|
|
||||||
typedef iImplSpinor <Simd> SiteSpinor;
|
typedef iImplSpinor<Simd> SiteSpinor;
|
||||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
typedef Lattice<SiteSpinor> FermionField;
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
|
|
||||||
// Make the doubled gauge field a *scalar*
|
// Make the doubled gauge field a *scalar*
|
||||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
typedef iImplDoubledGaugeField<typename Simd::scalar_type>
|
||||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
SiteDoubledGaugeField; // This is a scalar
|
||||||
typedef iImplGaugeLink <typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
typedef iImplGaugeField<typename Simd::scalar_type>
|
||||||
|
SiteScalarGaugeField; // scalar
|
||||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
typedef iImplGaugeLink<typename Simd::scalar_type>
|
||||||
|
SiteScalarGaugeLink; // scalar
|
||||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
|
||||||
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
|
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||||
typedef WilsonImplParams ImplParams;
|
typedef WilsonImplParams ImplParams;
|
||||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||||
|
|
||||||
ImplParams Params;
|
ImplParams Params;
|
||||||
|
|
||||||
DomainWallRedBlack5dImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return false; };
|
bool overlapCommsCompute(void) { return false; };
|
||||||
|
|
||||||
template<class ref>
|
template <class ref>
|
||||||
inline void loadLinkElement(Simd & reg,ref &memory){
|
inline void loadLinkElement(Simd &reg, ref &memory) {
|
||||||
vsplat(reg,memory);
|
vsplat(reg, memory);
|
||||||
}
|
}
|
||||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||||
{
|
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||||
|
StencilImpl &St) {
|
||||||
SiteGaugeLink UU;
|
SiteGaugeLink UU;
|
||||||
for(int i=0;i<Nrepresentation;i++){
|
for (int i = 0; i < Nrepresentation; i++) {
|
||||||
for(int j=0;j<Nrepresentation;j++){
|
for (int j = 0; j < Nrepresentation; j++) {
|
||||||
vsplat(UU()()(i,j),U(mu)()(i,j));
|
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mult(&phi(),&UU(),&chi());
|
mult(&phi(), &UU(), &chi());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
|
||||||
{
|
const GaugeField &Umu) {
|
||||||
SiteScalarGaugeField ScalarUmu;
|
SiteScalarGaugeField ScalarUmu;
|
||||||
SiteDoubledGaugeField ScalarUds;
|
SiteDoubledGaugeField ScalarUds;
|
||||||
|
|
||||||
GaugeLinkField U (Umu._grid);
|
|
||||||
GaugeField Uadj(Umu._grid);
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
U = adj(Cshift(U,mu,-1));
|
|
||||||
PokeIndex<LorentzIndex>(Uadj,U,mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int lidx=0;lidx<GaugeGrid->lSites();lidx++){
|
|
||||||
std::vector<int> lcoor;
|
|
||||||
GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor);
|
|
||||||
|
|
||||||
peekLocalSite(ScalarUmu,Umu,lcoor);
|
|
||||||
for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu);
|
|
||||||
|
|
||||||
peekLocalSite(ScalarUmu,Uadj,lcoor);
|
|
||||||
for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu);
|
|
||||||
|
|
||||||
pokeLocalSite(ScalarUds,Uds,lcoor);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
GaugeLinkField U(Umu._grid);
|
||||||
assert(0);
|
GaugeField Uadj(Umu._grid);
|
||||||
}
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
U = adj(Cshift(U, mu, -1));
|
||||||
|
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||||
|
std::vector<int> lcoor;
|
||||||
|
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||||
|
|
||||||
|
peekLocalSite(ScalarUmu, Umu, lcoor);
|
||||||
|
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||||
|
|
||||||
|
peekLocalSite(ScalarUmu, Uadj, lcoor);
|
||||||
|
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||||
|
|
||||||
|
pokeLocalSite(ScalarUds, Uds, lcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
|
||||||
|
FermionField &A, int mu) {
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
|
||||||
|
FermionField &Atilde, int mu) {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class S,int Nrepresentation>
|
|
||||||
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
|
|
||||||
public:
|
|
||||||
|
|
||||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
|
|
||||||
template<typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >;
|
|
||||||
template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
|
|
||||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
|
|
||||||
|
|
||||||
typedef iImplSpinor <Simd> SiteSpinor;
|
template <class S, int Nrepresentation,class _Coeff_t = RealD>
|
||||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
class GparityWilsonImpl
|
||||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
: public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||||
|
public:
|
||||||
|
static const int Dimension = Nrepresentation;
|
||||||
|
|
||||||
typedef Lattice<SiteSpinor> FermionField;
|
const bool LsVectorised=false;
|
||||||
|
|
||||||
|
typedef _Coeff_t Coeff_t;
|
||||||
|
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
||||||
|
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
|
template <typename vtype>
|
||||||
|
using iImplSpinor =
|
||||||
|
iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||||
|
template <typename vtype>
|
||||||
|
using iImplHalfSpinor =
|
||||||
|
iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
|
||||||
|
template <typename vtype>
|
||||||
|
using iImplDoubledGaugeField =
|
||||||
|
iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
|
||||||
|
|
||||||
|
typedef iImplSpinor<Simd> SiteSpinor;
|
||||||
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
|
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||||
|
|
||||||
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||||
|
|
||||||
typedef GparityWilsonImplParams ImplParams;
|
typedef GparityWilsonImplParams ImplParams;
|
||||||
|
|
||||||
ImplParams Params;
|
ImplParams Params;
|
||||||
|
|
||||||
GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
|
||||||
|
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||||
|
|
||||||
// provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity
|
// provide the multiply by link that is differentiated between Gparity (with
|
||||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
|
// flavour index) and non-Gparity
|
||||||
|
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||||
|
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||||
|
StencilImpl &St) {
|
||||||
typedef SiteHalfSpinor vobj;
|
typedef SiteHalfSpinor vobj;
|
||||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||||
|
|
||||||
vobj vtmp;
|
vobj vtmp;
|
||||||
sobj stmp;
|
sobj stmp;
|
||||||
|
|
||||||
GridBase *grid = St._grid;
|
GridBase *grid = St._grid;
|
||||||
|
|
||||||
const int Nsimd = grid->Nsimd();
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
int direction = St._directions[mu];
|
int direction = St._directions[mu];
|
||||||
int distance = St._distances[mu];
|
int distance = St._distances[mu];
|
||||||
int ptype = St._permute_type[mu];
|
int ptype = St._permute_type[mu];
|
||||||
int sl = St._grid->_simd_layout[direction];
|
int sl = St._grid->_simd_layout[direction];
|
||||||
|
|
||||||
// Fixme X.Y.Z.T hardcode in stencil
|
// Fixme X.Y.Z.T hardcode in stencil
|
||||||
int mmu = mu % Nd;
|
int mmu = mu % Nd;
|
||||||
|
|
||||||
// assert our assumptions
|
// assert our assumptions
|
||||||
assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code
|
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
|
||||||
assert((sl==1)||(sl==2));
|
assert((sl == 1) || (sl == 2));
|
||||||
|
|
||||||
std::vector<int> icoor;
|
std::vector<int> icoor;
|
||||||
|
|
||||||
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
||||||
|
|
||||||
if ( sl == 2 ) {
|
if ( sl == 2 ) {
|
||||||
@ -380,7 +415,7 @@ PARALLEL_FOR_LOOP
|
|||||||
mult(&phi(1),&U(1)(mu),&chi(1));
|
mult(&phi(1),&U(1)(mu),&chi(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
{
|
{
|
||||||
@ -393,7 +428,7 @@ PARALLEL_FOR_LOOP
|
|||||||
GaugeLinkField Uconj(GaugeGrid);
|
GaugeLinkField Uconj(GaugeGrid);
|
||||||
|
|
||||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||||
|
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
@ -401,19 +436,19 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
Uconj = conjugate(U);
|
Uconj = conjugate(U);
|
||||||
|
|
||||||
// This phase could come from a simple bc 1,1,-1,1 ..
|
// This phase could come from a simple bc 1,1,-1,1 ..
|
||||||
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
||||||
if ( Params.twists[mu] ) {
|
if ( Params.twists[mu] ) {
|
||||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(auto ss=U.begin();ss<U.end();ss++){
|
PARALLEL_FOR_LOOP
|
||||||
Uds[ss](0)(mu) = U[ss]();
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
Uds[ss](1)(mu) = Uconj[ss]();
|
Uds[ss](0)(mu) = U[ss]();
|
||||||
}
|
Uds[ss](1)(mu) = Uconj[ss]();
|
||||||
|
}
|
||||||
|
|
||||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||||
Uconj = adj(Cshift(Uconj,mu,-1));
|
Uconj = adj(Cshift(Uconj,mu,-1));
|
||||||
@ -423,68 +458,86 @@ PARALLEL_FOR_LOOP
|
|||||||
Utmp = where(coor==0,Uconj,Utmp);
|
Utmp = where(coor==0,Uconj,Utmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=U.begin();ss<U.end();ss++){
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||||
}
|
}
|
||||||
|
|
||||||
Utmp = Uconj;
|
Utmp = Uconj;
|
||||||
if ( Params.twists[mu] ) {
|
if ( Params.twists[mu] ) {
|
||||||
Utmp = where(coor==0,U,Utmp);
|
Utmp = where(coor==0,U,Utmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=U.begin();ss<U.end();ss++){
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
|
||||||
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
|
||||||
|
FermionField &A, int mu) {
|
||||||
// DhopDir provides U or Uconj depending on coor/flavour.
|
// DhopDir provides U or Uconj depending on coor/flavour.
|
||||||
GaugeLinkField link(mat._grid);
|
GaugeLinkField link(mat._grid);
|
||||||
// use lorentz for flavour as hack.
|
// use lorentz for flavour as hack.
|
||||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=tmp.begin();ss<tmp.end();ss++){
|
for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
|
||||||
link[ss]() = tmp[ss](0,0) - conjugate(tmp[ss](1,1)) ;
|
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
|
||||||
}
|
}
|
||||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
PokeIndex<LorentzIndex>(mat, link, mu);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
|
||||||
int Ls=Btilde._grid->_fdimensions[0];
|
FermionField &Atilde, int mu) {
|
||||||
|
int Ls = Btilde._grid->_fdimensions[0];
|
||||||
|
|
||||||
GaugeLinkField tmp(mat._grid);
|
GaugeLinkField tmp(mat._grid);
|
||||||
tmp = zero;
|
tmp = zero;
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<tmp._grid->oSites();ss++){
|
for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
|
||||||
for(int s=0;s<Ls;s++){
|
for (int s = 0; s < Ls; s++) {
|
||||||
int sF = s+Ls*ss;
|
int sF = s + Ls * ss;
|
||||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF]));
|
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
|
||||||
tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1));
|
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
|
||||||
|
|
||||||
typedef DomainWallRedBlack5dImpl<vComplex ,Nc> DomainWallRedBlack5dImplR; // Real.. whichever prec
|
|
||||||
typedef DomainWallRedBlack5dImpl<vComplexF,Nc> DomainWallRedBlack5dImplF; // Float
|
|
||||||
typedef DomainWallRedBlack5dImpl<vComplexD,Nc> DomainWallRedBlack5dImplD; // Double
|
|
||||||
|
|
||||||
typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
|
||||||
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
|
||||||
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
|
||||||
|
|
||||||
}
|
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
|
||||||
|
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
|
||||||
|
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
|
||||||
|
|
||||||
|
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
|
||||||
|
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
|
||||||
|
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
|
||||||
|
|
||||||
|
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||||
|
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||||
|
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
|
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||||
|
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
|
||||||
|
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
|
typedef GparityWilsonImpl<vComplex, Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||||
|
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
|
||||||
|
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_MOBIUS_FERMION_H
|
#ifndef GRID_QCD_MOBIUS_FERMION_H
|
||||||
#define GRID_QCD_MOBIUS_FERMION_H
|
#define GRID_QCD_MOBIUS_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
||||||
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
||||||
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
||||||
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
||||||
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
||||||
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
||||||
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
||||||
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
|
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
|
||||||
#define GRID_QCD_SCALED_SHAMIR_FERMION_H
|
#define GRID_QCD_SCALED_SHAMIR_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
||||||
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -1,319 +1,315 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
|
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
const std::vector<int> WilsonFermionStatic::directions ({0,1,2,3, 0, 1, 2, 3});
|
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2,
|
||||||
const std::vector<int> WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1});
|
3});
|
||||||
int WilsonFermionStatic::HandOptDslash;
|
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1,
|
||||||
|
-1, -1});
|
||||||
|
int WilsonFermionStatic::HandOptDslash;
|
||||||
|
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
// Constructor and gauge import
|
// Constructor and gauge import
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu,
|
WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||||
GridCartesian &Fgrid,
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
GridRedBlackCartesian &Hgrid,
|
const ImplParams &p)
|
||||||
RealD _mass,const ImplParams &p) :
|
: Kernels(p),
|
||||||
Kernels(p),
|
_grid(&Fgrid),
|
||||||
_grid(&Fgrid),
|
_cbgrid(&Hgrid),
|
||||||
_cbgrid(&Hgrid),
|
Stencil(&Fgrid, npoint, Even, directions, displacements),
|
||||||
Stencil (&Fgrid,npoint,Even,directions,displacements),
|
StencilEven(&Hgrid, npoint, Even, directions,
|
||||||
StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even
|
displacements), // source is Even
|
||||||
StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd
|
StencilOdd(&Hgrid, npoint, Odd, directions,
|
||||||
mass(_mass),
|
displacements), // source is Odd
|
||||||
Lebesgue(_grid),
|
mass(_mass),
|
||||||
LebesgueEvenOdd(_cbgrid),
|
Lebesgue(_grid),
|
||||||
Umu(&Fgrid),
|
LebesgueEvenOdd(_cbgrid),
|
||||||
UmuEven(&Hgrid),
|
Umu(&Fgrid),
|
||||||
UmuOdd (&Hgrid)
|
UmuEven(&Hgrid),
|
||||||
{
|
UmuOdd(&Hgrid) {
|
||||||
// Allocate the required comms buffer
|
// Allocate the required comms buffer
|
||||||
ImportGauge(_Umu);
|
ImportGauge(_Umu);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
|
||||||
|
GaugeField HUmu(_Umu._grid);
|
||||||
|
HUmu = _Umu * (-0.5);
|
||||||
|
Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
|
||||||
|
pickCheckerboard(Even, UmuEven, Umu);
|
||||||
|
pickCheckerboard(Odd, UmuOdd, Umu);
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Implement the interface
|
||||||
|
/////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
Dhop(in, out, DaggerNo);
|
||||||
|
return axpy_norm(out, 4 + mass, in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
Dhop(in, out, DaggerYes);
|
||||||
|
return axpy_norm(out, 4 + mass, in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||||
|
if (in.checkerboard == Odd) {
|
||||||
|
DhopEO(in, out, DaggerNo);
|
||||||
|
} else {
|
||||||
|
DhopOE(in, out, DaggerNo);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||||
{
|
if (in.checkerboard == Odd) {
|
||||||
GaugeField HUmu(_Umu._grid);
|
DhopEO(in, out, DaggerYes);
|
||||||
HUmu = _Umu*(-0.5);
|
} else {
|
||||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
DhopOE(in, out, DaggerYes);
|
||||||
pickCheckerboard(Even,UmuEven,Umu);
|
|
||||||
pickCheckerboard(Odd ,UmuOdd,Umu);
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////
|
|
||||||
// Implement the interface
|
|
||||||
/////////////////////////////
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
out.checkerboard=in.checkerboard;
|
|
||||||
Dhop(in,out,DaggerNo);
|
|
||||||
return axpy_norm(out,4+mass,in,out);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
{
|
out.checkerboard = in.checkerboard;
|
||||||
out.checkerboard=in.checkerboard;
|
typename FermionField::scalar_type scal(4.0 + mass);
|
||||||
Dhop(in,out,DaggerYes);
|
out = scal * in;
|
||||||
return axpy_norm(out,4+mass,in,out);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
|
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
{
|
out.checkerboard = in.checkerboard;
|
||||||
if ( in.checkerboard == Odd ) {
|
Mooee(in, out);
|
||||||
DhopEO(in,out,DaggerNo);
|
}
|
||||||
} else {
|
|
||||||
DhopOE(in,out,DaggerNo);
|
template <class Impl>
|
||||||
}
|
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||||
}
|
out.checkerboard = in.checkerboard;
|
||||||
template<class Impl>
|
out = (1.0 / (4.0 + mass)) * in;
|
||||||
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
}
|
||||||
{
|
|
||||||
if ( in.checkerboard == Odd ) {
|
template <class Impl>
|
||||||
DhopEO(in,out,DaggerYes);
|
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in,
|
||||||
} else {
|
FermionField &out) {
|
||||||
DhopOE(in,out,DaggerYes);
|
out.checkerboard = in.checkerboard;
|
||||||
|
MooeeInv(in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////
|
||||||
|
// Internal
|
||||||
|
///////////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||||
|
GaugeField &mat, const FermionField &A,
|
||||||
|
const FermionField &B, int dag) {
|
||||||
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
Compressor compressor(dag);
|
||||||
|
|
||||||
|
FermionField Btilde(B._grid);
|
||||||
|
FermionField Atilde(B._grid);
|
||||||
|
Atilde = A;
|
||||||
|
|
||||||
|
st.HaloExchange(B, compressor);
|
||||||
|
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Flip gamma (1+g)<->(1-g) if dag
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
int gamma = mu;
|
||||||
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
|
////////////////////////
|
||||||
|
// Call the single hop
|
||||||
|
////////////////////////
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < B._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu,
|
||||||
|
gamma);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// spin trace outer product
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag) {
|
||||||
|
conformable(U._grid, _grid);
|
||||||
|
conformable(U._grid, V._grid);
|
||||||
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
|
mat.checkerboard = U.checkerboard;
|
||||||
|
|
||||||
|
DerivInternal(Stencil, Umu, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag) {
|
||||||
|
conformable(U._grid, _cbgrid);
|
||||||
|
conformable(U._grid, V._grid);
|
||||||
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
|
assert(V.checkerboard == Even);
|
||||||
|
assert(U.checkerboard == Odd);
|
||||||
|
mat.checkerboard = Odd;
|
||||||
|
|
||||||
|
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag) {
|
||||||
|
conformable(U._grid, _cbgrid);
|
||||||
|
conformable(U._grid, V._grid);
|
||||||
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
|
assert(V.checkerboard == Odd);
|
||||||
|
assert(U.checkerboard == Even);
|
||||||
|
mat.checkerboard = Even;
|
||||||
|
|
||||||
|
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,
|
||||||
|
int dag) {
|
||||||
|
conformable(in._grid, _grid); // verifies full grid
|
||||||
|
conformable(in._grid, out._grid);
|
||||||
|
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
|
||||||
|
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,
|
||||||
|
int dag) {
|
||||||
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
|
assert(in.checkerboard == Even);
|
||||||
|
out.checkerboard = Odd;
|
||||||
|
|
||||||
|
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,
|
||||||
|
int dag) {
|
||||||
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
|
assert(in.checkerboard == Odd);
|
||||||
|
out.checkerboard = Even;
|
||||||
|
|
||||||
|
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out,
|
||||||
|
int dir, int disp) {
|
||||||
|
DhopDir(in, out, dir, disp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,
|
||||||
|
int dir, int disp) {
|
||||||
|
int skip = (disp == 1) ? 0 : 1;
|
||||||
|
int dirdisp = dir + skip * 4;
|
||||||
|
int gamma = dir + (1 - skip) * 4;
|
||||||
|
|
||||||
|
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||||
|
int dirdisp, int gamma, int dag) {
|
||||||
|
Compressor compressor(dag);
|
||||||
|
|
||||||
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out,
|
||||||
|
dirdisp, gamma);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in,
|
||||||
|
FermionField &out, int dag) {
|
||||||
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
Compressor compressor(dag);
|
||||||
|
st.HaloExchange(in, compressor);
|
||||||
|
|
||||||
|
if (dag == DaggerYes) {
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
|
||||||
|
out);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
|
||||||
|
out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<class Impl>
|
FermOpTemplateInstantiate(WilsonFermion);
|
||||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
AdjointFermOpTemplateInstantiate(WilsonFermion);
|
||||||
out.checkerboard = in.checkerboard;
|
TwoIndexFermOpTemplateInstantiate(WilsonFermion);
|
||||||
typename FermionField::scalar_type scal(4.0+mass);
|
GparityFermOpTemplateInstantiate(WilsonFermion);
|
||||||
out = scal*in;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
|
||||||
out.checkerboard = in.checkerboard;
|
|
||||||
Mooee(in,out);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
|
||||||
out.checkerboard = in.checkerboard;
|
|
||||||
out = (1.0/(4.0+mass))*in;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
|
|
||||||
out.checkerboard = in.checkerboard;
|
|
||||||
MooeeInv(in,out);
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////
|
|
||||||
// Internal
|
|
||||||
///////////////////////////////////
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DerivInternal(StencilImpl & st,
|
|
||||||
DoubledGaugeField & U,
|
|
||||||
GaugeField &mat,
|
|
||||||
const FermionField &A,
|
|
||||||
const FermionField &B,int dag) {
|
|
||||||
|
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
||||||
|
|
||||||
Compressor compressor(dag);
|
|
||||||
|
|
||||||
FermionField Btilde(B._grid);
|
|
||||||
FermionField Atilde(B._grid);
|
|
||||||
Atilde = A;
|
|
||||||
|
|
||||||
st.HaloExchange(B,compressor);
|
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Flip gamma (1+g)<->(1-g) if dag
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
int gamma = mu;
|
|
||||||
if ( !dag ) gamma+= Nd;
|
|
||||||
|
|
||||||
////////////////////////
|
|
||||||
// Call the single hop
|
|
||||||
////////////////////////
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<B._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// spin trace outer product
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
Impl::InsertForce4D(mat,Btilde,Atilde,mu);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
conformable(U._grid,_grid);
|
|
||||||
conformable(U._grid,V._grid);
|
|
||||||
conformable(U._grid,mat._grid);
|
|
||||||
|
|
||||||
mat.checkerboard = U.checkerboard;
|
|
||||||
|
|
||||||
DerivInternal(Stencil,Umu,mat,U,V,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
conformable(U._grid,_cbgrid);
|
|
||||||
conformable(U._grid,V._grid);
|
|
||||||
conformable(U._grid,mat._grid);
|
|
||||||
|
|
||||||
assert(V.checkerboard==Even);
|
|
||||||
assert(U.checkerboard==Odd);
|
|
||||||
mat.checkerboard = Odd;
|
|
||||||
|
|
||||||
DerivInternal(StencilEven,UmuOdd,mat,U,V,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
conformable(U._grid,_cbgrid);
|
|
||||||
conformable(U._grid,V._grid);
|
|
||||||
conformable(U._grid,mat._grid);
|
|
||||||
|
|
||||||
assert(V.checkerboard==Odd);
|
|
||||||
assert(U.checkerboard==Even);
|
|
||||||
mat.checkerboard = Even;
|
|
||||||
|
|
||||||
DerivInternal(StencilOdd,UmuEven,mat,U,V,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) {
|
|
||||||
conformable(in._grid,_grid); // verifies full grid
|
|
||||||
conformable(in._grid,out._grid);
|
|
||||||
|
|
||||||
out.checkerboard = in.checkerboard;
|
|
||||||
|
|
||||||
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) {
|
|
||||||
conformable(in._grid,_cbgrid); // verifies half grid
|
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
|
||||||
|
|
||||||
assert(in.checkerboard==Even);
|
|
||||||
out.checkerboard = Odd;
|
|
||||||
|
|
||||||
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
|
|
||||||
conformable(in._grid,_cbgrid); // verifies half grid
|
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
|
||||||
|
|
||||||
assert(in.checkerboard==Odd);
|
|
||||||
out.checkerboard = Even;
|
|
||||||
|
|
||||||
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::Mdir (const FermionField &in, FermionField &out,int dir,int disp) {
|
|
||||||
DhopDir(in,out,dir,disp);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir,int disp){
|
|
||||||
|
|
||||||
int skip = (disp==1) ? 0 : 1;
|
|
||||||
int dirdisp = dir+skip*4;
|
|
||||||
int gamma = dir+(1-skip)*4;
|
|
||||||
|
|
||||||
DhopDirDisp(in,out,dirdisp,gamma,DaggerNo);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) {
|
|
||||||
|
|
||||||
Compressor compressor(dag);
|
|
||||||
|
|
||||||
Stencil.HaloExchange(in,compressor);
|
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sss,sss,in,out,dirdisp,gamma);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,LebesgueOrder& lo,DoubledGaugeField & U,
|
|
||||||
const FermionField &in, FermionField &out,int dag)
|
|
||||||
{
|
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
||||||
|
|
||||||
Compressor compressor(dag);
|
|
||||||
st.HaloExchange(in,compressor);
|
|
||||||
|
|
||||||
if ( dag == DaggerYes ) {
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sss,sss,1,1,in,out);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sss,sss,1,1,in,out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonFermion);
|
|
||||||
GparityFermOpTemplateInstantiate(WilsonFermion);
|
|
||||||
|
|
||||||
|
|
||||||
}}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,161 +1,155 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.h
|
Source file: ./lib/qcd/action/fermion/WilsonFermion.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_WILSON_FERMION_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_WILSON_FERMION_H
|
#ifndef GRID_QCD_WILSON_FERMION_H
|
||||||
|
#define GRID_QCD_WILSON_FERMION_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class WilsonFermionStatic {
|
class WilsonFermionStatic {
|
||||||
public:
|
public:
|
||||||
static int HandOptDslash; // these are a temporary hack
|
static int HandOptDslash; // these are a temporary hack
|
||||||
static int MortonOrder;
|
static int MortonOrder;
|
||||||
static const std::vector<int> directions ;
|
static const std::vector<int> directions;
|
||||||
static const std::vector<int> displacements;
|
static const std::vector<int> displacements;
|
||||||
static const int npoint=8;
|
static const int npoint = 8;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic
|
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
||||||
{
|
public:
|
||||||
public:
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
typedef WilsonKernels<Impl> Kernels;
|
||||||
typedef WilsonKernels<Impl> Kernels;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
GridBase *GaugeGrid(void) { return _grid ;}
|
GridBase *GaugeGrid(void) { return _grid; }
|
||||||
GridBase *GaugeRedBlackGrid(void) { return _cbgrid ;}
|
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
|
||||||
GridBase *FermionGrid(void) { return _grid;}
|
GridBase *FermionGrid(void) { return _grid; }
|
||||||
GridBase *FermionRedBlackGrid(void) { return _cbgrid;}
|
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
RealD M(const FermionField &in, FermionField &out);
|
RealD M(const FermionField &in, FermionField &out);
|
||||||
RealD Mdag(const FermionField &in, FermionField &out);
|
RealD Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
// could remain virtual so we can derive Clover from Wilson base
|
// could remain virtual so we can derive Clover from Wilson base
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
void Meooe(const FermionField &in, FermionField &out) ;
|
void Meooe(const FermionField &in, FermionField &out);
|
||||||
void MeooeDag(const FermionField &in, FermionField &out) ;
|
void MeooeDag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// allow override for twisted mass and clover
|
// allow override for twisted mass and clover
|
||||||
virtual void Mooee(const FermionField &in, FermionField &out) ;
|
virtual void Mooee(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
|
virtual void MooeeDag(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
|
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
|
virtual void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
////////////////////////
|
////////////////////////
|
||||||
// Derivative interface
|
// Derivative interface
|
||||||
////////////////////////
|
////////////////////////
|
||||||
// Interface calls an internal routine
|
// Interface calls an internal routine
|
||||||
void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
void DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V,
|
||||||
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
int dag);
|
||||||
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
void DhopDerivOE(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag);
|
||||||
|
void DhopDerivEO(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// non-hermitian hopping term; half cb or both
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void Dhop(const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopOE(const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopEO(const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Multigrid assistance; force term uses too
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
|
||||||
|
int gamma, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Extra methods added by derived
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat,
|
||||||
|
const FermionField &A, const FermionField &B, int dag);
|
||||||
|
|
||||||
|
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
|
// Constructor
|
||||||
|
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||||
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
|
const ImplParams &p = ImplParams());
|
||||||
|
|
||||||
|
// DoubleStore impl dependent
|
||||||
|
void ImportGauge(const GaugeField &_Umu);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Data members require to support the functionality
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// protected:
|
||||||
|
public:
|
||||||
|
RealD mass;
|
||||||
|
|
||||||
|
GridBase *_grid;
|
||||||
|
GridBase *_cbgrid;
|
||||||
|
|
||||||
|
// Defines the stencils for even and odd
|
||||||
|
StencilImpl Stencil;
|
||||||
|
StencilImpl StencilEven;
|
||||||
|
StencilImpl StencilOdd;
|
||||||
|
|
||||||
|
// Copy of the gauge field , with even and odd subsets
|
||||||
|
DoubledGaugeField Umu;
|
||||||
|
DoubledGaugeField UmuEven;
|
||||||
|
DoubledGaugeField UmuOdd;
|
||||||
|
|
||||||
|
LebesgueOrder Lebesgue;
|
||||||
|
LebesgueOrder LebesgueEvenOdd;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
}
|
||||||
// non-hermitian hopping term; half cb or both
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
void Dhop(const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
void DhopOE(const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
void DhopEO(const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Multigrid assistance; force term uses too
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ;
|
|
||||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
|
||||||
void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Extra methods added by derived
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
void DerivInternal(StencilImpl & st,
|
|
||||||
DoubledGaugeField & U,
|
|
||||||
GaugeField &mat,
|
|
||||||
const FermionField &A,
|
|
||||||
const FermionField &B,
|
|
||||||
int dag);
|
|
||||||
|
|
||||||
void DhopInternal(StencilImpl & st,LebesgueOrder & lo,DoubledGaugeField & U,
|
|
||||||
const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
|
|
||||||
// Constructor
|
|
||||||
WilsonFermion(GaugeField &_Umu,
|
|
||||||
GridCartesian &Fgrid,
|
|
||||||
GridRedBlackCartesian &Hgrid,
|
|
||||||
RealD _mass,
|
|
||||||
const ImplParams &p= ImplParams()
|
|
||||||
) ;
|
|
||||||
|
|
||||||
// DoubleStore impl dependent
|
|
||||||
void ImportGauge(const GaugeField &_Umu);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Data members require to support the functionality
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// protected:
|
|
||||||
public:
|
|
||||||
|
|
||||||
RealD mass;
|
|
||||||
|
|
||||||
GridBase * _grid;
|
|
||||||
GridBase * _cbgrid;
|
|
||||||
|
|
||||||
//Defines the stencils for even and odd
|
|
||||||
StencilImpl Stencil;
|
|
||||||
StencilImpl StencilEven;
|
|
||||||
StencilImpl StencilOdd;
|
|
||||||
|
|
||||||
// Copy of the gauge field , with even and odd subsets
|
|
||||||
DoubledGaugeField Umu;
|
|
||||||
DoubledGaugeField UmuEven;
|
|
||||||
DoubledGaugeField UmuOdd;
|
|
||||||
|
|
||||||
LebesgueOrder Lebesgue;
|
|
||||||
LebesgueOrder LebesgueEvenOdd;
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
|
||||||
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -42,15 +42,15 @@ const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1
|
|||||||
// 5d lattice for DWF.
|
// 5d lattice for DWF.
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _M5,const ImplParams &p) :
|
RealD _M5,const ImplParams &p) :
|
||||||
Kernels(p),
|
Kernels(p),
|
||||||
_FiveDimGrid(&FiveDimGrid),
|
_FiveDimGrid (&FiveDimGrid),
|
||||||
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
||||||
_FourDimGrid(&FourDimGrid),
|
_FourDimGrid (&FourDimGrid),
|
||||||
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
||||||
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
||||||
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
||||||
@ -62,60 +62,83 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|||||||
Lebesgue(_FourDimGrid),
|
Lebesgue(_FourDimGrid),
|
||||||
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
||||||
{
|
{
|
||||||
// some assertions
|
if (Impl::LsVectorised) {
|
||||||
assert(FiveDimGrid._ndimension==5);
|
|
||||||
assert(FourDimGrid._ndimension==4);
|
|
||||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
|
||||||
assert(FourDimRedBlackGrid._ndimension==4);
|
|
||||||
assert(FiveDimRedBlackGrid._checker_dim==1);
|
|
||||||
|
|
||||||
// Dimension zero of the five-d is the Ls direction
|
int nsimd = Simd::Nsimd();
|
||||||
Ls=FiveDimGrid._fdimensions[0];
|
|
||||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
// some assertions
|
||||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
assert(FiveDimGrid._ndimension==5);
|
||||||
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||||
assert(FiveDimGrid._processors[0] ==1);
|
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||||
assert(FiveDimGrid._simd_layout[0] ==1);
|
assert(FourDimGrid._ndimension==4);
|
||||||
|
|
||||||
// Other dimensions must match the decomposition of the four-D fields
|
// Dimension zero of the five-d is the Ls direction
|
||||||
for(int d=0;d<4;d++){
|
Ls=FiveDimGrid._fdimensions[0];
|
||||||
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
assert(FiveDimGrid._processors[0] ==1);
|
||||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||||
|
|
||||||
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||||
|
|
||||||
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
// Other dimensions must match the decomposition of the four-D fields
|
||||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
for(int d=0;d<4;d++){
|
||||||
|
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
|
|
||||||
|
assert(FourDimGrid._simd_layout[d]==1); // must be a comparison: '=' would overwrite the layout entry and always pass
|
||||||
|
assert(FourDimRedBlackGrid._simd_layout[d]==1); // same check for the red-black four-d grid
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||||
|
|
||||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// some assertions
|
||||||
|
assert(FiveDimGrid._ndimension==5);
|
||||||
|
assert(FourDimGrid._ndimension==4);
|
||||||
|
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||||
|
assert(FourDimRedBlackGrid._ndimension==4);
|
||||||
|
assert(FiveDimRedBlackGrid._checker_dim==1);
|
||||||
|
|
||||||
|
// Dimension zero of the five-d is the Ls direction
|
||||||
|
Ls=FiveDimGrid._fdimensions[0];
|
||||||
|
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||||
|
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
||||||
|
assert(FiveDimGrid._processors[0] ==1);
|
||||||
|
assert(FiveDimGrid._simd_layout[0] ==1);
|
||||||
|
|
||||||
|
// Other dimensions must match the decomposition of the four-D fields
|
||||||
|
for(int d=0;d<4;d++){
|
||||||
|
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||||
|
|
||||||
|
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
|
|
||||||
|
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
||||||
|
|
||||||
|
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||||
|
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
|
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate the required comms buffer
|
// Allocate the required comms buffer
|
||||||
ImportGauge(_Umu);
|
ImportGauge(_Umu);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
RealD _M5,const ImplParams &p) :
|
RealD _M5,const ImplParams &p) :
|
||||||
Kernels(p),
|
|
||||||
_FiveDimGrid (&FiveDimGrid),
|
|
||||||
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
|
||||||
_FourDimGrid (&FourDimGrid),
|
|
||||||
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
|
||||||
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
|
||||||
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
|
|
||||||
M5(_M5),
|
|
||||||
Umu(_FourDimGrid),
|
|
||||||
UmuEven(_FourDimGrid),
|
|
||||||
UmuOdd (_FourDimGrid),
|
|
||||||
Lebesgue(_FourDimGrid),
|
|
||||||
LebesgueEvenOdd(_FourDimGrid)
|
|
||||||
{
|
{
|
||||||
int nsimd = Simd::Nsimd();
|
int nsimd = Simd::Nsimd();
|
||||||
|
|
||||||
@ -148,13 +171,75 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
GaugeField HUmu(_Umu._grid);
|
|
||||||
HUmu = _Umu*(-0.5);
|
|
||||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
|
||||||
UmuEven=Umu;// Really want a reference.
|
|
||||||
UmuOdd =Umu;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::Report(void)
|
||||||
|
{
|
||||||
|
std::vector<int> latt = GridDefaultLatt();
|
||||||
|
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||||
|
RealD NP = _FourDimGrid->_Nprocessors;
|
||||||
|
|
||||||
|
if ( DhopCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
|
||||||
|
<< " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : "
|
||||||
|
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
|
||||||
|
<< DhopComputeTime << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
|
||||||
|
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||||
|
|
||||||
|
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( DerivCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DerivCalls > 0 || DhopCalls > 0){
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
||||||
|
DhopCalls = 0;
|
||||||
|
DhopCommTime = 0;
|
||||||
|
DhopComputeTime = 0;
|
||||||
|
|
||||||
|
DerivCalls = 0;
|
||||||
|
DerivCommTime = 0;
|
||||||
|
DerivComputeTime = 0;
|
||||||
|
DerivDhopComputeTime = 0;
|
||||||
|
|
||||||
|
Stencil.ZeroCounters();
|
||||||
|
StencilEven.ZeroCounters();
|
||||||
|
StencilOdd.ZeroCounters();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -197,12 +282,13 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
GaugeField &mat,
|
GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
|
DerivCalls++;
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
|
|
||||||
conformable(st._grid,A._grid);
|
conformable(st._grid,A._grid);
|
||||||
@ -213,51 +299,53 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
FermionField Btilde(B._grid);
|
FermionField Btilde(B._grid);
|
||||||
FermionField Atilde(B._grid);
|
FermionField Atilde(B._grid);
|
||||||
|
|
||||||
|
DerivCommTime-=usecond();
|
||||||
st.HaloExchange(B,compressor);
|
st.HaloExchange(B,compressor);
|
||||||
|
DerivCommTime+=usecond();
|
||||||
|
|
||||||
Atilde=A;
|
Atilde=A;
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
DerivComputeTime-=usecond();
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Flip gamma if dag
|
// Flip gamma if dag
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
int gamma = mu;
|
int gamma = mu;
|
||||||
if ( !dag ) gamma+= Nd;
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
////////////////////////
|
////////////////////////
|
||||||
// Call the single hop
|
// Call the single hop
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
DerivDhopComputeTime -= usecond();
|
||||||
for(int sss=0;sss<U._grid->oSites();sss++){
|
PARALLEL_FOR_LOOP
|
||||||
for(int s=0;s<Ls;s++){
|
for (int sss = 0; sss < U._grid->oSites(); sss++) {
|
||||||
int sU=sss;
|
for (int s = 0; s < Ls; s++) {
|
||||||
int sF = s+Ls*sU;
|
int sU = sss;
|
||||||
|
int sF = s + Ls * sU;
|
||||||
|
|
||||||
assert ( sF< B._grid->oSites());
|
assert(sF < B._grid->oSites());
|
||||||
assert ( sU< U._grid->oSites());
|
assert(sU < U._grid->oSites());
|
||||||
|
|
||||||
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
|
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
|
||||||
|
gamma);
|
||||||
////////////////////////////
|
|
||||||
// spin trace outer product
|
|
||||||
////////////////////////////
|
|
||||||
|
|
||||||
|
////////////////////////////
|
||||||
|
// spin trace outer product
|
||||||
|
////////////////////////////
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
DerivDhopComputeTime += usecond();
|
||||||
Impl::InsertForce5D(mat,Btilde,Atilde,mu);
|
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
DerivComputeTime += usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
conformable(A._grid,FermionGrid());
|
conformable(A._grid,FermionGrid());
|
||||||
conformable(A._grid,B._grid);
|
conformable(A._grid,B._grid);
|
||||||
@ -270,9 +358,9 @@ void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
|||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
conformable(A._grid,FermionRedBlackGrid());
|
conformable(A._grid,FermionRedBlackGrid());
|
||||||
conformable(GaugeRedBlackGrid(),mat._grid);
|
conformable(GaugeRedBlackGrid(),mat._grid);
|
||||||
@ -288,9 +376,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
|||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
conformable(A._grid,FermionRedBlackGrid());
|
conformable(A._grid,FermionRedBlackGrid());
|
||||||
conformable(GaugeRedBlackGrid(),mat._grid);
|
conformable(GaugeRedBlackGrid(),mat._grid);
|
||||||
@ -305,32 +393,61 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
|||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
|
DhopCalls++;
|
||||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
int LLs = in._grid->_rdimensions[0];
|
int LLs = in._grid->_rdimensions[0];
|
||||||
|
|
||||||
|
DhopCommTime-=usecond();
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
|
DhopCommTime+=usecond();
|
||||||
|
|
||||||
|
DhopComputeTime-=usecond();
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
if ( dag == DaggerYes ) {
|
if (dag == DaggerYes) {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU=ss;
|
int sU = ss;
|
||||||
int sF=LLs*sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||||
|
out);
|
||||||
}
|
}
|
||||||
|
#ifdef AVX512
|
||||||
|
} else if (stat.is_init() ) {
|
||||||
|
|
||||||
|
int nthreads;
|
||||||
|
stat.start();
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp master
|
||||||
|
nthreads = omp_get_num_threads();
|
||||||
|
int mythread = omp_get_thread_num();
|
||||||
|
stat.enter(mythread);
|
||||||
|
#pragma omp for nowait
|
||||||
|
for(int ss=0;ss<U._grid->oSites();ss++)
|
||||||
|
{
|
||||||
|
int sU=ss;
|
||||||
|
int sF=LLs*sU;
|
||||||
|
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
||||||
|
}
|
||||||
|
stat.exit(mythread);
|
||||||
|
}
|
||||||
|
stat.accum(nthreads);
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU=ss;
|
int sU = ss;
|
||||||
int sF=LLs*sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||||
|
out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DhopComputeTime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -376,8 +493,6 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
|
|||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonFermion5D);
|
FermOpTemplateInstantiate(WilsonFermion5D);
|
||||||
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
||||||
template class WilsonFermion5D<DomainWallRedBlack5dImplF>;
|
|
||||||
template class WilsonFermion5D<DomainWallRedBlack5dImplD>;
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
@ -31,6 +31,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_WILSON_FERMION_5D_H
|
#ifndef GRID_QCD_WILSON_FERMION_5D_H
|
||||||
#define GRID_QCD_WILSON_FERMION_5D_H
|
#define GRID_QCD_WILSON_FERMION_5D_H
|
||||||
|
|
||||||
|
#include <Grid/Stat.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
@ -60,6 +62,18 @@ namespace Grid {
|
|||||||
public:
|
public:
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
typedef WilsonKernels<Impl> Kernels;
|
typedef WilsonKernels<Impl> Kernels;
|
||||||
|
PmuStat stat;
|
||||||
|
|
||||||
|
void Report(void);
|
||||||
|
void ZeroCounters(void);
|
||||||
|
double DhopCalls;
|
||||||
|
double DhopCommTime;
|
||||||
|
double DhopComputeTime;
|
||||||
|
|
||||||
|
double DerivCalls;
|
||||||
|
double DerivCommTime;
|
||||||
|
double DerivComputeTime;
|
||||||
|
double DerivDhopComputeTime;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
@ -125,12 +139,14 @@ namespace Grid {
|
|||||||
double _M5,const ImplParams &p= ImplParams());
|
double _M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
|
/*
|
||||||
WilsonFermion5D(int simd,
|
WilsonFermion5D(int simd,
|
||||||
GaugeField &_Umu,
|
GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
double _M5,const ImplParams &p= ImplParams());
|
double _M5,const ImplParams &p= ImplParams());
|
||||||
|
*/
|
||||||
|
|
||||||
// DoubleStore
|
// DoubleStore
|
||||||
void ImportGauge(const GaugeField &_Umu);
|
void ImportGauge(const GaugeField &_Umu);
|
||||||
|
@ -1,98 +1,54 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
int WilsonKernelsStatic::HandOpt;
|
int WilsonKernelsStatic::HandOpt;
|
||||||
int WilsonKernelsStatic::AsmOpt;
|
int WilsonKernelsStatic::AsmOpt;
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
|
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||||
|
|
||||||
template<class Impl>
|
////////////////////////////////////////////
|
||||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
// Generic implementation; move to different file?
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
////////////////////////////////////////////
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
#ifdef AVX512
|
|
||||||
if ( AsmOpt ) {
|
|
||||||
|
|
||||||
WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
template <class Impl>
|
||||||
|
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
|
||||||
} else {
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
#else
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
|
||||||
{
|
int sU, const FermionField &in, FermionField &out) {
|
||||||
#endif
|
SiteHalfSpinor tmp;
|
||||||
for(int site=0;site<Ns;site++) {
|
SiteHalfSpinor chi;
|
||||||
for(int s=0;s<Ls;s++) {
|
|
||||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
else WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
// No asm implementation yet.
|
|
||||||
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
// else
|
|
||||||
for(int site=0;site<Ns;site++) {
|
|
||||||
for(int s=0;s<Ls;s++) {
|
|
||||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
|
||||||
// Generic implementation; move to different file?
|
|
||||||
////////////////////////////////////////////
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
SiteHalfSpinor tmp;
|
|
||||||
SiteHalfSpinor chi;
|
|
||||||
SiteHalfSpinor *chi_p;
|
SiteHalfSpinor *chi_p;
|
||||||
SiteHalfSpinor Uchi;
|
SiteHalfSpinor Uchi;
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
@ -102,176 +58,175 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrd
|
|||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xp
|
// Xp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xp,sF);
|
SE = st.GetEntry(ptype, Xp, sF);
|
||||||
|
|
||||||
if (SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXp(tmp,in._odata[SE->_offset]);
|
spProjXp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXp(chi,in._odata[SE->_offset]);
|
spProjXp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
|
||||||
spReconXp(result,Uchi);
|
spReconXp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Yp,sF);
|
SE = st.GetEntry(ptype, Yp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYp(tmp,in._odata[SE->_offset]);
|
spProjYp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYp(chi,in._odata[SE->_offset]);
|
spProjYp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
|
||||||
accumReconYp(result,Uchi);
|
accumReconYp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zp,sF);
|
SE = st.GetEntry(ptype, Zp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZp(tmp,in._odata[SE->_offset]);
|
spProjZp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZp(chi,in._odata[SE->_offset]);
|
spProjZp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
|
||||||
accumReconZp(result,Uchi);
|
accumReconZp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tp,sF);
|
SE = st.GetEntry(ptype, Tp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTp(tmp,in._odata[SE->_offset]);
|
spProjTp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTp(chi,in._odata[SE->_offset]);
|
spProjTp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
|
||||||
accumReconTp(result,Uchi);
|
accumReconTp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xm,sF);
|
SE = st.GetEntry(ptype, Xm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXm(tmp,in._odata[SE->_offset]);
|
spProjXm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXm(chi,in._odata[SE->_offset]);
|
spProjXm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
|
||||||
accumReconXm(result,Uchi);
|
accumReconXm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Ym,sF);
|
SE = st.GetEntry(ptype, Ym, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYm(tmp,in._odata[SE->_offset]);
|
spProjYm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYm(chi,in._odata[SE->_offset]);
|
spProjYm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
|
||||||
accumReconYm(result,Uchi);
|
accumReconYm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zm,sF);
|
SE = st.GetEntry(ptype, Zm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZm(tmp,in._odata[SE->_offset]);
|
spProjZm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZm(chi,in._odata[SE->_offset]);
|
spProjZm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
|
||||||
accumReconZm(result,Uchi);
|
accumReconZm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tm,sF);
|
SE = st.GetEntry(ptype, Tm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTm(tmp,in._odata[SE->_offset]);
|
spProjTm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTm(chi,in._odata[SE->_offset]);
|
spProjTm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
|
||||||
accumReconTm(result,Uchi);
|
accumReconTm(result, Uchi);
|
||||||
|
|
||||||
vstream(out._odata[sF],result);
|
vstream(out._odata[sF], result);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Need controls to do interior, exterior, or both
|
||||||
// Need controls to do interior, exterior, or both
|
template <class Impl>
|
||||||
template<class Impl>
|
void WilsonKernels<Impl>::DiracOptGenericDhopSite(
|
||||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out)
|
int sU, const FermionField &in, FermionField &out) {
|
||||||
{
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor chi;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor *chi_p;
|
||||||
SiteHalfSpinor *chi_p;
|
|
||||||
SiteHalfSpinor Uchi;
|
SiteHalfSpinor Uchi;
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
@ -280,299 +235,298 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder
|
|||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xp
|
// Xp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xm,sF);
|
SE = st.GetEntry(ptype, Xm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXp(tmp,in._odata[SE->_offset]);
|
spProjXp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXp(chi,in._odata[SE->_offset]);
|
spProjXp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
|
||||||
spReconXp(result,Uchi);
|
spReconXp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Ym,sF);
|
SE = st.GetEntry(ptype, Ym, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYp(tmp,in._odata[SE->_offset]);
|
spProjYp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYp(chi,in._odata[SE->_offset]);
|
spProjYp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
|
||||||
accumReconYp(result,Uchi);
|
accumReconYp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zm,sF);
|
SE = st.GetEntry(ptype, Zm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZp(tmp,in._odata[SE->_offset]);
|
spProjZp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZp(chi,in._odata[SE->_offset]);
|
spProjZp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
|
||||||
accumReconZp(result,Uchi);
|
accumReconZp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tm,sF);
|
SE = st.GetEntry(ptype, Tm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTp(tmp,in._odata[SE->_offset]);
|
spProjTp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTp(chi,in._odata[SE->_offset]);
|
spProjTp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
|
||||||
accumReconTp(result,Uchi);
|
accumReconTp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xp,sF);
|
SE = st.GetEntry(ptype, Xp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXm(tmp,in._odata[SE->_offset]);
|
spProjXm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXm(chi,in._odata[SE->_offset]);
|
spProjXm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
|
||||||
accumReconXm(result,Uchi);
|
accumReconXm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Yp,sF);
|
SE = st.GetEntry(ptype, Yp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYm(tmp,in._odata[SE->_offset]);
|
spProjYm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYm(chi,in._odata[SE->_offset]);
|
spProjYm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
|
||||||
accumReconYm(result,Uchi);
|
accumReconYm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zp,sF);
|
SE = st.GetEntry(ptype, Zp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZm(tmp,in._odata[SE->_offset]);
|
spProjZm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZm(chi,in._odata[SE->_offset]);
|
spProjZm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
|
||||||
accumReconZm(result,Uchi);
|
accumReconZm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tp,sF);
|
SE = st.GetEntry(ptype, Tp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = χ
|
chi_p = χ
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTm(tmp,in._odata[SE->_offset]);
|
spProjTm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTm(chi,in._odata[SE->_offset]);
|
spProjTm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
|
||||||
accumReconTm(result,Uchi);
|
accumReconTm(result, Uchi);
|
||||||
|
|
||||||
vstream(out._odata[sF],result);
|
vstream(out._odata[sF], result);
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
|
void WilsonKernels<Impl>::DiracOptDhopDir(
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
StencilImpl &st, DoubledGaugeField &U,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma)
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
|
||||||
{
|
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor chi;
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
SiteHalfSpinor Uchi;
|
SiteHalfSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
|
|
||||||
SE=st.GetEntry(ptype,dir,sF);
|
SE = st.GetEntry(ptype, dir, sF);
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
if(gamma==Xp){
|
if (gamma == Xp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjXp(tmp,in._odata[SE->_offset]);
|
spProjXp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjXp(chi,in._odata[SE->_offset]);
|
spProjXp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconXp(result,Uchi);
|
spReconXp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Yp
|
// Yp
|
||||||
if ( gamma==Yp ){
|
if (gamma == Yp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjYp(tmp,in._odata[SE->_offset]);
|
spProjYp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjYp(chi,in._odata[SE->_offset]);
|
spProjYp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconYp(result,Uchi);
|
spReconYp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
if ( gamma ==Zp ){
|
if (gamma == Zp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjZp(tmp,in._odata[SE->_offset]);
|
spProjZp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjZp(chi,in._odata[SE->_offset]);
|
spProjZp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconZp(result,Uchi);
|
spReconZp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
if ( gamma ==Tp ){
|
if (gamma == Tp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjTp(tmp,in._odata[SE->_offset]);
|
spProjTp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjTp(chi,in._odata[SE->_offset]);
|
spProjTp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconTp(result,Uchi);
|
spReconTp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
if ( gamma==Xm ){
|
if (gamma == Xm) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjXm(tmp,in._odata[SE->_offset]);
|
spProjXm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjXm(chi,in._odata[SE->_offset]);
|
spProjXm(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconXm(result,Uchi);
|
spReconXm(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
if ( gamma == Ym ){
|
if (gamma == Ym) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjYm(tmp,in._odata[SE->_offset]);
|
spProjYm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjYm(chi,in._odata[SE->_offset]);
|
spProjYm(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconYm(result,Uchi);
|
spReconYm(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
if ( gamma == Zm ){
|
if (gamma == Zm) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjZm(tmp,in._odata[SE->_offset]);
|
spProjZm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjZm(chi,in._odata[SE->_offset]);
|
spProjZm(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconZm(result,Uchi);
|
spReconZm(result, Uchi);
|
||||||
}
|
|
||||||
|
|
||||||
// Tm
|
|
||||||
if ( gamma==Tm ) {
|
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
|
||||||
spProjTm(tmp,in._odata[SE->_offset]);
|
|
||||||
permute(chi,tmp,ptype);
|
|
||||||
} else if ( SE->_is_local ) {
|
|
||||||
spProjTm(chi,in._odata[SE->_offset]);
|
|
||||||
} else {
|
|
||||||
chi=buf[SE->_offset];
|
|
||||||
}
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
|
||||||
spReconTm(result,Uchi);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vstream(out._odata[sF],result);
|
// Tm
|
||||||
|
if (gamma == Tm) {
|
||||||
|
if (SE->_is_local && SE->_permute) {
|
||||||
|
spProjTm(tmp, in._odata[SE->_offset]);
|
||||||
|
permute(chi, tmp, ptype);
|
||||||
|
} else if (SE->_is_local) {
|
||||||
|
spProjTm(chi, in._odata[SE->_offset]);
|
||||||
|
} else {
|
||||||
|
chi = buf[SE->_offset];
|
||||||
|
}
|
||||||
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
|
spReconTm(result, Uchi);
|
||||||
|
}
|
||||||
|
|
||||||
|
vstream(out._odata[sF], result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FermOpTemplateInstantiate(WilsonKernels);
|
||||||
FermOpTemplateInstantiate(WilsonKernels);
|
AdjointFermOpTemplateInstantiate(WilsonKernels);
|
||||||
|
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
|
||||||
template class WilsonKernels<DomainWallRedBlack5dImplF>;
|
|
||||||
template class WilsonKernels<DomainWallRedBlack5dImplD>;
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
@ -1,34 +1,35 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
|
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_DHOP_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_DHOP_H
|
#ifndef GRID_QCD_DHOP_H
|
||||||
|
#define GRID_QCD_DHOP_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -48,51 +49,158 @@ namespace Grid {
|
|||||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
typedef FermionOperator<Impl> Base;
|
typedef FermionOperator<Impl> Base;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
template <bool EnableBool = true>
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||||
int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
DiracOptDhopSite(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
void DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out);
|
FermionField &out) {
|
||||||
|
#ifdef AVX512
|
||||||
|
if (AsmOpt) {
|
||||||
|
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
|
||||||
|
in, out);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
if (HandOpt)
|
||||||
|
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
|
||||||
|
in, out);
|
||||||
|
else
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
|
||||||
|
in, out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||||
|
DiracOptDhopSite(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||||
|
FermionField &out) {
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
|
||||||
|
out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
|
||||||
|
void>::type
|
||||||
|
DiracOptDhopSiteDag(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||||
|
FermionField &out) {
|
||||||
|
#ifdef AVX512
|
||||||
|
if (AsmOpt) {
|
||||||
|
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
|
||||||
|
Ns, in, out);
|
||||||
|
} else {
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
if (HandOpt)
|
||||||
|
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
|
||||||
|
in, out);
|
||||||
|
else
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
|
||||||
|
sU, in, out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<
|
||||||
|
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
|
||||||
|
void>::type
|
||||||
|
DiracOptDhopSiteDag(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||||
|
FermionField &out) {
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
|
||||||
|
in, out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiracOptDhopDir(
|
||||||
|
StencilImpl &st, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
|
||||||
|
int gamma);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Specialised variants
|
||||||
|
void DiracOptGenericDhopSite(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptGenericDhopSiteDag(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptAsmDhopSite(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||||
|
FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptAsmDhopSiteDag(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||||
|
FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptHandDhopSite(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptHandDhopSiteDag(
|
||||||
|
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
public:
|
||||||
|
WilsonKernels(const ImplParams &p = ImplParams());
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Specialised variants
|
|
||||||
void DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in,FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out);
|
|
||||||
public:
|
|
||||||
|
|
||||||
WilsonKernels(const ImplParams &p= ImplParams());
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -26,59 +26,77 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// Default to no assembler implementation
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
|
||||||
// Default to no assembler implementation
|
|
||||||
///////////////////////////////////////////////////////////
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(AVX512)
|
#if defined(AVX512)
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// If we are AVX512 specialise the single precision routine
|
// If we are AVX512 specialise the single precision routine
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#include <simd/Intel512wilson.h>
|
#include <simd/Intel512wilson.h>
|
||||||
#include <simd/Intel512single.h>
|
#include <simd/Intel512single.h>
|
||||||
|
|
||||||
static Vector<vComplexF> signs;
|
static Vector<vComplexF> signs;
|
||||||
|
|
||||||
int setupSigns(void ){
|
int setupSigns(void ){
|
||||||
Vector<vComplexF> bother(2);
|
Vector<vComplexF> bother(2);
|
||||||
signs = bother;
|
signs = bother;
|
||||||
vrsign(signs[0]);
|
vrsign(signs[0]);
|
||||||
visign(signs[1]);
|
visign(signs[1]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
static int signInit = setupSigns();
|
static int signInit = setupSigns();
|
||||||
|
|
||||||
#define label(A) ilabel(A)
|
#define label(A) ilabel(A)
|
||||||
#define ilabel(A) ".globl\n" #A ":\n"
|
#define ilabel(A) ".globl\n" #A ":\n"
|
||||||
|
|
||||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||||
#define FX(A) WILSONASM_ ##A
|
#define FX(A) WILSONASM_ ##A
|
||||||
template<>
|
|
||||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
#undef KERNEL_DAG
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template<>
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
|
#define KERNEL_DAG
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef VMOVIDUP
|
#undef VMOVIDUP
|
||||||
#undef VMOVRDUP
|
#undef VMOVRDUP
|
||||||
#undef MAYBEPERM
|
#undef MAYBEPERM
|
||||||
@ -89,32 +107,43 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
|
|||||||
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
||||||
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
||||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||||
template<>
|
|
||||||
void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
#undef KERNEL_DAG
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template<>
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
|
#define KERNEL_DAG
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
#define INSTANTIATE_ASM(A)\
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
|
||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
|
||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
INSTANTIATE_ASM(WilsonImplF);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_ASM(WilsonImplD);
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
INSTANTIATE_ASM(ZWilsonImplF);
|
||||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
INSTANTIATE_ASM(ZWilsonImplD);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_ASM(GparityWilsonImplF);
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
INSTANTIATE_ASM(GparityWilsonImplD);
|
||||||
}}
|
INSTANTIATE_ASM(DomainWallVec5dImplF);
|
||||||
|
INSTANTIATE_ASM(DomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
|
||||||
|
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -30,7 +30,11 @@
|
|||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns);
|
LOAD64(%r10,isigns);
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XP_PROJMEM(base);
|
||||||
|
#else
|
||||||
XM_PROJMEM(base);
|
XM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR3,perm);
|
MAYBEPERM(PERMUTE_DIR3,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -41,15 +45,22 @@
|
|||||||
MULT_2SPIN_DIR_PFXP(Xp,basep);
|
MULT_2SPIN_DIR_PFXP(Xp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns);
|
LOAD64(%r10,isigns);
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XP_RECON;
|
||||||
|
#else
|
||||||
XM_RECON;
|
XM_RECON;
|
||||||
|
#endif
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YP_PROJMEM(base);
|
||||||
|
#else
|
||||||
YM_PROJMEM(base);
|
YM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR2,perm);
|
MAYBEPERM(PERMUTE_DIR2,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -60,7 +71,11 @@
|
|||||||
MULT_2SPIN_DIR_PFYP(Yp,basep);
|
MULT_2SPIN_DIR_PFYP(Yp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YP_RECON_ACCUM;
|
||||||
|
#else
|
||||||
YM_RECON_ACCUM;
|
YM_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
@ -68,7 +83,11 @@
|
|||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZP_PROJMEM(base);
|
||||||
|
#else
|
||||||
ZM_PROJMEM(base);
|
ZM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR1,perm);
|
MAYBEPERM(PERMUTE_DIR1,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -79,7 +98,11 @@
|
|||||||
MULT_2SPIN_DIR_PFZP(Zp,basep);
|
MULT_2SPIN_DIR_PFZP(Zp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZP_RECON_ACCUM;
|
||||||
|
#else
|
||||||
ZM_RECON_ACCUM;
|
ZM_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
@ -87,7 +110,11 @@
|
|||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TP_PROJMEM(base);
|
||||||
|
#else
|
||||||
TM_PROJMEM(base);
|
TM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR0,perm);
|
MAYBEPERM(PERMUTE_DIR0,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -98,16 +125,26 @@
|
|||||||
MULT_2SPIN_DIR_PFTP(Tp,basep);
|
MULT_2SPIN_DIR_PFTP(Tp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TP_RECON_ACCUM;
|
||||||
|
#else
|
||||||
TM_RECON_ACCUM;
|
TM_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
|
#ifndef STREAM_STORE
|
||||||
basep= (uint64_t) &out._odata[ss];
|
basep= (uint64_t) &out._odata[ss];
|
||||||
|
#endif
|
||||||
// basep= st.GetPFInfo(nent,plocal); nent++;
|
// basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XM_PROJMEM(base);
|
||||||
|
#else
|
||||||
XP_PROJMEM(base);
|
XP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR3,perm);
|
MAYBEPERM(PERMUTE_DIR3,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -118,7 +155,11 @@
|
|||||||
MULT_2SPIN_DIR_PFXM(Xm,basep);
|
MULT_2SPIN_DIR_PFXM(Xm,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
XP_RECON_ACCUM;
|
XP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
@ -126,7 +167,11 @@
|
|||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YM_PROJMEM(base);
|
||||||
|
#else
|
||||||
YP_PROJMEM(base);
|
YP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR2,perm);
|
MAYBEPERM(PERMUTE_DIR2,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -137,7 +182,11 @@
|
|||||||
MULT_2SPIN_DIR_PFYM(Ym,basep);
|
MULT_2SPIN_DIR_PFYM(Ym,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
YP_RECON_ACCUM;
|
YP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
@ -145,7 +194,11 @@
|
|||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZM_PROJMEM(base);
|
||||||
|
#else
|
||||||
ZP_PROJMEM(base);
|
ZP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR1,perm);
|
MAYBEPERM(PERMUTE_DIR1,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -156,7 +209,11 @@
|
|||||||
MULT_2SPIN_DIR_PFZM(Zm,basep);
|
MULT_2SPIN_DIR_PFZM(Zm,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
ZP_RECON_ACCUM;
|
ZP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
@ -164,18 +221,28 @@
|
|||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TM_PROJMEM(base);
|
||||||
|
#else
|
||||||
TP_PROJMEM(base);
|
TP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR0,perm);
|
MAYBEPERM(PERMUTE_DIR0,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
}
|
}
|
||||||
base= (uint64_t) &out._odata[ss];
|
base= (uint64_t) &out._odata[ss];
|
||||||
|
#ifndef STREAM_STORE
|
||||||
PREFETCH_CHIMU(base);
|
PREFETCH_CHIMU(base);
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
MULT_2SPIN_DIR_PFTM(Tm,basep);
|
MULT_2SPIN_DIR_PFTM(Tm,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
TP_RECON_ACCUM;
|
TP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
SAVE_RESULT(base,basep);
|
SAVE_RESULT(base,basep);
|
||||||
|
@ -311,8 +311,8 @@ namespace Grid {
|
|||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
@ -554,8 +554,8 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &l
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
|
|||||||
////////////// Wilson ; uses this implementation /////////////////////
|
////////////// Wilson ; uses this implementation /////////////////////
|
||||||
// Need Nc=3 though //
|
// Need Nc=3 though //
|
||||||
|
|
||||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
#define INSTANTIATE_THEM(A) \
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
|
||||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
|
||||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
int ss,int sU,const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
INSTANTIATE_THEM(WilsonImplF);
|
||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
INSTANTIATE_THEM(WilsonImplD);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_THEM(ZWilsonImplF);
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
INSTANTIATE_THEM(ZWilsonImplD);
|
||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
INSTANTIATE_THEM(GparityWilsonImplF);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_THEM(GparityWilsonImplD);
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
INSTANTIATE_THEM(DomainWallVec5dImplF);
|
||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
INSTANTIATE_THEM(DomainWallVec5dImplD);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
|
||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -28,7 +28,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_WILSON_TM_FERMION_H
|
#ifndef GRID_QCD_WILSON_TM_FERMION_H
|
||||||
#define GRID_QCD_WILSON_TM_FERMION_H
|
#define GRID_QCD_WILSON_TM_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
79
lib/qcd/action/fermion/ZMobiusFermion.h
Normal file
79
lib/qcd/action/fermion/ZMobiusFermion.h
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/ZMobiusFermion.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
|
||||||
|
#define GRID_QCD_ZMOBIUS_FERMION_H
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
class ZMobiusFermion : public CayleyFermion5D<Impl>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
public:
|
||||||
|
|
||||||
|
virtual void Instantiatable(void) {};
|
||||||
|
// Constructors
|
||||||
|
ZMobiusFermion(GaugeField &_Umu,
|
||||||
|
GridCartesian &FiveDimGrid,
|
||||||
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
|
GridCartesian &FourDimGrid,
|
||||||
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
|
RealD _mass,RealD _M5,
|
||||||
|
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
|
CayleyFermion5D<Impl>(_Umu,
|
||||||
|
FiveDimGrid,
|
||||||
|
FiveDimRedBlackGrid,
|
||||||
|
FourDimGrid,
|
||||||
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
|
|
||||||
|
{
|
||||||
|
RealD eps = 1.0;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
|
||||||
|
std::vector<Coeff_t> zgamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++){
|
||||||
|
zgamma[s] = gamma[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call base setter
|
||||||
|
this->SetCoefficientsInternal(1.0,zgamma,b,c);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -1,181 +1,194 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
|
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_GAUGE_IMPL_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_GAUGE_IMPL_H
|
#ifndef GRID_QCD_GAUGE_IMPL_H
|
||||||
|
#define GRID_QCD_GAUGE_IMPL_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
namespace QCD {
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implementation dependent gauge types
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template <class Gimpl> class WilsonLoops;
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Implementation dependent gauge types
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
template<class Gimpl> class WilsonLoops;
|
#define INHERIT_GIMPL_TYPES(GImpl) \
|
||||||
|
typedef typename GImpl::Simd Simd; \
|
||||||
|
typedef typename GImpl::GaugeLinkField GaugeLinkField; \
|
||||||
|
typedef typename GImpl::GaugeField GaugeField; \
|
||||||
|
typedef typename GImpl::SiteGaugeField SiteGaugeField; \
|
||||||
|
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
|
||||||
|
|
||||||
#define INHERIT_GIMPL_TYPES(GImpl) \
|
//
|
||||||
typedef typename GImpl::Simd Simd;\
|
template <class S, int Nrepresentation = Nc> class GaugeImplTypes {
|
||||||
typedef typename GImpl::GaugeLinkField GaugeLinkField;\
|
public:
|
||||||
typedef typename GImpl::GaugeField GaugeField;\
|
typedef S Simd;
|
||||||
typedef typename GImpl::SiteGaugeField SiteGaugeField;\
|
|
||||||
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
|
|
||||||
|
|
||||||
|
template <typename vtype>
|
||||||
|
using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>;
|
||||||
|
template <typename vtype>
|
||||||
|
using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>;
|
||||||
|
|
||||||
//
|
typedef iImplGaugeLink<Simd> SiteGaugeLink;
|
||||||
template<class S,int Nrepresentation=Nc>
|
typedef iImplGaugeField<Simd> SiteGaugeField;
|
||||||
class GaugeImplTypes {
|
|
||||||
public:
|
|
||||||
|
|
||||||
typedef S Simd;
|
|
||||||
|
|
||||||
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
|
||||||
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
|
|
||||||
|
|
||||||
typedef iImplGaugeLink <Simd> SiteGaugeLink;
|
|
||||||
typedef iImplGaugeField <Simd> SiteGaugeField;
|
|
||||||
|
|
||||||
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
|
|
||||||
typedef Lattice<SiteGaugeField> GaugeField;
|
|
||||||
|
|
||||||
};
|
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised
|
||||||
|
// gauge field, lorentz... all
|
||||||
|
// ugly
|
||||||
|
typedef Lattice<SiteGaugeField> GaugeField;
|
||||||
|
|
||||||
// Composition with smeared link, bc's etc.. probably need multiple inheritance
|
// Move this elsewhere? FIXME
|
||||||
// Variable precision "S" and variable Nc
|
static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W,
|
||||||
template<class GimplTypes>
|
int mu) { // U[mu] += W
|
||||||
class PeriodicGaugeImpl : public GimplTypes {
|
PARALLEL_FOR_LOOP
|
||||||
public:
|
for (auto ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
|
U._odata[ss]._internal[mu] =
|
||||||
INHERIT_GIMPL_TYPES(GimplTypes);
|
U._odata[ss]._internal[mu] + W._odata[ss]._internal;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Support needed for the assembly of loops including all boundary condition effects such as conjugate bcs
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
template<class covariant> static inline
|
|
||||||
Lattice<covariant> CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice<covariant> &field) {
|
|
||||||
return PeriodicBC::CovShiftForward(Link,mu,field);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class covariant> static inline
|
|
||||||
Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice<covariant> &field) {
|
|
||||||
return PeriodicBC::CovShiftBackward(Link,mu,field);
|
|
||||||
}
|
|
||||||
static inline
|
|
||||||
GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
|
||||||
return Cshift(adj(Link),mu,-1);
|
|
||||||
}
|
|
||||||
static inline
|
|
||||||
GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
|
||||||
return Link;
|
|
||||||
}
|
|
||||||
static inline
|
|
||||||
GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
|
|
||||||
return Cshift(Link,mu,1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool isPeriodicGaugeField(void) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// Composition with smeared link, bc's etc.. probably need multiple inheritance
|
|
||||||
// Variable precision "S" and variable Nc
|
|
||||||
template<class GimplTypes>
|
|
||||||
class ConjugateGaugeImpl : public GimplTypes {
|
|
||||||
public:
|
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(GimplTypes);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Support needed for the assembly of loops including all boundary condition effects such as Gparity.
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
template<class covariant> static
|
|
||||||
Lattice<covariant> CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice<covariant> &field) {
|
|
||||||
return ConjugateBC::CovShiftForward(Link,mu,field);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class covariant> static
|
|
||||||
Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice<covariant> &field) {
|
|
||||||
return ConjugateBC::CovShiftBackward(Link,mu,field);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
|
||||||
GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
|
||||||
GridBase *grid = Link._grid;
|
|
||||||
int Lmu = grid->GlobalDimensions()[mu]-1;
|
|
||||||
|
|
||||||
Lattice<iScalar<vInteger> > coor(grid); LatticeCoordinate(coor,mu);
|
|
||||||
|
|
||||||
GaugeLinkField tmp (grid);
|
|
||||||
tmp=adj(Link);
|
|
||||||
tmp = where(coor==Lmu,conjugate(tmp),tmp);
|
|
||||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
|
||||||
}
|
|
||||||
static inline
|
|
||||||
GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
|
||||||
return Link;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
|
||||||
GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
|
|
||||||
GridBase *grid = Link._grid;
|
|
||||||
int Lmu = grid->GlobalDimensions()[mu]-1;
|
|
||||||
|
|
||||||
Lattice<iScalar<vInteger> > coor(grid); LatticeCoordinate(coor,mu);
|
|
||||||
|
|
||||||
GaugeLinkField tmp (grid);
|
|
||||||
tmp=Cshift(Link,mu,1);
|
|
||||||
tmp=where(coor==Lmu,conjugate(tmp),tmp);
|
|
||||||
return tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool isPeriodicGaugeField(void) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef GaugeImplTypes<vComplex,Nc> GimplTypesR;
|
|
||||||
typedef GaugeImplTypes<vComplexF,Nc> GimplTypesF;
|
|
||||||
typedef GaugeImplTypes<vComplexD,Nc> GimplTypesD;
|
|
||||||
|
|
||||||
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
|
|
||||||
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
|
|
||||||
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
|
|
||||||
|
|
||||||
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
|
|
||||||
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
|
|
||||||
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Composition with smeared link, bc's etc.. probably need multiple inheritance
|
||||||
|
// Variable precision "S" and variable Nc
|
||||||
|
template <class GimplTypes> class PeriodicGaugeImpl : public GimplTypes {
|
||||||
|
public:
|
||||||
|
INHERIT_GIMPL_TYPES(GimplTypes);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Support needed for the assembly of loops including all boundary condition
|
||||||
|
// effects such as conjugate bcs
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template <class covariant>
|
||||||
|
static inline Lattice<covariant>
|
||||||
|
CovShiftForward(const GaugeLinkField &Link, int mu,
|
||||||
|
const Lattice<covariant> &field) {
|
||||||
|
return PeriodicBC::CovShiftForward(Link, mu, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class covariant>
|
||||||
|
static inline Lattice<covariant>
|
||||||
|
CovShiftBackward(const GaugeLinkField &Link, int mu,
|
||||||
|
const Lattice<covariant> &field) {
|
||||||
|
return PeriodicBC::CovShiftBackward(Link, mu, field);
|
||||||
|
}
|
||||||
|
static inline GaugeLinkField
|
||||||
|
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
||||||
|
return Cshift(adj(Link), mu, -1);
|
||||||
|
}
|
||||||
|
static inline GaugeLinkField
|
||||||
|
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
||||||
|
return Link;
|
||||||
|
}
|
||||||
|
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
|
||||||
|
return Cshift(Link, mu, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool isPeriodicGaugeField(void) { return true; }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Composition with smeared link, bc's etc.. probably need multiple inheritance
|
||||||
|
// Variable precision "S" and variable Nc
|
||||||
|
template <class GimplTypes> class ConjugateGaugeImpl : public GimplTypes {
|
||||||
|
public:
|
||||||
|
INHERIT_GIMPL_TYPES(GimplTypes);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Support needed for the assembly of loops including all boundary condition
|
||||||
|
// effects such as Gparity.
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template <class covariant>
|
||||||
|
static Lattice<covariant> CovShiftForward(const GaugeLinkField &Link, int mu,
|
||||||
|
const Lattice<covariant> &field) {
|
||||||
|
return ConjugateBC::CovShiftForward(Link, mu, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class covariant>
|
||||||
|
static Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,
|
||||||
|
const Lattice<covariant> &field) {
|
||||||
|
return ConjugateBC::CovShiftBackward(Link, mu, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline GaugeLinkField
|
||||||
|
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
||||||
|
GridBase *grid = Link._grid;
|
||||||
|
int Lmu = grid->GlobalDimensions()[mu] - 1;
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger>> coor(grid);
|
||||||
|
LatticeCoordinate(coor, mu);
|
||||||
|
|
||||||
|
GaugeLinkField tmp(grid);
|
||||||
|
tmp = adj(Link);
|
||||||
|
tmp = where(coor == Lmu, conjugate(tmp), tmp);
|
||||||
|
return Cshift(tmp, mu, -1); // moves towards positive mu
|
||||||
|
}
|
||||||
|
static inline GaugeLinkField
|
||||||
|
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
||||||
|
return Link;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
|
||||||
|
GridBase *grid = Link._grid;
|
||||||
|
int Lmu = grid->GlobalDimensions()[mu] - 1;
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger>> coor(grid);
|
||||||
|
LatticeCoordinate(coor, mu);
|
||||||
|
|
||||||
|
GaugeLinkField tmp(grid);
|
||||||
|
tmp = Cshift(Link, mu, 1);
|
||||||
|
tmp = where(coor == Lmu, conjugate(tmp), tmp);
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool isPeriodicGaugeField(void) { return false; }
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
|
||||||
|
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
|
||||||
|
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
|
||||||
|
|
||||||
|
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
|
||||||
|
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
|
||||||
|
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
|
||||||
|
|
||||||
|
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
|
||||||
|
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
|
||||||
|
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
|
||||||
|
|
||||||
|
typedef PeriodicGaugeImpl<GimplAdjointTypesR> PeriodicGimplAdjR; // Real.. whichever prec
|
||||||
|
typedef PeriodicGaugeImpl<GimplAdjointTypesF> PeriodicGimplAdjF; // Float
|
||||||
|
typedef PeriodicGaugeImpl<GimplAdjointTypesD> PeriodicGimplAdjD; // Double
|
||||||
|
|
||||||
|
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
|
||||||
|
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
|
||||||
|
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,212 +1,214 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h
|
Source file: ./lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H
|
#ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H
|
||||||
#define QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H
|
#define QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid {
|
||||||
namespace QCD{
|
namespace QCD {
|
||||||
|
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
// One flavour rational
|
// One flavour rational
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
|
|
||||||
// S_f = chi^dag * N(Mpc^dag*Mpc)/D(Mpc^dag*Mpc) * chi
|
// S_f = chi^dag * N(Mpc^dag*Mpc)/D(Mpc^dag*Mpc) * chi
|
||||||
|
//
|
||||||
|
// Here, M is some operator
|
||||||
|
// N and D makeup the rat. poly
|
||||||
|
//
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
class OneFlavourEvenOddRationalPseudoFermionAction
|
||||||
|
: public Action<typename Impl::GaugeField> {
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
|
typedef OneFlavourRationalParams Params;
|
||||||
|
Params param;
|
||||||
|
|
||||||
|
MultiShiftFunction PowerHalf;
|
||||||
|
MultiShiftFunction PowerNegHalf;
|
||||||
|
MultiShiftFunction PowerQuarter;
|
||||||
|
MultiShiftFunction PowerNegQuarter;
|
||||||
|
|
||||||
|
private:
|
||||||
|
FermionOperator<Impl> &FermOp; // the basic operator
|
||||||
|
|
||||||
|
// NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us
|
||||||
|
// historically
|
||||||
|
// and hasenbusch works better
|
||||||
|
|
||||||
|
FermionField PhiEven; // the pseudo fermion field for this trajectory
|
||||||
|
FermionField PhiOdd; // the pseudo fermion field for this trajectory
|
||||||
|
|
||||||
|
public:
|
||||||
|
OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator<Impl> &Op,
|
||||||
|
Params &p)
|
||||||
|
: FermOp(Op),
|
||||||
|
PhiEven(Op.FermionRedBlackGrid()),
|
||||||
|
PhiOdd(Op.FermionRedBlackGrid()),
|
||||||
|
param(p) {
|
||||||
|
AlgRemez remez(param.lo, param.hi, param.precision);
|
||||||
|
|
||||||
|
// MdagM^(+- 1/2)
|
||||||
|
std::cout << GridLogMessage << "Generating degree " << param.degree
|
||||||
|
<< " for x^(1/2)" << std::endl;
|
||||||
|
remez.generateApprox(param.degree, 1, 2);
|
||||||
|
PowerHalf.Init(remez, param.tolerance, false);
|
||||||
|
PowerNegHalf.Init(remez, param.tolerance, true);
|
||||||
|
|
||||||
|
// MdagM^(+- 1/4)
|
||||||
|
std::cout << GridLogMessage << "Generating degree " << param.degree
|
||||||
|
<< " for x^(1/4)" << std::endl;
|
||||||
|
remez.generateApprox(param.degree, 1, 4);
|
||||||
|
PowerQuarter.Init(remez, param.tolerance, false);
|
||||||
|
PowerNegQuarter.Init(remez, param.tolerance, true);
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) {
|
||||||
|
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
|
||||||
|
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
|
||||||
|
// Phi = MpcdagMpc^{1/4} eta
|
||||||
//
|
//
|
||||||
// Here, M is some operator
|
// P(eta) = e^{- eta^dag eta}
|
||||||
// N and D makeup the rat. poly
|
|
||||||
//
|
//
|
||||||
|
// e^{x^2/2 sig^2} => sig^2 = 0.5.
|
||||||
template<class Impl>
|
//
|
||||||
class OneFlavourEvenOddRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
|
// So eta should be of width sig = 1/sqrt(2).
|
||||||
public:
|
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
|
||||||
|
|
||||||
typedef OneFlavourRationalParams Params;
|
RealD scale = std::sqrt(0.5);
|
||||||
Params param;
|
|
||||||
|
|
||||||
MultiShiftFunction PowerHalf ;
|
FermionField eta(FermOp.FermionGrid());
|
||||||
MultiShiftFunction PowerNegHalf;
|
FermionField etaOdd(FermOp.FermionRedBlackGrid());
|
||||||
MultiShiftFunction PowerQuarter;
|
FermionField etaEven(FermOp.FermionRedBlackGrid());
|
||||||
MultiShiftFunction PowerNegQuarter;
|
|
||||||
|
|
||||||
private:
|
gaussian(pRNG, eta);
|
||||||
|
eta = eta * scale;
|
||||||
FermionOperator<Impl> & FermOp;// the basic operator
|
|
||||||
|
|
||||||
// NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us historically
|
pickCheckerboard(Even, etaEven, eta);
|
||||||
// and hasenbusch works better
|
pickCheckerboard(Odd, etaOdd, eta);
|
||||||
|
|
||||||
FermionField PhiEven; // the pseudo fermion field for this trajectory
|
FermOp.ImportGauge(U);
|
||||||
FermionField PhiOdd; // the pseudo fermion field for this trajectory
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
// mutishift CG
|
||||||
|
SchurDifferentiableOperator<Impl> Mpc(FermOp);
|
||||||
|
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter, PowerQuarter);
|
||||||
|
msCG(Mpc, etaOdd, PhiOdd);
|
||||||
|
|
||||||
OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator<Impl> &Op,
|
//////////////////////////////////////////////////////
|
||||||
Params & p ) : FermOp(Op),
|
// FIXME : Clover term not yet..
|
||||||
PhiEven(Op.FermionRedBlackGrid()),
|
//////////////////////////////////////////////////////
|
||||||
PhiOdd (Op.FermionRedBlackGrid()),
|
|
||||||
param(p)
|
|
||||||
{
|
|
||||||
AlgRemez remez(param.lo,param.hi,param.precision);
|
|
||||||
|
|
||||||
// MdagM^(+- 1/2)
|
assert(FermOp.ConstEE() == 1);
|
||||||
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl;
|
PhiEven = zero;
|
||||||
remez.generateApprox(param.degree,1,2);
|
};
|
||||||
PowerHalf.Init(remez,param.tolerance,false);
|
|
||||||
PowerNegHalf.Init(remez,param.tolerance,true);
|
|
||||||
|
|
||||||
// MdagM^(+- 1/4)
|
//////////////////////////////////////////////////////
|
||||||
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
|
// S = phi^dag (Mdag M)^-1/2 phi
|
||||||
remez.generateApprox(param.degree,1,4);
|
//////////////////////////////////////////////////////
|
||||||
PowerQuarter.Init(remez,param.tolerance,false);
|
virtual RealD S(const GaugeField &U) {
|
||||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
FermOp.ImportGauge(U);
|
||||||
};
|
|
||||||
|
|
||||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
|
||||||
|
|
||||||
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
|
FermionField Y(FermOp.FermionRedBlackGrid());
|
||||||
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
|
|
||||||
// Phi = MpcdagMpc^{1/4} eta
|
|
||||||
//
|
|
||||||
// P(eta) = e^{- eta^dag eta}
|
|
||||||
//
|
|
||||||
// e^{x^2/2 sig^2} => sig^2 = 0.5.
|
|
||||||
//
|
|
||||||
// So eta should be of width sig = 1/sqrt(2).
|
|
||||||
|
|
||||||
RealD scale = std::sqrt(0.5);
|
SchurDifferentiableOperator<Impl> Mpc(FermOp);
|
||||||
|
|
||||||
FermionField eta (FermOp.FermionGrid());
|
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,
|
||||||
FermionField etaOdd (FermOp.FermionRedBlackGrid());
|
PowerNegQuarter);
|
||||||
FermionField etaEven(FermOp.FermionRedBlackGrid());
|
|
||||||
|
|
||||||
gaussian(pRNG,eta); eta=eta*scale;
|
msCG(Mpc, PhiOdd, Y);
|
||||||
|
|
||||||
pickCheckerboard(Even,etaEven,eta);
|
RealD action = norm2(Y);
|
||||||
pickCheckerboard(Odd,etaOdd,eta);
|
std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 "
|
||||||
|
"solve or -1/2 solve faster??? "
|
||||||
|
<< action << std::endl;
|
||||||
|
|
||||||
FermOp.ImportGauge(U);
|
return action;
|
||||||
|
};
|
||||||
|
|
||||||
// mutishift CG
|
//////////////////////////////////////////////////////
|
||||||
SchurDifferentiableOperator<Impl> Mpc(FermOp);
|
// Need
|
||||||
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,PowerQuarter);
|
// dS_f/dU = chi^dag d[N/D] chi
|
||||||
msCG(Mpc,etaOdd,PhiOdd);
|
//
|
||||||
|
// N/D is expressed as partial fraction expansion:
|
||||||
|
//
|
||||||
|
// a0 + \sum_k ak/(M^dagM + bk)
|
||||||
|
//
|
||||||
|
// d[N/D] is then
|
||||||
|
//
|
||||||
|
// \sum_k -ak [M^dagM+bk]^{-1} [ dM^dag M + M^dag dM ] [M^dag M +
|
||||||
|
// bk]^{-1}
|
||||||
|
//
|
||||||
|
// Need
|
||||||
|
// Mf Phi_k = [MdagM+bk]^{-1} Phi
|
||||||
|
// Mf Phi = \sum_k ak [MdagM+bk]^{-1} Phi
|
||||||
|
//
|
||||||
|
// With these building blocks
|
||||||
|
//
|
||||||
|
// dS/dU = \sum_k -ak Mf Phi_k^dag [ dM^dag M + M^dag dM ] Mf
|
||||||
|
// Phi_k
|
||||||
|
// S = innerprodReal(Phi,Mf Phi);
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
|
||||||
|
const int Npole = PowerNegHalf.poles.size();
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
std::vector<FermionField> MPhi_k(Npole, FermOp.FermionRedBlackGrid());
|
||||||
// FIXME : Clover term not yet..
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
assert(FermOp.ConstEE() == 1);
|
FermionField X(FermOp.FermionRedBlackGrid());
|
||||||
PhiEven = zero;
|
FermionField Y(FermOp.FermionRedBlackGrid());
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
GaugeField tmp(FermOp.GaugeGrid());
|
||||||
// S = phi^dag (Mdag M)^-1/2 phi
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
virtual RealD S(const GaugeField &U) {
|
|
||||||
|
|
||||||
FermOp.ImportGauge(U);
|
FermOp.ImportGauge(U);
|
||||||
|
|
||||||
FermionField Y(FermOp.FermionRedBlackGrid());
|
SchurDifferentiableOperator<Impl> Mpc(FermOp);
|
||||||
|
|
||||||
SchurDifferentiableOperator<Impl> Mpc(FermOp);
|
|
||||||
|
|
||||||
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,PowerNegQuarter);
|
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter, PowerNegHalf);
|
||||||
|
|
||||||
msCG(Mpc,PhiOdd,Y);
|
msCG(Mpc, PhiOdd, MPhi_k);
|
||||||
|
|
||||||
RealD action = norm2(Y);
|
dSdU = zero;
|
||||||
std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 solve or -1/2 solve faster??? "<<action<<std::endl;
|
for (int k = 0; k < Npole; k++) {
|
||||||
|
RealD ak = PowerNegHalf.residues[k];
|
||||||
|
|
||||||
return action;
|
X = MPhi_k[k];
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
Mpc.Mpc(X, Y);
|
||||||
// Need
|
Mpc.MpcDeriv(tmp, Y, X);
|
||||||
// dS_f/dU = chi^dag d[N/D] chi
|
dSdU = dSdU + ak * tmp;
|
||||||
//
|
Mpc.MpcDagDeriv(tmp, X, Y);
|
||||||
// N/D is expressed as partial fraction expansion:
|
dSdU = dSdU + ak * tmp;
|
||||||
//
|
}
|
||||||
// a0 + \sum_k ak/(M^dagM + bk)
|
|
||||||
//
|
|
||||||
// d[N/D] is then
|
|
||||||
//
|
|
||||||
// \sum_k -ak [M^dagM+bk]^{-1} [ dM^dag M + M^dag dM ] [M^dag M + bk]^{-1}
|
|
||||||
//
|
|
||||||
// Need
|
|
||||||
// Mf Phi_k = [MdagM+bk]^{-1} Phi
|
|
||||||
// Mf Phi = \sum_k ak [MdagM+bk]^{-1} Phi
|
|
||||||
//
|
|
||||||
// With these building blocks
|
|
||||||
//
|
|
||||||
// dS/dU = \sum_k -ak Mf Phi_k^dag [ dM^dag M + M^dag dM ] Mf Phi_k
|
|
||||||
// S = innerprodReal(Phi,Mf Phi);
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
|
|
||||||
|
|
||||||
const int Npole = PowerNegHalf.poles.size();
|
// dSdU = Ta(dSdU);
|
||||||
|
};
|
||||||
std::vector<FermionField> MPhi_k (Npole,FermOp.FermionRedBlackGrid());
|
};
|
||||||
|
}
|
||||||
FermionField X(FermOp.FermionRedBlackGrid());
|
|
||||||
FermionField Y(FermOp.FermionRedBlackGrid());
|
|
||||||
|
|
||||||
GaugeField tmp(FermOp.GaugeGrid());
|
|
||||||
|
|
||||||
FermOp.ImportGauge(U);
|
|
||||||
|
|
||||||
SchurDifferentiableOperator<Impl> Mpc(FermOp);
|
|
||||||
|
|
||||||
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,PowerNegHalf);
|
|
||||||
|
|
||||||
msCG(Mpc,PhiOdd,MPhi_k);
|
|
||||||
|
|
||||||
dSdU = zero;
|
|
||||||
for(int k=0;k<Npole;k++){
|
|
||||||
|
|
||||||
RealD ak = PowerNegHalf.residues[k];
|
|
||||||
|
|
||||||
X = MPhi_k[k];
|
|
||||||
|
|
||||||
Mpc.Mpc(X,Y);
|
|
||||||
Mpc.MpcDeriv (tmp , Y, X ); dSdU=dSdU+ak*tmp;
|
|
||||||
Mpc.MpcDagDeriv(tmp , X, Y ); dSdU=dSdU+ak*tmp;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
dSdU = Ta(dSdU);
|
|
||||||
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user