mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Compare commits
2 Commits
FgridStagg
...
feature/mi
Author | SHA1 | Date | |
---|---|---|---|
1e3fb32572 | |||
0d5af667d8 |
8
.gitignore
vendored
8
.gitignore
vendored
@ -92,7 +92,6 @@ build*/*
|
||||
#####################
|
||||
*.xcodeproj/*
|
||||
build.sh
|
||||
.vscode
|
||||
|
||||
# Eigen source #
|
||||
################
|
||||
@ -107,10 +106,6 @@ lib/fftw/*
|
||||
m4/lt*
|
||||
m4/libtool.m4
|
||||
|
||||
# github pages #
|
||||
################
|
||||
gh-pages/
|
||||
|
||||
# Buck files #
|
||||
##############
|
||||
.buck*
|
||||
@ -121,5 +116,4 @@ make-bin-BUCK.sh
|
||||
# generated sources #
|
||||
#####################
|
||||
lib/qcd/spin/gamma-gen/*.h
|
||||
lib/qcd/spin/gamma-gen/*.cc
|
||||
|
||||
lib/qcd/spin/gamma-gen/*.cc
|
76
.travis.yml
76
.travis.yml
@ -7,8 +7,64 @@ cache:
|
||||
matrix:
|
||||
include:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
osx_image: xcode7.2
|
||||
compiler: clang
|
||||
- compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.9
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: VERSION=-4.9
|
||||
- compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-5
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: VERSION=-5
|
||||
- compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||
- compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
@ -17,15 +73,13 @@ before_install:
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
||||
|
||||
install:
|
||||
- export CC=$CC$VERSION
|
||||
- export CXX=$CXX$VERSION
|
||||
- echo $PATH
|
||||
- which autoconf
|
||||
- autoconf --version
|
||||
- which automake
|
||||
- automake --version
|
||||
- which $CC
|
||||
- $CC --version
|
||||
- which $CXX
|
||||
@ -38,9 +92,15 @@ script:
|
||||
- cd build
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- ./benchmarks/Benchmark_dwf --threads 1
|
||||
- echo make clean
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
- ./benchmarks/Benchmark_dwf --threads 1
|
||||
- echo make clean
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
|
||||
- make -j4
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||
|
||||
|
11
Makefile.am
11
Makefile.am
@ -3,15 +3,10 @@ SUBDIRS = lib benchmarks tests extras
|
||||
|
||||
include $(top_srcdir)/doxygen.inc
|
||||
|
||||
bin_SCRIPTS=grid-config
|
||||
tests: all
|
||||
$(MAKE) -C tests tests
|
||||
|
||||
|
||||
.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
|
||||
|
||||
tests-local: all
|
||||
bench-local: all
|
||||
check-local: all
|
||||
.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
|
||||
|
||||
AM_CXXFLAGS += -I$(top_builddir)/include
|
||||
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
302
README.md
302
README.md
@ -1,13 +1,41 @@
|
||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid)
|
||||
# Grid
|
||||
<table>
|
||||
<tr>
|
||||
<td>Last stable release</td>
|
||||
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Development branch</td>
|
||||
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
**Data parallel C++ mathematical object library.**
|
||||
|
||||
License: GPL v2.
|
||||
|
||||
Last update June 2017.
|
||||
Last update Nov 2016.
|
||||
|
||||
_Please do not send pull requests to the `master` branch which is reserved for releases._
|
||||
|
||||
### Bug report
|
||||
|
||||
_To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
|
||||
|
||||
When you file an issue, please go though the following checklist:
|
||||
|
||||
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
|
||||
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
|
||||
3. Give the exact `configure` command used.
|
||||
4. Attach `config.log`.
|
||||
5. Attach `config.summary`.
|
||||
6. Attach the output of `make V=1`.
|
||||
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
|
||||
|
||||
|
||||
|
||||
### Description
|
||||
@ -30,68 +58,13 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
||||
for most programmers.
|
||||
|
||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||
Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported.
|
||||
Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way).
|
||||
|
||||
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.
|
||||
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
|
||||
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
|
||||
|
||||
MPI, OpenMP, and SIMD parallelism are present in the library.
|
||||
Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail.
|
||||
|
||||
|
||||
### Compilers
|
||||
|
||||
Intel ICPC v16.0.3 and later
|
||||
|
||||
Clang v3.5 and later (need 3.8 and later for OpenMP)
|
||||
|
||||
GCC v4.9.x (recommended)
|
||||
|
||||
GCC v6.3 and later
|
||||
|
||||
### Important:
|
||||
|
||||
Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
|
||||
|
||||
The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
|
||||
|
||||
GCC v5.x
|
||||
|
||||
GCC v6.1, v6.2
|
||||
|
||||
### Bug report
|
||||
|
||||
_To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
|
||||
|
||||
When you file an issue, please go though the following checklist:
|
||||
|
||||
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
|
||||
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
|
||||
3. Give the exact `configure` command used.
|
||||
4. Attach `config.log`.
|
||||
5. Attach `grid.config.summary`.
|
||||
6. Attach the output of `make V=1`.
|
||||
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
|
||||
|
||||
### Required libraries
|
||||
Grid requires:
|
||||
|
||||
[GMP](https://gmplib.org/),
|
||||
|
||||
[MPFR](http://www.mpfr.org/)
|
||||
|
||||
Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library.
|
||||
|
||||
Grid optionally uses:
|
||||
|
||||
[HDF5](https://support.hdfgroup.org/HDF5/)
|
||||
|
||||
[LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.
|
||||
|
||||
[FFTW](http://www.fftw.org) either generic version or via the Intel MKL library.
|
||||
|
||||
LAPACK either generic version or Intel MKL library.
|
||||
|
||||
Please see https://arxiv.org/abs/1512.03487 for more detail.
|
||||
|
||||
### Quick start
|
||||
First, start by cloning the repository:
|
||||
@ -122,10 +95,10 @@ install Grid. Other options are detailed in the next section, you can also use `
|
||||
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
||||
customise the build.
|
||||
|
||||
Finally, you can build, check, and install Grid:
|
||||
Finally, you can build and install Grid:
|
||||
|
||||
``` bash
|
||||
make; make check; make install
|
||||
make; make install
|
||||
```
|
||||
|
||||
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
|
||||
@ -148,7 +121,7 @@ If you want to build all the tests at once just use `make tests`.
|
||||
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
|
||||
- `--enable-precision={single|double}`: set the default precision (default: `double`).
|
||||
- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
|
||||
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
|
||||
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `).
|
||||
- `--disable-timers`: disable system dependent high-resolution timers.
|
||||
- `--enable-chroma`: enable Chroma regression tests.
|
||||
- `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
|
||||
@ -162,6 +135,7 @@ The following options can be use with the `--enable-comms=` option to target dif
|
||||
| `none` | no communications |
|
||||
| `mpi[-auto]` | MPI communications |
|
||||
| `mpi3[-auto]` | MPI communications using MPI 3 shared memory |
|
||||
| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model |
|
||||
| `shmem ` | Cray SHMEM communications |
|
||||
|
||||
For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.
|
||||
@ -179,13 +153,13 @@ The following options can be use with the `--enable-simd=` option to target diff
|
||||
| `AVXFMA4` | AVX (256 bit) + FMA4 |
|
||||
| `AVX2` | AVX 2 (256 bit) |
|
||||
| `AVX512` | AVX 512 bit |
|
||||
| `NEONv8` | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit) |
|
||||
| `QPX` | IBM QPX (256 bit) |
|
||||
| `QPX` | QPX (256 bit) |
|
||||
|
||||
Alternatively, some CPU codenames can be directly used:
|
||||
|
||||
| `<code>` | Description |
|
||||
| ----------- | -------------------------------------- |
|
||||
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
|
||||
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
|
||||
| `BGQ` | Blue Gene/Q |
|
||||
|
||||
@ -202,205 +176,21 @@ The following configuration is recommended for the Intel Knights Landing platfor
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-comms=mpi-auto \
|
||||
--with-gmp=<path> \
|
||||
--with-mpfr=<path> \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
```
|
||||
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
|
||||
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
--enable-comms=mpi \
|
||||
--with-gmp=<path> \
|
||||
--with-mpfr=<path> \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
```
|
||||
|
||||
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
|
||||
``` bash
|
||||
--with-gmp=<path> \
|
||||
--with-mpfr=<path> \
|
||||
```
|
||||
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
|
||||
Knight's Landing with Intel Omnipath adapters with two adapters per node
|
||||
presently performs better with use of more than one rank per node, using shared memory
|
||||
for interior communication. This is the mpi3 communications implementation.
|
||||
We recommend four ranks per node for best performance, but optimum is local volume dependent.
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=KNL \
|
||||
--enable-comms=mpi3-auto \
|
||||
--enable-mkl \
|
||||
CC=icpc MPICXX=mpiicpc
|
||||
```
|
||||
|
||||
### Build setup for Intel Haswell Xeon platform
|
||||
|
||||
The following configuration is recommended for the Intel Haswell platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
--enable-comms=mpi3-auto \
|
||||
--enable-mkl \
|
||||
CXX=icpc MPICXX=mpiicpc
|
||||
```
|
||||
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
|
||||
|
||||
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
|
||||
``` bash
|
||||
--with-gmp=<path> \
|
||||
--with-mpfr=<path> \
|
||||
```
|
||||
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
```
|
||||
Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of
|
||||
one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
|
||||
```
|
||||
export I_MPI_PIN=1
|
||||
```
|
||||
This is the default.
|
||||
|
||||
### Build setup for Intel Skylake Xeon platform
|
||||
|
||||
The following configuration is recommended for the Intel Skylake platform:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=mpiicpc
|
||||
```
|
||||
The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
|
||||
|
||||
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
|
||||
``` bash
|
||||
--with-gmp=<path> \
|
||||
--with-mpfr=<path> \
|
||||
```
|
||||
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
|
||||
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX512 \
|
||||
--enable-comms=mpi3 \
|
||||
--enable-mkl \
|
||||
CXX=CC CC=cc
|
||||
```
|
||||
Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of
|
||||
one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
|
||||
```
|
||||
export I_MPI_PIN=1
|
||||
```
|
||||
This is the default.
|
||||
|
||||
#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping):
|
||||
|
||||
mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18
|
||||
|
||||
TBA
|
||||
|
||||
|
||||
### Build setup for AMD EPYC / RYZEN
|
||||
|
||||
The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores.
|
||||
So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total
|
||||
are common. Each chip within the module exposes a separate NUMA domain.
|
||||
There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain.
|
||||
MPI-3 is recommended with the use of four ranks per socket,
|
||||
and 8 threads per rank.
|
||||
|
||||
The following configuration is recommended for the AMD EPYC platform.
|
||||
|
||||
``` bash
|
||||
../configure --enable-precision=double\
|
||||
--enable-simd=AVX2 \
|
||||
--enable-comms=mpi3 \
|
||||
CXX=mpicxx
|
||||
```
|
||||
|
||||
If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
|
||||
``` bash
|
||||
--with-gmp=<path> \
|
||||
--with-mpfr=<path> \
|
||||
```
|
||||
where `<path>` is the UNIX prefix where GMP and MPFR are installed.
|
||||
|
||||
Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank.
|
||||
This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.
|
||||
|
||||
It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and
|
||||
shared memory to communicate within this node:
|
||||
|
||||
mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4
|
||||
|
||||
Where omp_bind.sh does the following:
|
||||
```
|
||||
#!/bin/bash
|
||||
|
||||
numanode=` expr $PMI_RANK % 8 `
|
||||
basecore=`expr $numanode \* 16`
|
||||
core0=`expr $basecore + 0 `
|
||||
core1=`expr $basecore + 2 `
|
||||
core2=`expr $basecore + 4 `
|
||||
core3=`expr $basecore + 6 `
|
||||
core4=`expr $basecore + 8 `
|
||||
core5=`expr $basecore + 10 `
|
||||
core6=`expr $basecore + 12 `
|
||||
core7=`expr $basecore + 14 `
|
||||
|
||||
export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7"
|
||||
echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY
|
||||
|
||||
$@
|
||||
```
|
||||
|
||||
Performance:
|
||||
|
||||
#### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping):
|
||||
|
||||
mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4
|
||||
|
||||
TBA
|
||||
|
||||
### Build setup for BlueGene/Q
|
||||
|
||||
To be written...
|
||||
|
||||
### Build setup for ARM Neon
|
||||
|
||||
To be written...
|
||||
|
||||
### Build setup for laptops, other compilers, non-cluster builds
|
||||
|
||||
Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX),
|
||||
and omit the enable-mkl flag.
|
||||
|
||||
Single node builds are enabled with
|
||||
```
|
||||
--enable-comms=none
|
||||
```
|
||||
|
||||
FFTW support that is not in the default search path may then enabled with
|
||||
```
|
||||
--with-fftw=<installpath>
|
||||
```
|
||||
|
||||
BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation.
|
||||
|
||||
```
|
70
TODO
70
TODO
@ -1,35 +1,6 @@
|
||||
TODO:
|
||||
---------------
|
||||
|
||||
Large item work list:
|
||||
|
||||
1)- BG/Q port and check ; Andrew says ok.
|
||||
2)- Christoph's local basis expansion Lanczos
|
||||
--
|
||||
3a)- RNG I/O in ILDG/SciDAC (minor)
|
||||
3b)- Precision conversion and sort out localConvert <-- partial/easy
|
||||
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
|
||||
4)- Physical propagator interface
|
||||
5)- Conserved currents
|
||||
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
|
||||
7)- HDCR resume
|
||||
|
||||
Recent DONE
|
||||
-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori
|
||||
-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
|
||||
-- GaugeFix into central location <-- DONE
|
||||
-- Scidac and Ildg metadata handling <-- DONE
|
||||
-- Binary I/O MPI2 IO <-- DONE
|
||||
-- Binary I/O speed up & x-strips <-- DONE
|
||||
-- Cut down the exterior overhead <-- DONE
|
||||
-- Interior legs from SHM comms <-- DONE
|
||||
-- Half-precision comms <-- DONE
|
||||
-- Merge high precision reduction into develop <-- DONE
|
||||
-- BlockCG, BCGrQ <-- DONE
|
||||
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
|
||||
-- slice* linalg routines for multiRHS, BlockCG
|
||||
|
||||
-----
|
||||
* Forces; the UdSdU term in gauge force term is half of what I think it should
|
||||
be. This is a consequence of taking ONLY the first term in:
|
||||
|
||||
@ -50,8 +21,16 @@ Recent DONE
|
||||
This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
|
||||
This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
|
||||
|
||||
|
||||
Policies:
|
||||
|
||||
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
|
||||
|
||||
* Support different boundary conditions (finite temp, chem. potential ... )
|
||||
|
||||
* Support different fermion representations?
|
||||
- contained entirely within the integrator presently
|
||||
|
||||
- Sign of force term.
|
||||
|
||||
- Reversibility test.
|
||||
@ -62,6 +41,11 @@ Recent DONE
|
||||
|
||||
- Audit oIndex usage for cb behaviour
|
||||
|
||||
- Rectangle gauge actions.
|
||||
Iwasaki,
|
||||
Symanzik,
|
||||
... etc...
|
||||
|
||||
- Prepare multigrid for HMC. - Alternate setup schemes.
|
||||
|
||||
- Support for ILDG --- ugly, not done
|
||||
@ -71,11 +55,9 @@ Recent DONE
|
||||
- FFTnD ?
|
||||
|
||||
- Gparity; hand opt use template specialisation elegance to enable the optimised paths ?
|
||||
|
||||
- Gparity force term; Gparity (R)HMC.
|
||||
|
||||
- Random number state save restore
|
||||
- Mobius implementation clean up to rmove #if 0 stale code sequences
|
||||
|
||||
- CG -- profile carefully, kernel fusion, whole CG performance measurements.
|
||||
|
||||
================================================================
|
||||
@ -108,7 +90,6 @@ Insert/Extract
|
||||
Not sure of status of this -- reverify. Things are working nicely now though.
|
||||
|
||||
* Make the Tensor types and Complex etc... play more nicely.
|
||||
|
||||
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
||||
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
|
||||
want to introduce a syntax that does not require this.
|
||||
@ -131,8 +112,6 @@ Not sure of status of this -- reverify. Things are working nicely now though.
|
||||
RECENT
|
||||
---------------
|
||||
|
||||
- Support different fermion representations? -- DONE
|
||||
- contained entirely within the integrator presently
|
||||
- Clean up HMC -- DONE
|
||||
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
|
||||
- Simplified the integrators a bit. -- DONE
|
||||
@ -144,26 +123,6 @@ RECENT
|
||||
- Parallel io improvements -- DONE
|
||||
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
|
||||
|
||||
|
||||
DONE:
|
||||
- MultiArray -- MultiRHS done
|
||||
- ConjugateGradientMultiShift -- DONE
|
||||
- MCR -- DONE
|
||||
- Remez -- Mike or Boost? -- DONE
|
||||
- Proto (ET) -- DONE
|
||||
- uBlas -- DONE ; Eigen
|
||||
- Potentially Useful Boost libraries -- DONE ; Eigen
|
||||
- Aligned allocator; memory pool -- DONE
|
||||
- Multiprecision -- DONE
|
||||
- Serialization -- DONE
|
||||
- Regex -- Not needed
|
||||
- Tokenize -- Why?
|
||||
|
||||
- Random number state save restore -- DONE
|
||||
- Rectangle gauge actions. -- DONE
|
||||
Iwasaki,
|
||||
Symanzik,
|
||||
... etc...
|
||||
Done: Cayley, Partial , ContFrac force terms.
|
||||
|
||||
DONE
|
||||
@ -248,7 +207,6 @@ Done
|
||||
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
||||
======================================================================================================
|
||||
|
||||
* Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
|
||||
* Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE
|
||||
user pass these? Is this a QCD specific?
|
||||
|
||||
|
9
VERSION
9
VERSION
@ -1,5 +1,6 @@
|
||||
Version : 0.7.0
|
||||
Version : 0.6.0
|
||||
|
||||
- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
|
||||
- MPI and MPI3 comms optimisations for KNL and OPA finished
|
||||
- Half precision comms
|
||||
- AVX512, AVX2, AVX, SSE good
|
||||
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
|
||||
- MPI and MPI3
|
||||
- HiRep, Smearing, Generic gauge group
|
||||
|
@ -1,800 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./benchmarks/Benchmark_memory_bandwidth.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
|
||||
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
|
||||
|
||||
|
||||
std::vector<int> L_list;
|
||||
std::vector<int> Ls_list;
|
||||
std::vector<double> mflop_list;
|
||||
|
||||
double mflop_ref;
|
||||
double mflop_ref_err;
|
||||
|
||||
int NN_global;
|
||||
|
||||
struct time_statistics{
|
||||
double mean;
|
||||
double err;
|
||||
double min;
|
||||
double max;
|
||||
|
||||
void statistics(std::vector<double> v){
|
||||
double sum = std::accumulate(v.begin(), v.end(), 0.0);
|
||||
mean = sum / v.size();
|
||||
|
||||
std::vector<double> diff(v.size());
|
||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
||||
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
||||
err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
|
||||
|
||||
auto result = std::minmax_element(v.begin(), v.end());
|
||||
min = *result.first;
|
||||
max = *result.second;
|
||||
}
|
||||
};
|
||||
|
||||
void comms_header(){
|
||||
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
|
||||
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
|
||||
};
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
struct controls {
|
||||
int Opt;
|
||||
int CommsOverlap;
|
||||
Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
|
||||
// int HugePages;
|
||||
};
|
||||
|
||||
class Benchmark {
|
||||
public:
|
||||
static void Decomposition (void ) {
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
|
||||
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvReal : "<<sizeof(vReal )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplex : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
static void Comms(void)
|
||||
{
|
||||
int Nloop=200;
|
||||
int nmu=0;
|
||||
int maxlat=32;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||
|
||||
std::vector<double> t_time(Nloop);
|
||||
time_statistics timestat;
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
comms_header();
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
lat*mpi_layout[2],
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
int ncomm;
|
||||
double dbytes;
|
||||
std::vector<double> times(Nloop);
|
||||
for(int i=0;i<Nloop;i++){
|
||||
|
||||
double start=usecond();
|
||||
|
||||
dbytes=0;
|
||||
ncomm=0;
|
||||
|
||||
parallel_for(int dir=0;dir<8;dir++){
|
||||
|
||||
double tbytes;
|
||||
int mu =dir % 4;
|
||||
|
||||
if (mpi_layout[mu]>1 ) {
|
||||
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
if ( dir == mu ) {
|
||||
int comm_proc=1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
} else {
|
||||
int comm_proc = mpi_layout[mu]-1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
}
|
||||
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank,
|
||||
bytes,dir);
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp atomic
|
||||
#endif
|
||||
ncomm++;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp atomic
|
||||
#endif
|
||||
dbytes+=tbytes;
|
||||
}
|
||||
}
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
}
|
||||
|
||||
timestat.statistics(t_time);
|
||||
// for(int i=0;i<t_time.size();i++){
|
||||
// std::cout << i<<" "<<t_time[i]<<std::endl;
|
||||
// }
|
||||
|
||||
dbytes=dbytes*ppn;
|
||||
double xbytes = dbytes*0.5;
|
||||
double rbytes = dbytes*0.5;
|
||||
double bidibytes = dbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void Memory(void)
|
||||
{
|
||||
const int Nvec=8;
|
||||
typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
|
||||
typedef iVector<vReal,Nvec> Vec;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
uint64_t NP;
|
||||
uint64_t NN;
|
||||
|
||||
|
||||
uint64_t lmax=48;
|
||||
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
|
||||
|
||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
for(int lat=8;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
NP= Grid.RankCount();
|
||||
NN =Grid.NodeCount();
|
||||
|
||||
Vec rn ; random(sRNG,rn);
|
||||
|
||||
LatticeVec z(&Grid); z=rn;
|
||||
LatticeVec x(&Grid); x=rn;
|
||||
LatticeVec y(&Grid); y=rn;
|
||||
double a=2.0;
|
||||
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=a*x-y;
|
||||
x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away
|
||||
y._odata[4]=z._odata[4];
|
||||
}
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3.0*vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3)
|
||||
<< lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
|
||||
<< "\t\t"<< bytes/time/NN <<std::endl;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
static double DWF5(int Ls,int L)
|
||||
{
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
||||
double mflops;
|
||||
double mflops_best = 0;
|
||||
double mflops_worst= 0;
|
||||
std::vector<double> mflops_all;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Set/Get the layout & grid size
|
||||
///////////////////////////////////////////////////////
|
||||
int threads = GridThread::GetThreads();
|
||||
std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
||||
std::vector<int> local({L,L,L,L});
|
||||
|
||||
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
uint64_t NP = TmpGrid->RankCount();
|
||||
uint64_t NN = TmpGrid->NodeCount();
|
||||
NN_global=NN;
|
||||
uint64_t SHM=NP/NN;
|
||||
|
||||
std::vector<int> internal;
|
||||
if ( SHM == 1 ) internal = std::vector<int>({1,1,1,1});
|
||||
else if ( SHM == 2 ) internal = std::vector<int>({2,1,1,1});
|
||||
else if ( SHM == 4 ) internal = std::vector<int>({2,2,1,1});
|
||||
else if ( SHM == 8 ) internal = std::vector<int>({2,2,2,1});
|
||||
else assert(0);
|
||||
|
||||
std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
|
||||
std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
|
||||
|
||||
///////// Welcome message ////////////
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
||||
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
///////// Lattice Init ////////////
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||
|
||||
///////// RNG Init ////////////
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||
|
||||
///////// Source preparation ////////////
|
||||
LatticeFermion src (sFGrid); random(RNG5,src);
|
||||
LatticeFermion tmp (sFGrid);
|
||||
|
||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
||||
src = src*N2;
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||
LatticeFermion src_e (sFrbGrid);
|
||||
LatticeFermion src_o (sFrbGrid);
|
||||
LatticeFermion r_e (sFrbGrid);
|
||||
LatticeFermion r_o (sFrbGrid);
|
||||
LatticeFermion r_eo (sFGrid);
|
||||
LatticeFermion err (sFGrid);
|
||||
{
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
#if defined(AVX512)
|
||||
const int num_cases = 6;
|
||||
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
|
||||
#else
|
||||
const int num_cases = 4;
|
||||
std::string fmt("U/S ; U/O ; G/S ; G/O ");
|
||||
#endif
|
||||
controls Cases [] = {
|
||||
#ifdef AVX512
|
||||
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
#endif
|
||||
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
||||
};
|
||||
|
||||
for(int c=0;c<num_cases;c++) {
|
||||
|
||||
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
|
||||
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
|
||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
int nwarm = 100;
|
||||
uint64_t ncall = 1000;
|
||||
|
||||
double t0=usecond();
|
||||
sFGrid->Barrier();
|
||||
for(int i=0;i<nwarm;i++){
|
||||
sDw.DhopEO(src_o,r_e,DaggerNo);
|
||||
}
|
||||
sFGrid->Barrier();
|
||||
double t1=usecond();
|
||||
|
||||
sDw.ZeroCounters();
|
||||
time_statistics timestat;
|
||||
std::vector<double> t_time(ncall);
|
||||
for(uint64_t i=0;i<ncall;i++){
|
||||
t0=usecond();
|
||||
sDw.DhopEO(src_o,r_e,DaggerNo);
|
||||
t1=usecond();
|
||||
t_time[i] = t1-t0;
|
||||
}
|
||||
sFGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=(1344.0*volume)/2;
|
||||
double mf_hi, mf_lo, mf_err;
|
||||
|
||||
timestat.statistics(t_time);
|
||||
mf_hi = flops/timestat.min;
|
||||
mf_lo = flops/timestat.max;
|
||||
mf_err= flops/timestat.min * timestat.err/timestat.mean;
|
||||
|
||||
mflops = flops/timestat.mean;
|
||||
mflops_all.push_back(mflops);
|
||||
if ( mflops_best == 0 ) mflops_best = mflops;
|
||||
if ( mflops_worst== 0 ) mflops_worst= mflops;
|
||||
if ( mflops>mflops_best ) mflops_best = mflops;
|
||||
if ( mflops<mflops_worst) mflops_worst= mflops;
|
||||
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank "<< mflops/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node "<< mflops/NN<<std::endl;
|
||||
|
||||
sDw.Report();
|
||||
|
||||
}
|
||||
double robust = mflops_worst/mflops_best;;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
|
||||
std::cout<<GridLogMessage <<fmt << std::endl;
|
||||
std::cout<<GridLogMessage;
|
||||
|
||||
for(int i=0;i<mflops_all.size();i++){
|
||||
std::cout<<mflops_all[i]/NN<<" ; " ;
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
}
|
||||
return mflops_best;
|
||||
}
|
||||
|
||||
static double DWF(int Ls,int L, double & robust)
|
||||
{
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
||||
double mflops;
|
||||
double mflops_best = 0;
|
||||
double mflops_worst= 0;
|
||||
std::vector<double> mflops_all;
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Set/Get the layout & grid size
|
||||
///////////////////////////////////////////////////////
|
||||
int threads = GridThread::GetThreads();
|
||||
std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4);
|
||||
std::vector<int> local({L,L,L,L});
|
||||
|
||||
GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
uint64_t NP = TmpGrid->RankCount();
|
||||
uint64_t NN = TmpGrid->NodeCount();
|
||||
NN_global=NN;
|
||||
uint64_t SHM=NP/NN;
|
||||
|
||||
std::vector<int> internal;
|
||||
if ( SHM == 1 ) internal = std::vector<int>({1,1,1,1});
|
||||
else if ( SHM == 2 ) internal = std::vector<int>({2,1,1,1});
|
||||
else if ( SHM == 4 ) internal = std::vector<int>({2,2,1,1});
|
||||
else if ( SHM == 8 ) internal = std::vector<int>({2,2,2,1});
|
||||
else assert(0);
|
||||
|
||||
std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
|
||||
std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
|
||||
|
||||
///////// Welcome message ////////////
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Global volume : "<<GridCmdVectorIntToString(latt4)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Ls : "<<Ls<<std::endl;
|
||||
std::cout<<GridLogMessage << "* MPI ranks : "<<GridCmdVectorIntToString(mpi)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Intranode : "<<GridCmdVectorIntToString(internal)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* nodes : "<<GridCmdVectorIntToString(nodes)<<std::endl;
|
||||
std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
|
||||
///////// Lattice Init ////////////
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
|
||||
///////// RNG Init ////////////
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||
|
||||
///////// Source preparation ////////////
|
||||
LatticeFermion src (FGrid); random(RNG5,src);
|
||||
LatticeFermion ref (FGrid);
|
||||
LatticeFermion tmp (FGrid);
|
||||
|
||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
||||
src = src*N2;
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
|
||||
|
||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
////////////////////////////////////
|
||||
// Naive wilson implementation
|
||||
////////////////////////////////////
|
||||
{
|
||||
LatticeGaugeField Umu5d(FGrid);
|
||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
||||
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
|
||||
}
|
||||
}
|
||||
ref = zero;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||
}
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu+1,-1);
|
||||
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
||||
}
|
||||
ref = -0.5*ref;
|
||||
}
|
||||
|
||||
LatticeFermion src_e (FrbGrid);
|
||||
LatticeFermion src_o (FrbGrid);
|
||||
LatticeFermion r_e (FrbGrid);
|
||||
LatticeFermion r_o (FrbGrid);
|
||||
LatticeFermion r_eo (FGrid);
|
||||
LatticeFermion err (FGrid);
|
||||
{
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
#if defined(AVX512)
|
||||
const int num_cases = 6;
|
||||
std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
|
||||
#else
|
||||
const int num_cases = 4;
|
||||
std::string fmt("U/S ; U/O ; G/S ; G/O ");
|
||||
#endif
|
||||
controls Cases [] = {
|
||||
#ifdef AVX512
|
||||
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
#endif
|
||||
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
|
||||
{ QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
|
||||
};
|
||||
|
||||
for(int c=0;c<num_cases;c++) {
|
||||
|
||||
QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
|
||||
QCD::WilsonKernelsStatic::Opt = Cases[c].Opt;
|
||||
CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
int nwarm = 200;
|
||||
double t0=usecond();
|
||||
FGrid->Barrier();
|
||||
for(int i=0;i<nwarm;i++){
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
}
|
||||
FGrid->Barrier();
|
||||
double t1=usecond();
|
||||
// uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0);
|
||||
// if (ncall < 500) ncall = 500;
|
||||
uint64_t ncall = 1000;
|
||||
|
||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||
Dw.ZeroCounters();
|
||||
|
||||
time_statistics timestat;
|
||||
std::vector<double> t_time(ncall);
|
||||
for(uint64_t i=0;i<ncall;i++){
|
||||
t0=usecond();
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
t1=usecond();
|
||||
t_time[i] = t1-t0;
|
||||
}
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=(1344.0*volume)/2;
|
||||
double mf_hi, mf_lo, mf_err;
|
||||
|
||||
timestat.statistics(t_time);
|
||||
mf_hi = flops/timestat.min;
|
||||
mf_lo = flops/timestat.max;
|
||||
mf_err= flops/timestat.min * timestat.err/timestat.mean;
|
||||
|
||||
mflops = flops/timestat.mean;
|
||||
mflops_all.push_back(mflops);
|
||||
if ( mflops_best == 0 ) mflops_best = mflops;
|
||||
if ( mflops_worst== 0 ) mflops_worst= mflops;
|
||||
if ( mflops>mflops_best ) mflops_best = mflops;
|
||||
if ( mflops<mflops_worst) mflops_worst= mflops;
|
||||
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
|
||||
|
||||
Dw.Report();
|
||||
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||
setCheckerboard(r_eo,r_o);
|
||||
setCheckerboard(r_eo,r_e);
|
||||
err = r_eo-ref;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
assert((norm2(err)<1.0e-4));
|
||||
|
||||
}
|
||||
robust = mflops_worst/mflops_best;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best mflop/s = "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
|
||||
std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s = "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
|
||||
std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness = "<< robust <<std::endl;
|
||||
std::cout<<GridLogMessage <<fmt << std::endl;
|
||||
std::cout<<GridLogMessage ;
|
||||
|
||||
for(int i=0;i<mflops_all.size();i++){
|
||||
std::cout<<mflops_all[i]/NN<<" ; " ;
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
}
|
||||
return mflops_best;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
|
||||
#ifdef KNL
|
||||
LebesgueOrder::Block = std::vector<int>({8,2,2,2});
|
||||
#else
|
||||
LebesgueOrder::Block = std::vector<int>({2,2,2,2});
|
||||
#endif
|
||||
Benchmark::Decomposition();
|
||||
|
||||
int do_memory=1;
|
||||
int do_comms =1;
|
||||
int do_su3 =0;
|
||||
int do_wilson=1;
|
||||
int do_dwf =1;
|
||||
|
||||
if ( do_su3 ) {
|
||||
// empty for now
|
||||
}
|
||||
#if 1
|
||||
int sel=2;
|
||||
std::vector<int> L_list({8,12,16,24});
|
||||
#else
|
||||
int sel=1;
|
||||
std::vector<int> L_list({8,12});
|
||||
#endif
|
||||
int selm1=sel-1;
|
||||
std::vector<double> robust_list;
|
||||
|
||||
std::vector<double> wilson;
|
||||
std::vector<double> dwf4;
|
||||
std::vector<double> dwf5;
|
||||
|
||||
if ( do_wilson ) {
|
||||
int Ls=1;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
double robust;
|
||||
wilson.push_back(Benchmark::DWF(1,L_list[l],robust));
|
||||
}
|
||||
}
|
||||
|
||||
int Ls=16;
|
||||
if ( do_dwf ) {
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
double robust;
|
||||
double result = Benchmark::DWF(Ls,L_list[l],robust) ;
|
||||
dwf4.push_back(result);
|
||||
robust_list.push_back(robust);
|
||||
}
|
||||
}
|
||||
|
||||
if ( do_dwf ) {
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
dwf5.push_back(Benchmark::DWF5(Ls,L_list[l]));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( do_dwf ) {
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
}
|
||||
|
||||
int NN=NN_global;
|
||||
if ( do_memory ) {
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
Benchmark::Memory();
|
||||
}
|
||||
|
||||
if ( do_comms && (NN>1) ) {
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
Benchmark::Comms();
|
||||
}
|
||||
|
||||
if ( do_dwf ) {
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4 \t\t DWF5 " <<std::endl;
|
||||
for(int l=0;l<L_list.size();l++){
|
||||
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " Comparison point result: " << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
|
||||
std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
|
||||
std::cout<<std::setprecision(3);
|
||||
std::cout<<GridLogMessage << " Comparison point robustness: " << robust_list[sel] <<std::endl;
|
||||
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -31,32 +31,6 @@ using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
struct time_statistics{
|
||||
double mean;
|
||||
double err;
|
||||
double min;
|
||||
double max;
|
||||
|
||||
void statistics(std::vector<double> v){
|
||||
double sum = std::accumulate(v.begin(), v.end(), 0.0);
|
||||
mean = sum / v.size();
|
||||
|
||||
std::vector<double> diff(v.size());
|
||||
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
|
||||
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
|
||||
err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
|
||||
|
||||
auto result = std::minmax_element(v.begin(), v.end());
|
||||
min = *result.first;
|
||||
max = *result.second;
|
||||
}
|
||||
};
|
||||
|
||||
void header(){
|
||||
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
|
||||
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
@ -66,21 +40,17 @@ int main (int argc, char ** argv)
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
int Nloop=100;
|
||||
int Nloop=10;
|
||||
int nmu=0;
|
||||
int maxlat=32;
|
||||
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||
|
||||
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
|
||||
std::vector<double> t_time(Nloop);
|
||||
time_statistics timestat;
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
header();
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||
int maxlat=16;
|
||||
for(int lat=4;lat<=maxlat;lat+=2){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
@ -88,23 +58,15 @@ int main (int argc, char ** argv)
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
|
||||
std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int mu=0;mu<8;mu++){
|
||||
xbuf[mu].resize(lat*lat*lat*Ls);
|
||||
rbuf[mu].resize(lat*lat*lat*Ls);
|
||||
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
|
||||
}
|
||||
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
|
||||
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||
|
||||
@ -117,6 +79,7 @@ int main (int argc, char ** argv)
|
||||
int comm_proc=1;
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.SendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
@ -139,24 +102,18 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
Grid.SendToRecvFromComplete(requests);
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
|
||||
}
|
||||
double stop=usecond();
|
||||
|
||||
timestat.statistics(t_time);
|
||||
|
||||
double dbytes = bytes*ppn;
|
||||
double xbytes = dbytes*2.0*ncomm;
|
||||
double dbytes = bytes;
|
||||
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||
double rbytes = xbytes;
|
||||
double bidibytes = xbytes+rbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
double time = stop-start; // microseconds
|
||||
|
||||
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -164,32 +121,25 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
header();
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=2){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
|
||||
std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
|
||||
|
||||
for(int mu=0;mu<8;mu++){
|
||||
xbuf[mu].resize(lat*lat*lat*Ls);
|
||||
rbuf[mu].resize(lat*lat*lat*Ls);
|
||||
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
|
||||
ncomm=0;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
@ -228,37 +178,30 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
}
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
|
||||
}
|
||||
|
||||
timestat.statistics(t_time);
|
||||
double stop=usecond();
|
||||
|
||||
double dbytes = bytes*ppn;
|
||||
double xbytes = dbytes*2.0*ncomm;
|
||||
double dbytes = bytes;
|
||||
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||
double rbytes = xbytes;
|
||||
double bidibytes = xbytes+rbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
double time = stop-start;
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Nloop=100;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
header();
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
for(int lat=4;lat<=maxlat;lat+=2){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
@ -266,9 +209,6 @@ int main (int argc, char ** argv)
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
@ -276,115 +216,16 @@ int main (int argc, char ** argv)
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
|
||||
double dbytes;
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
|
||||
dbytes=0;
|
||||
ncomm=0;
|
||||
|
||||
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
|
||||
|
||||
if (mpi_layout[mu]>1 ) {
|
||||
|
||||
ncomm++;
|
||||
int comm_proc=1;
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
dbytes+=
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes,mu);
|
||||
|
||||
comm_proc = mpi_layout[mu]-1;
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
dbytes+=
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes,mu+4);
|
||||
|
||||
}
|
||||
}
|
||||
Grid.StencilSendToRecvFromComplete(requests,0);
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
|
||||
}
|
||||
|
||||
timestat.statistics(t_time);
|
||||
|
||||
dbytes=dbytes*ppn;
|
||||
double xbytes = dbytes*0.5;
|
||||
double rbytes = dbytes*0.5;
|
||||
double bidibytes = dbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
header();
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
lat*mpi_layout[2],
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
Grid.ShmBufferFreeAll();
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
double dbytes;
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
|
||||
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||
dbytes=0;
|
||||
ncomm=0;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
|
||||
@ -396,64 +237,52 @@ int main (int argc, char ** argv)
|
||||
int recv_from_rank;
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
dbytes+=
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes,mu);
|
||||
Grid.StencilSendToRecvFromComplete(requests,mu);
|
||||
requests.resize(0);
|
||||
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
comm_proc = mpi_layout[mu]-1;
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
dbytes+=
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes,mu+4);
|
||||
Grid.StencilSendToRecvFromComplete(requests,mu+4);
|
||||
requests.resize(0);
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
}
|
||||
}
|
||||
Grid.StencilSendToRecvFromComplete(requests);
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
|
||||
|
||||
}
|
||||
double stop=usecond();
|
||||
|
||||
timestat.statistics(t_time);
|
||||
double dbytes = bytes;
|
||||
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||
double rbytes = xbytes;
|
||||
double bidibytes = xbytes+rbytes;
|
||||
|
||||
dbytes=dbytes*ppn;
|
||||
double xbytes = dbytes*0.5;
|
||||
double rbytes = dbytes*0.5;
|
||||
double bidibytes = dbytes;
|
||||
double time = stop-start; // microseconds
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Nloop=100;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
header();
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||
|
||||
for(int lat=4;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
for(int lat=4;lat<=maxlat;lat+=2){
|
||||
for(int Ls=1;Ls<=16;Ls*=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
@ -461,9 +290,6 @@ int main (int argc, char ** argv)
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||
@ -471,71 +297,65 @@ int main (int argc, char ** argv)
|
||||
for(int d=0;d<8;d++){
|
||||
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||
}
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
double dbytes;
|
||||
|
||||
double start=usecond();
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
|
||||
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||
dbytes=0;
|
||||
|
||||
ncomm=0;
|
||||
|
||||
parallel_for(int dir=0;dir<8;dir++){
|
||||
|
||||
double tbytes;
|
||||
int mu =dir % 4;
|
||||
|
||||
for(int mu=0;mu<4;mu++){
|
||||
|
||||
if (mpi_layout[mu]>1 ) {
|
||||
|
||||
ncomm++;
|
||||
int comm_proc=1;
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
if ( dir == mu ) {
|
||||
int comm_proc=1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
} else {
|
||||
int comm_proc = mpi_layout[mu]-1;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
}
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
// Grid.StencilSendToRecvFromComplete(requests);
|
||||
// requests.resize(0);
|
||||
|
||||
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
|
||||
(void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
|
||||
|
||||
#pragma omp atomic
|
||||
dbytes+=tbytes;
|
||||
comm_proc = mpi_layout[mu]-1;
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.StencilSendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.StencilSendToRecvFromComplete(requests);
|
||||
requests.resize(0);
|
||||
|
||||
}
|
||||
}
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
|
||||
}
|
||||
double stop=usecond();
|
||||
|
||||
timestat.statistics(t_time);
|
||||
double dbytes = bytes;
|
||||
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||
double rbytes = xbytes;
|
||||
double bidibytes = xbytes+rbytes;
|
||||
|
||||
dbytes=dbytes*ppn;
|
||||
double xbytes = dbytes*0.5;
|
||||
double rbytes = dbytes*0.5;
|
||||
double bidibytes = dbytes;
|
||||
double time = stop-start; // microseconds
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -1,22 +1,28 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./benchmarks/Benchmark_dwf.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
@ -42,22 +48,16 @@ typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
|
||||
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
std::vector<int> latt4 = GridDefaultLatt();
|
||||
int Ls=16;
|
||||
for(int i=0;i<argc;i++)
|
||||
if(std::string(argv[i]) == "-Ls"){
|
||||
std::stringstream ss(argv[i+1]); ss >> Ls;
|
||||
}
|
||||
|
||||
|
||||
const int Ls=8;
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
@ -71,66 +71,35 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
|
||||
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
||||
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||
|
||||
LatticeFermion src (FGrid); random(RNG5,src);
|
||||
#if 0
|
||||
src = zero;
|
||||
{
|
||||
std::vector<int> origin({0,0,0,latt4[2]-1,0});
|
||||
SpinColourVectorF tmp;
|
||||
tmp=zero;
|
||||
tmp()(0)(0)=Complex(-2.0,0.0);
|
||||
std::cout << " source site 0 " << tmp<<std::endl;
|
||||
pokeSite(tmp,src,origin);
|
||||
}
|
||||
#else
|
||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
||||
src = src*N2;
|
||||
#endif
|
||||
|
||||
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
|
||||
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
#if 0
|
||||
Umu=1.0;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
LatticeColourMatrix ttmp(UGrid);
|
||||
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
// if (mu !=2 ) ttmp = 0;
|
||||
// ttmp = ttmp* pow(10.0,mu);
|
||||
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
|
||||
#endif
|
||||
random(RNG4,Umu);
|
||||
|
||||
////////////////////////////////////
|
||||
// Naive wilson implementation
|
||||
////////////////////////////////////
|
||||
// replicate across fifth dimension
|
||||
LatticeGaugeField Umu5d(FGrid);
|
||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
||||
|
||||
// replicate across fifth dimension
|
||||
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Naive wilson implementation
|
||||
////////////////////////////////////
|
||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
||||
}
|
||||
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
|
||||
|
||||
if (1)
|
||||
{
|
||||
@ -151,7 +120,8 @@ int main (int argc, char ** argv)
|
||||
RealD M5 =1.8;
|
||||
|
||||
RealD NP = UGrid->_Nprocessors;
|
||||
RealD NN = UGrid->NodeCount();
|
||||
|
||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||
@ -161,22 +131,15 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
|
||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
int ncall =500;
|
||||
int ncall =100;
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
Dw.ZeroCounters();
|
||||
Dw.Dhop(src,result,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
@ -190,55 +153,16 @@ int main (int argc, char ** argv)
|
||||
double flops=1344*volume*ncall;
|
||||
|
||||
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
/*
|
||||
if(( norm2(err)>1.0e-4) ) {
|
||||
std::cout << "RESULT\n " << result<<std::endl;
|
||||
std::cout << "REF \n " << ref <<std::endl;
|
||||
std::cout << "ERR \n " << err <<std::endl;
|
||||
FGrid->Barrier();
|
||||
exit(-1);
|
||||
}
|
||||
*/
|
||||
assert (norm2(err)< 1.0e-4 );
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
DwH.ZeroCounters();
|
||||
DwH.Dhop(src,result,0);
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
DwH.Dhop(src,result,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=1344*volume*ncall;
|
||||
|
||||
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
assert (norm2(err)< 1.0e-3 );
|
||||
DwH.Report();
|
||||
}
|
||||
|
||||
if (1)
|
||||
{
|
||||
|
||||
@ -247,10 +171,6 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
@ -262,13 +182,21 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion sresult(sFGrid);
|
||||
|
||||
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||
|
||||
localConvert(src,ssrc);
|
||||
|
||||
for(int x=0;x<latt4[0];x++){
|
||||
for(int y=0;y<latt4[1];y++){
|
||||
for(int z=0;z<latt4[2];z++){
|
||||
for(int t=0;t<latt4[3];t++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
std::vector<int> site({s,x,y,z,t});
|
||||
SpinColourVector tmp;
|
||||
peekSite(tmp,src,site);
|
||||
pokeSite(tmp,ssrc,site);
|
||||
}}}}}
|
||||
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||
FGrid->Barrier();
|
||||
sDw.Dhop(ssrc,sresult,0);
|
||||
sDw.ZeroCounters();
|
||||
double t0=usecond();
|
||||
sDw.ZeroCounters();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
sDw.Dhop(ssrc,sresult,0);
|
||||
@ -282,53 +210,46 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
// std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||
sDw.Report();
|
||||
|
||||
if(0){
|
||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||
sDw.Dhop(ssrc,sresult,0);
|
||||
PerformanceCounter Counter(i);
|
||||
Counter.Start();
|
||||
sDw.Dhop(ssrc,sresult,0);
|
||||
Counter.Stop();
|
||||
Counter.Report();
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||
|
||||
RealD sum=0;
|
||||
for(int x=0;x<latt4[0];x++){
|
||||
for(int y=0;y<latt4[1];y++){
|
||||
for(int z=0;z<latt4[2];z++){
|
||||
for(int t=0;t<latt4[3];t++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
std::vector<int> site({s,x,y,z,t});
|
||||
SpinColourVector normal, simd;
|
||||
peekSite(normal,result,site);
|
||||
peekSite(simd,sresult,site);
|
||||
sum=sum+norm2(normal-simd);
|
||||
if (norm2(normal-simd) > 1.0e-6 ) {
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||
}
|
||||
}}}}}
|
||||
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||
assert (sum< 1.0e-4 );
|
||||
|
||||
err=zero;
|
||||
localConvert(sresult,err);
|
||||
err = err - ref;
|
||||
sum = norm2(err);
|
||||
std::cout<<GridLogMessage<<" difference between normal ref and simd is "<<sum<<std::endl;
|
||||
if(sum > 1.0e-4 ){
|
||||
std::cout<< "sD REF\n " <<ref << std::endl;
|
||||
std::cout<< "sD ERR \n " <<err <<std::endl;
|
||||
}
|
||||
// assert(sum < 1.0e-4);
|
||||
|
||||
err=zero;
|
||||
localConvert(sresult,err);
|
||||
err = err - result;
|
||||
sum = norm2(err);
|
||||
std::cout<<GridLogMessage<<" difference between normal result and simd is "<<sum<<std::endl;
|
||||
if(sum > 1.0e-4 ){
|
||||
std::cout<< "sD REF\n " <<result << std::endl;
|
||||
std::cout<< "sD ERR \n " << err <<std::endl;
|
||||
}
|
||||
assert(sum < 1.0e-4);
|
||||
|
||||
|
||||
if(1){
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric )
|
||||
std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
|
||||
std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm )
|
||||
std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
if (1) {
|
||||
|
||||
LatticeFermion sr_eo(sFGrid);
|
||||
|
||||
LatticeFermion ssrc_e (sFrbGrid);
|
||||
LatticeFermion ssrc_o (sFrbGrid);
|
||||
LatticeFermion sr_e (sFrbGrid);
|
||||
@ -336,30 +257,39 @@ int main (int argc, char ** argv)
|
||||
|
||||
pickCheckerboard(Even,ssrc_e,ssrc);
|
||||
pickCheckerboard(Odd,ssrc_o,ssrc);
|
||||
// setCheckerboard(sr_eo,ssrc_o);
|
||||
// setCheckerboard(sr_eo,ssrc_e);
|
||||
|
||||
setCheckerboard(sr_eo,ssrc_o);
|
||||
setCheckerboard(sr_eo,ssrc_e);
|
||||
|
||||
sr_e = zero;
|
||||
sr_o = zero;
|
||||
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
|
||||
FGrid->Barrier();
|
||||
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
||||
sDw.ZeroCounters();
|
||||
// sDw.stat.init("DhopEO");
|
||||
sDw.stat.init("DhopEO");
|
||||
double t0=usecond();
|
||||
for (int i = 0; i < ncall; i++) {
|
||||
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
// sDw.stat.print();
|
||||
sDw.stat.print();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=(1344.0*volume*ncall)/2;
|
||||
|
||||
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
|
||||
sDw.Report();
|
||||
|
||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||
@ -368,43 +298,24 @@ int main (int argc, char ** argv)
|
||||
|
||||
pickCheckerboard(Even,ssrc_e,sresult);
|
||||
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||
|
||||
ssrc_e = ssrc_e - sr_e;
|
||||
RealD error = norm2(ssrc_e);
|
||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||
ssrc_o = ssrc_o - sr_o;
|
||||
|
||||
error+= norm2(ssrc_o);
|
||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||
|
||||
if(( error>1.0e-4) ) {
|
||||
if(error>1.0e-4) {
|
||||
setCheckerboard(ssrc,ssrc_o);
|
||||
setCheckerboard(ssrc,ssrc_e);
|
||||
std::cout<< "DIFF\n " <<ssrc << std::endl;
|
||||
setCheckerboard(ssrc,sr_o);
|
||||
setCheckerboard(ssrc,sr_e);
|
||||
std::cout<< "CBRESULT\n " <<ssrc << std::endl;
|
||||
std::cout<< "RESULT\n " <<sresult<< std::endl;
|
||||
std::cout<< ssrc << std::endl;
|
||||
}
|
||||
assert(error<1.0e-4);
|
||||
}
|
||||
|
||||
if(0){
|
||||
std::cout << "Single cache warm call to sDw.Dhop " <<std::endl;
|
||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||
sDw.Dhop(ssrc,sresult,0);
|
||||
PerformanceCounter Counter(i);
|
||||
Counter.Start();
|
||||
sDw.Dhop(ssrc,sresult,0);
|
||||
Counter.Stop();
|
||||
Counter.Report();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (1)
|
||||
{ // Naive wilson dag implementation
|
||||
ref = zero;
|
||||
@ -413,30 +324,25 @@ int main (int argc, char ** argv)
|
||||
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
|
||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
}
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu+1,-1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
}
|
||||
}
|
||||
ref = -0.5*ref;
|
||||
}
|
||||
// dump=1;
|
||||
Dw.Dhop(src,result,1);
|
||||
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
|
||||
if((norm2(err)>1.0e-4)){
|
||||
std::cout<< "DAG RESULT\n " <<ref << std::endl;
|
||||
std::cout<< "DAG sRESULT\n " <<result << std::endl;
|
||||
std::cout<< "DAG ERR \n " << err <<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
assert(norm2(err)<1.0e-4);
|
||||
LatticeFermion src_e (FrbGrid);
|
||||
LatticeFermion src_o (FrbGrid);
|
||||
LatticeFermion r_e (FrbGrid);
|
||||
@ -444,24 +350,18 @@ int main (int argc, char ** argv)
|
||||
LatticeFermion r_eo (FGrid);
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd,src_o,src);
|
||||
|
||||
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
||||
|
||||
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
@ -469,7 +369,6 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
Dw.ZeroCounters();
|
||||
FGrid->Barrier();
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
@ -482,7 +381,6 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
|
||||
Dw.Report();
|
||||
}
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
@ -498,20 +396,14 @@ int main (int argc, char ** argv)
|
||||
|
||||
err = r_eo-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
if((norm2(err)>1.0e-4)){
|
||||
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
|
||||
std::cout<< "Deo REF\n " <<result << std::endl;
|
||||
std::cout<< "Deo ERR \n " << err <<std::endl;
|
||||
}
|
||||
assert(norm2(err)<1.0e-4);
|
||||
|
||||
pickCheckerboard(Even,src_e,err);
|
||||
pickCheckerboard(Odd,src_o,err);
|
||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||
|
||||
assert(norm2(src_e)<1.0e-4);
|
||||
assert(norm2(src_o)<1.0e-4);
|
||||
Grid_finalize();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -1,190 +0,0 @@
|
||||
#include <Grid/Grid.h>
|
||||
#include <sstream>
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
|
||||
typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF;
|
||||
typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD;
|
||||
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
int Ls=16;
|
||||
for(int i=0;i<argc;i++)
|
||||
if(std::string(argv[i]) == "-Ls"){
|
||||
std::stringstream ss(argv[i+1]); ss >> Ls;
|
||||
}
|
||||
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Ls = " << Ls << std::endl;
|
||||
|
||||
std::vector<int> latt4 = GridDefaultLatt();
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
|
||||
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||
|
||||
GparityLatticeFermionF src (FGrid); random(RNG5,src);
|
||||
RealD N2 = 1.0/::sqrt(norm2(src));
|
||||
src = src*N2;
|
||||
|
||||
GparityLatticeFermionF result(FGrid); result=zero;
|
||||
GparityLatticeFermionF ref(FGrid); ref=zero;
|
||||
GparityLatticeFermionF tmp(FGrid);
|
||||
GparityLatticeFermionF err(FGrid);
|
||||
|
||||
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
|
||||
LatticeGaugeFieldF Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4,Umu);
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
|
||||
RealD NP = UGrid->_Nprocessors;
|
||||
RealD NN = UGrid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermion::Dhop "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
|
||||
|
||||
|
||||
std::cout << GridLogMessage<< "* SINGLE/SINGLE"<<std::endl;
|
||||
GparityDomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
int ncall =1000;
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
Dw.ZeroCounters();
|
||||
Dw.Dhop(src,result,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
Dw.Dhop(src,result,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=2*1344*volume*ncall;
|
||||
|
||||
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage<< "* SINGLE/HALF"<<std::endl;
|
||||
GparityDomainWallFermionFH DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
DwH.ZeroCounters();
|
||||
DwH.Dhop(src,result,0);
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
DwH.Dhop(src,result,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=2*1344*volume*ncall;
|
||||
|
||||
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
DwH.Report();
|
||||
}
|
||||
|
||||
GridCartesian * UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
|
||||
GridCartesian * FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_d);
|
||||
GridRedBlackCartesian * FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_d);
|
||||
|
||||
|
||||
std::cout << GridLogMessage<< "* DOUBLE/DOUBLE"<<std::endl;
|
||||
GparityLatticeFermionD src_d(FGrid_d);
|
||||
precisionChange(src_d,src);
|
||||
|
||||
LatticeGaugeFieldD Umu_d(UGrid_d);
|
||||
precisionChange(Umu_d,Umu);
|
||||
|
||||
GparityLatticeFermionD result_d(FGrid_d);
|
||||
|
||||
GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
|
||||
if (1) {
|
||||
FGrid_d->Barrier();
|
||||
DwD.ZeroCounters();
|
||||
DwD.Dhop(src_d,result_d,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
DwD.Dhop(src_d,result_d,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid_d->Barrier();
|
||||
|
||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||
double flops=2*1344*volume*ncall;
|
||||
|
||||
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
DwD.Report();
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -66,8 +66,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
Vec tsum; tsum = zero;
|
||||
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101}));
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
std::vector<double> stop(threads);
|
||||
Vector<Vec> sum(threads);
|
||||
@ -78,7 +77,8 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
double start=usecond();
|
||||
parallel_for(int t=0;t<threads;t++){
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int t=0;t<threads;t++){
|
||||
|
||||
sum[t] = x[t]._odata[0];
|
||||
for(int i=0;i<Nloop;i++){
|
||||
|
@ -55,21 +55,21 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
uint64_t lmax=96;
|
||||
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
uint64_t lmax=44;
|
||||
#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid);// random(pRNG,z);
|
||||
LatticeVec x(&Grid);// random(pRNG,x);
|
||||
LatticeVec y(&Grid);// random(pRNG,y);
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
double a=2.0;
|
||||
|
||||
|
||||
@ -83,7 +83,7 @@ int main (int argc, char ** argv)
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3.0*vol*Nvec*sizeof(Real);
|
||||
double bytes=3*vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||
|
||||
}
|
||||
@ -94,17 +94,17 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid);// random(pRNG,z);
|
||||
LatticeVec x(&Grid);// random(pRNG,x);
|
||||
LatticeVec y(&Grid);// random(pRNG,y);
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
double a=2.0;
|
||||
|
||||
uint64_t Nloop=NLOOP;
|
||||
@ -119,7 +119,7 @@ int main (int argc, char ** argv)
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
double bytes=3.0*vol*Nvec*sizeof(Real);
|
||||
double bytes=3*vol*Nvec*sizeof(Real);
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||
|
||||
}
|
||||
@ -129,20 +129,20 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
uint64_t Nloop=NLOOP;
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeVec z(&Grid);// random(pRNG,z);
|
||||
LatticeVec x(&Grid);// random(pRNG,x);
|
||||
LatticeVec y(&Grid);// random(pRNG,y);
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
RealD a=2.0;
|
||||
|
||||
|
||||
@ -154,7 +154,7 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=2.0*vol*Nvec*sizeof(Real);
|
||||
double bytes=2*vol*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*1;// mul
|
||||
std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl;
|
||||
|
||||
@ -166,17 +166,17 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=8;lat<=lmax;lat+=8){
|
||||
for(int lat=4;lat<=lmax;lat+=4){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
uint64_t Nloop=NLOOP;
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
LatticeVec z(&Grid);// random(pRNG,z);
|
||||
LatticeVec x(&Grid);// random(pRNG,x);
|
||||
LatticeVec y(&Grid);// random(pRNG,y);
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
LatticeVec z(&Grid); //random(pRNG,z);
|
||||
LatticeVec x(&Grid); //random(pRNG,x);
|
||||
LatticeVec y(&Grid); //random(pRNG,y);
|
||||
RealD a=2.0;
|
||||
Real nn;
|
||||
double start=usecond();
|
||||
@ -187,7 +187,7 @@ int main (int argc, char ** argv)
|
||||
double stop=usecond();
|
||||
double time = (stop-start)/Nloop*1000;
|
||||
|
||||
double bytes=1.0*vol*Nvec*sizeof(Real);
|
||||
double bytes=vol*Nvec*sizeof(Real);
|
||||
double flops=vol*Nvec*2;// mul,add
|
||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl;
|
||||
|
||||
|
@ -1,134 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./benchmarks/Benchmark_staggered.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(&Grid);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
|
||||
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
|
||||
|
||||
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
|
||||
typename ImprovedStaggeredFermionR::ImplParams params;
|
||||
|
||||
FermionField src (&Grid); random(pRNG,src);
|
||||
FermionField result(&Grid); result=zero;
|
||||
FermionField ref(&Grid); ref=zero;
|
||||
FermionField tmp(&Grid); tmp=zero;
|
||||
FermionField err(&Grid); tmp=zero;
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,&Grid);
|
||||
|
||||
double volume=1;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
|
||||
// Only one non-zero (y)
|
||||
#if 0
|
||||
Umu=zero;
|
||||
Complex cone(1.0,0.0);
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
random(pRNG,U[nn]);
|
||||
if(1) {
|
||||
if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
|
||||
// else { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
|
||||
else { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
|
||||
}
|
||||
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||
}
|
||||
#endif
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
ref = zero;
|
||||
/*
|
||||
{ // Naive wilson implementation
|
||||
ref = zero;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
|
||||
tmp = U[mu]*Cshift(src,mu,1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
}
|
||||
|
||||
tmp =adj(U[mu])*src;
|
||||
tmp =Cshift(tmp,mu,-1);
|
||||
for(int i=0;i<ref._odata.size();i++){
|
||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||
}
|
||||
}
|
||||
}
|
||||
ref = -0.5*ref;
|
||||
*/
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD c1=9.0/8.0;
|
||||
RealD c2=-1.0/24.0;
|
||||
RealD u0=1.0;
|
||||
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
|
||||
|
||||
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
|
||||
int ncall=1000;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Ds.Dhop(src,result,0);
|
||||
}
|
||||
double t1=usecond();
|
||||
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
|
||||
|
||||
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -35,14 +35,13 @@ using namespace Grid::QCD;
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
#define LMAX (64)
|
||||
|
||||
int64_t Nloop=20;
|
||||
int Nloop=1000;
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
int64_t threads = GridThread::GetThreads();
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
@ -51,19 +50,19 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=2;lat<=32;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
LatticeColourMatrix z(&Grid);// random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid);// random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid);// random(pRNG,y);
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
for(int i=0;i<Nloop;i++){
|
||||
x=x*y;
|
||||
}
|
||||
double stop=usecond();
|
||||
@ -83,20 +82,20 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=2;lat<=32;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
LatticeColourMatrix z(&Grid); //random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); //random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); //random(pRNG,y);
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
for(int i=0;i<Nloop;i++){
|
||||
z=x*y;
|
||||
}
|
||||
double stop=usecond();
|
||||
@ -114,20 +113,20 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=2;lat<=32;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
LatticeColourMatrix z(&Grid); //random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); //random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); //random(pRNG,y);
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
for(int i=0;i<Nloop;i++){
|
||||
mult(z,x,y);
|
||||
}
|
||||
double stop=usecond();
|
||||
@ -145,20 +144,20 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||
|
||||
for(int lat=2;lat<=LMAX;lat+=2){
|
||||
for(int lat=2;lat<=32;lat+=2){
|
||||
|
||||
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||
LatticeColourMatrix z(&Grid); //random(pRNG,z);
|
||||
LatticeColourMatrix x(&Grid); //random(pRNG,x);
|
||||
LatticeColourMatrix y(&Grid); //random(pRNG,y);
|
||||
|
||||
double start=usecond();
|
||||
for(int64_t i=0;i<Nloop;i++){
|
||||
for(int i=0;i<Nloop;i++){
|
||||
mac(z,x,y);
|
||||
}
|
||||
double stop=usecond();
|
||||
|
@ -58,7 +58,7 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(&Grid);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
@ -69,7 +69,7 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
|
||||
// pRNG.SeedRandomDevice();
|
||||
|
||||
LatticeFermion src (&Grid); random(pRNG,src);
|
||||
LatticeFermion result(&Grid); result=zero;
|
||||
|
@ -93,7 +93,7 @@ int main (int argc, char ** argv)
|
||||
std::cout << latt_size.back() << "\t\t";
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(&Grid);
|
||||
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||
|
@ -1,7 +1,11 @@
|
||||
include Make.inc
|
||||
|
||||
bench-local: all
|
||||
./Benchmark_su3
|
||||
./Benchmark_memory_bandwidth
|
||||
./Benchmark_wilson
|
||||
./Benchmark_dwf --dslash-unroll
|
||||
simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o
|
||||
|
||||
EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a
|
||||
|
||||
libsimple_su3_test_a_SOURCES = simple_su3_test.cc
|
||||
|
||||
libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc
|
||||
|
||||
libsimple_simd_test_a_SOURCES = simple_simd_test.cc
|
||||
|
@ -1,11 +1,11 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
|
||||
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
|
||||
|
||||
echo "-- deploying Eigen source..."
|
||||
wget ${EIGEN_URL} --no-check-certificate
|
||||
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
|
||||
#rm `basename ${EIGEN_URL}`
|
||||
rm `basename ${EIGEN_URL}`
|
||||
|
||||
echo '-- generating Make.inc files...'
|
||||
./scripts/filelist
|
||||
|
242
configure.ac
242
configure.ac
@ -1,23 +1,16 @@
|
||||
AC_PREREQ([2.63])
|
||||
AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid])
|
||||
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
|
||||
AC_CANONICAL_BUILD
|
||||
AC_CANONICAL_HOST
|
||||
AC_CANONICAL_TARGET
|
||||
AM_INIT_AUTOMAKE([subdir-objects 1.13])
|
||||
AM_EXTRA_RECURSIVE_TARGETS([tests bench])
|
||||
AM_INIT_AUTOMAKE(subdir-objects)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
AC_CONFIG_SRCDIR([lib/Grid.h])
|
||||
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
|
||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
|
||||
################ Get git info
|
||||
#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])
|
||||
|
||||
################ Set flags
|
||||
# do not move!
|
||||
CXXFLAGS="-O3 $CXXFLAGS"
|
||||
|
||||
############### Checks for programs
|
||||
CXXFLAGS="-O3 $CXXFLAGS"
|
||||
AC_PROG_CXX
|
||||
AC_PROG_RANLIB
|
||||
|
||||
@ -31,14 +24,12 @@ AX_GXX_VERSION
|
||||
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
||||
[version of g++ that will compile the code])
|
||||
|
||||
|
||||
|
||||
############### Checks for typedefs, structures, and compiler characteristics
|
||||
AC_TYPE_SIZE_T
|
||||
AC_TYPE_UINT32_T
|
||||
AC_TYPE_UINT64_T
|
||||
|
||||
############### OpenMP
|
||||
############### OpenMP
|
||||
AC_OPENMP
|
||||
ac_openmp=no
|
||||
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
||||
@ -54,14 +45,9 @@ AC_CHECK_HEADERS(malloc/malloc.h)
|
||||
AC_CHECK_HEADERS(malloc.h)
|
||||
AC_CHECK_HEADERS(endian.h)
|
||||
AC_CHECK_HEADERS(execinfo.h)
|
||||
AC_CHECK_HEADERS(numaif.h)
|
||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
||||
|
||||
############## Standard libraries
|
||||
AC_CHECK_LIB([m],[cos])
|
||||
AC_CHECK_LIB([stdc++],[abort])
|
||||
|
||||
############### GMP and MPFR
|
||||
AC_ARG_WITH([gmp],
|
||||
[AS_HELP_STRING([--with-gmp=prefix],
|
||||
@ -74,23 +60,16 @@ AC_ARG_WITH([mpfr],
|
||||
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
||||
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
||||
|
||||
############### FFTW3
|
||||
AC_ARG_WITH([fftw],
|
||||
############### FFTW3
|
||||
AC_ARG_WITH([fftw],
|
||||
[AS_HELP_STRING([--with-fftw=prefix],
|
||||
[try this for a non-standard install prefix of the FFTW3 library])],
|
||||
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
||||
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
||||
|
||||
############### LIME
|
||||
AC_ARG_WITH([lime],
|
||||
[AS_HELP_STRING([--with-lime=prefix],
|
||||
[try this for a non-standard install prefix of the LIME library])],
|
||||
[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
|
||||
[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
|
||||
|
||||
############### lapack
|
||||
############### lapack
|
||||
AC_ARG_ENABLE([lapack],
|
||||
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
|
||||
|
||||
case ${ac_LAPACK} in
|
||||
@ -104,18 +83,6 @@ case ${ac_LAPACK} in
|
||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||
esac
|
||||
|
||||
############### FP16 conversions
|
||||
AC_ARG_ENABLE([sfw-fp16],
|
||||
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
|
||||
[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
|
||||
case ${ac_SFW_FP16} in
|
||||
yes)
|
||||
AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);;
|
||||
no);;
|
||||
*)
|
||||
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
|
||||
esac
|
||||
|
||||
############### MKL
|
||||
AC_ARG_ENABLE([mkl],
|
||||
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
|
||||
@ -141,7 +108,7 @@ AC_ARG_WITH([hdf5],
|
||||
|
||||
############### first-touch
|
||||
AC_ARG_ENABLE([numa],
|
||||
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
|
||||
|
||||
case ${ac_NUMA} in
|
||||
@ -167,8 +134,8 @@ if test "${ac_MKL}x" != "nox"; then
|
||||
fi
|
||||
|
||||
AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
||||
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
|
||||
[AC_DEFINE([HAVE_LIBMPFR], [1],
|
||||
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
|
||||
[AC_DEFINE([HAVE_LIBMPFR], [1],
|
||||
[Define to 1 if you have the `MPFR' library])]
|
||||
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
|
||||
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
|
||||
@ -177,7 +144,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
||||
if test "${ac_LAPACK}x" != "nox"; then
|
||||
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
|
||||
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_SEARCH_LIBS([fftw_execute], [fftw3],
|
||||
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
|
||||
@ -185,23 +152,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
|
||||
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
|
||||
[have_fftw=true])
|
||||
|
||||
AC_SEARCH_LIBS([limeCreateReader], [lime],
|
||||
[AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
|
||||
[have_lime=true],
|
||||
[AC_MSG_WARN(C-LIME library was not found in your system.
|
||||
In order to use ILGG file format please install or provide the correct path to your installation
|
||||
Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
|
||||
|
||||
AC_SEARCH_LIBS([crc32], [z],
|
||||
[AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
|
||||
[have_zlib=true] [LIBS="${LIBS} -lz"],
|
||||
[AC_MSG_ERROR(zlib library was not found in your system.)])
|
||||
|
||||
AC_SEARCH_LIBS([move_pages], [numa],
|
||||
[AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])]
|
||||
[have_libnuma=true] [LIBS="${LIBS} -lnuma"],
|
||||
[AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)])
|
||||
|
||||
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
|
||||
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
|
||||
[have_hdf5=true]
|
||||
@ -226,26 +176,19 @@ case ${ax_cv_cxx_compiler_vendor} in
|
||||
case ${ac_SIMD} in
|
||||
SSE4)
|
||||
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||
case ${ac_SFW_FP16} in
|
||||
yes)
|
||||
SIMD_FLAGS='-msse4.2';;
|
||||
no)
|
||||
SIMD_FLAGS='-msse4.2 -mf16c';;
|
||||
*)
|
||||
AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);;
|
||||
esac;;
|
||||
SIMD_FLAGS='-msse4.2';;
|
||||
AVX)
|
||||
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||
SIMD_FLAGS='-mavx -mf16c';;
|
||||
SIMD_FLAGS='-mavx';;
|
||||
AVXFMA4)
|
||||
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
|
||||
SIMD_FLAGS='-mavx -mfma4 -mf16c';;
|
||||
SIMD_FLAGS='-mavx -mfma4';;
|
||||
AVXFMA)
|
||||
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||
SIMD_FLAGS='-mavx -mfma -mf16c';;
|
||||
SIMD_FLAGS='-mavx -mfma';;
|
||||
AVX2)
|
||||
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||
SIMD_FLAGS='-mavx2 -mfma -mf16c';;
|
||||
SIMD_FLAGS='-mavx2 -mfma';;
|
||||
AVX512)
|
||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
||||
@ -254,7 +197,6 @@ case ${ax_cv_cxx_compiler_vendor} in
|
||||
SIMD_FLAGS='';;
|
||||
KNL)
|
||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||
AC_DEFINE([KNL],[1],[Knights landing processor])
|
||||
SIMD_FLAGS='-march=knl';;
|
||||
GEN)
|
||||
AC_DEFINE([GEN],[1],[generic vector code])
|
||||
@ -262,9 +204,6 @@ case ${ax_cv_cxx_compiler_vendor} in
|
||||
[generic SIMD vector width (in bytes)])
|
||||
SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)"
|
||||
SIMD_FLAGS='';;
|
||||
NEONv8)
|
||||
AC_DEFINE([NEONV8],[1],[ARMv8 NEON])
|
||||
SIMD_FLAGS='-march=armv8-a';;
|
||||
QPX|BGQ)
|
||||
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
|
||||
SIMD_FLAGS='';;
|
||||
@ -293,7 +232,6 @@ case ${ax_cv_cxx_compiler_vendor} in
|
||||
SIMD_FLAGS='';;
|
||||
KNL)
|
||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
|
||||
AC_DEFINE([KNL],[1],[Knights landing processor])
|
||||
SIMD_FLAGS='-xmic-avx512';;
|
||||
GEN)
|
||||
AC_DEFINE([GEN],[1],[generic vector code])
|
||||
@ -331,41 +269,8 @@ case ${ac_PRECISION} in
|
||||
double)
|
||||
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
|
||||
;;
|
||||
esac
|
||||
|
||||
###################### Shared memory allocation technique under MPI3
|
||||
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
|
||||
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
|
||||
|
||||
case ${ac_SHM} in
|
||||
|
||||
shmget)
|
||||
AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
|
||||
;;
|
||||
|
||||
shmopen)
|
||||
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
|
||||
;;
|
||||
|
||||
hugetlbfs)
|
||||
AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] )
|
||||
;;
|
||||
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]);
|
||||
;;
|
||||
esac
|
||||
|
||||
###################### Shared base path for SHMMMAP
|
||||
AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
|
||||
[Select SHM mmap base path for hugetlbfs])],
|
||||
[ac_SHMPATH=${enable_shmpath}],
|
||||
[ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
|
||||
AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
|
||||
|
||||
############### communication type selection
|
||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
|
||||
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
||||
@ -375,14 +280,14 @@ case ${ac_COMMS} in
|
||||
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
||||
comms_type='none'
|
||||
;;
|
||||
mpi3l*)
|
||||
AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] )
|
||||
comms_type='mpi3l'
|
||||
;;
|
||||
mpi3*)
|
||||
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
|
||||
comms_type='mpi3'
|
||||
;;
|
||||
mpit)
|
||||
AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] )
|
||||
comms_type='mpit'
|
||||
;;
|
||||
mpi*)
|
||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
||||
comms_type='mpi'
|
||||
@ -392,7 +297,7 @@ case ${ac_COMMS} in
|
||||
comms_type='shmem'
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||
;;
|
||||
esac
|
||||
case ${ac_COMMS} in
|
||||
@ -410,13 +315,13 @@ esac
|
||||
AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ])
|
||||
AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" == "mpiX" ])
|
||||
AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" == "mpi3X" ] )
|
||||
AM_CONDITIONAL(BUILD_COMMS_MPIT, [ test "${comms_type}X" == "mpitX" ] )
|
||||
AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] )
|
||||
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ])
|
||||
|
||||
############### RNG selection
|
||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\
|
||||
[Select Random Number Generator to be used])],\
|
||||
[ac_RNG=${enable_rng}],[ac_RNG=sitmo])
|
||||
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
||||
|
||||
case ${ac_RNG} in
|
||||
ranlux48)
|
||||
@ -429,7 +334,7 @@ case ${ac_RNG} in
|
||||
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
||||
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -446,7 +351,7 @@ case ${ac_TIMERS} in
|
||||
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
||||
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -458,7 +363,7 @@ case ${ac_CHROMA} in
|
||||
yes|no)
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
||||
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -479,67 +384,12 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
|
||||
|
||||
############### Ouput
|
||||
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
|
||||
GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||
GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||
GRID_LIBS=$LIBS
|
||||
GRID_SHORT_SHA=`git rev-parse --short HEAD`
|
||||
GRID_SHA=`git rev-parse HEAD`
|
||||
GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
|
||||
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
|
||||
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
|
||||
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
|
||||
AC_SUBST([AM_CFLAGS])
|
||||
AC_SUBST([AM_CXXFLAGS])
|
||||
AC_SUBST([AM_LDFLAGS])
|
||||
AC_SUBST([GRID_CXXFLAGS])
|
||||
AC_SUBST([GRID_LDFLAGS])
|
||||
AC_SUBST([GRID_LIBS])
|
||||
AC_SUBST([GRID_SHA])
|
||||
AC_SUBST([GRID_BRANCH])
|
||||
|
||||
git_commit=`cd $srcdir && ./scripts/configure.commit`
|
||||
|
||||
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Summary of configuration for $PACKAGE v$VERSION
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
----- GIT VERSION -------------------------------------
|
||||
$git_commit
|
||||
----- PLATFORM ----------------------------------------
|
||||
architecture (build) : $build_cpu
|
||||
os (build) : $build_os
|
||||
architecture (target) : $target_cpu
|
||||
os (target) : $target_os
|
||||
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||
compiler version : ${ax_cv_gxx_version}
|
||||
----- BUILD OPTIONS -----------------------------------
|
||||
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
|
||||
Threading : ${ac_openmp}
|
||||
Communications type : ${comms_type}
|
||||
Shared memory allocator : ${ac_SHM}
|
||||
Shared memory mmap path : ${ac_SHMPATH}
|
||||
Default precision : ${ac_PRECISION}
|
||||
Software FP16 conversion : ${ac_SFW_FP16}
|
||||
RNG choice : ${ac_RNG}
|
||||
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||
LAPACK : ${ac_LAPACK}
|
||||
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||
LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
|
||||
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
|
||||
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
|
||||
----- BUILD FLAGS -------------------------------------
|
||||
CXXFLAGS:
|
||||
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||
LDFLAGS:
|
||||
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||
LIBS:
|
||||
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||
-------------------------------------------------------" > grid.configure.summary
|
||||
|
||||
GRID_SUMMARY="`cat grid.configure.summary`"
|
||||
AM_SUBST_NOTMAKE([GRID_SUMMARY])
|
||||
AC_SUBST([GRID_SUMMARY])
|
||||
|
||||
AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
|
||||
AC_CONFIG_FILES(Makefile)
|
||||
AC_CONFIG_FILES(lib/Makefile)
|
||||
AC_CONFIG_FILES(tests/Makefile)
|
||||
@ -550,16 +400,42 @@ AC_CONFIG_FILES(tests/forces/Makefile)
|
||||
AC_CONFIG_FILES(tests/hadrons/Makefile)
|
||||
AC_CONFIG_FILES(tests/hmc/Makefile)
|
||||
AC_CONFIG_FILES(tests/solver/Makefile)
|
||||
AC_CONFIG_FILES(tests/lanczos/Makefile)
|
||||
AC_CONFIG_FILES(tests/smearing/Makefile)
|
||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
||||
AC_CONFIG_FILES(tests/testu01/Makefile)
|
||||
AC_CONFIG_FILES(benchmarks/Makefile)
|
||||
AC_CONFIG_FILES(extras/Makefile)
|
||||
AC_CONFIG_FILES(extras/Hadrons/Makefile)
|
||||
AC_OUTPUT
|
||||
|
||||
echo ""
|
||||
cat grid.configure.summary
|
||||
echo ""
|
||||
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Summary of configuration for $PACKAGE v$VERSION
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
----- PLATFORM ----------------------------------------
|
||||
architecture (build) : $build_cpu
|
||||
os (build) : $build_os
|
||||
architecture (target) : $target_cpu
|
||||
os (target) : $target_os
|
||||
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||
compiler version : ${ax_cv_gxx_version}
|
||||
----- BUILD OPTIONS -----------------------------------
|
||||
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
|
||||
Threading : ${ac_openmp}
|
||||
Communications type : ${comms_type}
|
||||
Default precision : ${ac_PRECISION}
|
||||
RNG choice : ${ac_RNG}
|
||||
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||
LAPACK : ${ac_LAPACK}
|
||||
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
|
||||
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
|
||||
----- BUILD FLAGS -------------------------------------
|
||||
CXXFLAGS:
|
||||
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||
LDFLAGS:
|
||||
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||
LIBS:
|
||||
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||
-------------------------------------------------------" > config.summary
|
||||
echo ""
|
||||
cat config.summary
|
||||
echo ""
|
||||
|
@ -162,8 +162,7 @@ void Application::saveParameterFile(const std::string parameterFileName)
|
||||
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
|
||||
|
||||
#define DEFINE_MEMPEAK \
|
||||
GeneticScheduler<unsigned int>::ObjFunc memPeak = \
|
||||
[this](const std::vector<unsigned int> &program)\
|
||||
auto memPeak = [this](const std::vector<unsigned int> &program)\
|
||||
{\
|
||||
unsigned int memPeak;\
|
||||
bool msg;\
|
||||
|
@ -41,10 +41,9 @@ using namespace Hadrons;
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
Environment::Environment(void)
|
||||
{
|
||||
dim_ = GridDefaultLatt();
|
||||
nd_ = dim_.size();
|
||||
nd_ = GridDefaultLatt().size();
|
||||
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
|
||||
dim_, GridDefaultSimd(nd_, vComplex::Nsimd()),
|
||||
GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()),
|
||||
GridDefaultMpi()));
|
||||
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
|
||||
auto loc = getGrid()->LocalDimensions();
|
||||
@ -133,16 +132,6 @@ unsigned int Environment::getNd(void) const
|
||||
return nd_;
|
||||
}
|
||||
|
||||
std::vector<int> Environment::getDim(void) const
|
||||
{
|
||||
return dim_;
|
||||
}
|
||||
|
||||
int Environment::getDim(const unsigned int mu) const
|
||||
{
|
||||
return dim_[mu];
|
||||
}
|
||||
|
||||
// random number generator /////////////////////////////////////////////////////
|
||||
void Environment::setSeed(const std::vector<int> &seed)
|
||||
{
|
||||
@ -282,21 +271,6 @@ std::string Environment::getModuleType(const std::string name) const
|
||||
return getModuleType(getModuleAddress(name));
|
||||
}
|
||||
|
||||
std::string Environment::getModuleNamespace(const unsigned int address) const
|
||||
{
|
||||
std::string type = getModuleType(address), ns;
|
||||
|
||||
auto pos2 = type.rfind("::");
|
||||
auto pos1 = type.rfind("::", pos2 - 2);
|
||||
|
||||
return type.substr(pos1 + 2, pos2 - pos1 - 2);
|
||||
}
|
||||
|
||||
std::string Environment::getModuleNamespace(const std::string name) const
|
||||
{
|
||||
return getModuleNamespace(getModuleAddress(name));
|
||||
}
|
||||
|
||||
bool Environment::hasModule(const unsigned int address) const
|
||||
{
|
||||
return (address < module_.size());
|
||||
@ -518,14 +492,7 @@ std::string Environment::getObjectType(const unsigned int address) const
|
||||
{
|
||||
if (hasRegisteredObject(address))
|
||||
{
|
||||
if (object_[address].type)
|
||||
{
|
||||
return typeName(object_[address].type);
|
||||
}
|
||||
else
|
||||
{
|
||||
return "<no type>";
|
||||
}
|
||||
return typeName(object_[address].type);
|
||||
}
|
||||
else if (hasObject(address))
|
||||
{
|
||||
@ -565,23 +532,6 @@ Environment::Size Environment::getObjectSize(const std::string name) const
|
||||
return getObjectSize(getObjectAddress(name));
|
||||
}
|
||||
|
||||
unsigned int Environment::getObjectModule(const unsigned int address) const
|
||||
{
|
||||
if (hasObject(address))
|
||||
{
|
||||
return object_[address].module;
|
||||
}
|
||||
else
|
||||
{
|
||||
HADRON_ERROR("no object with address " + std::to_string(address));
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int Environment::getObjectModule(const std::string name) const
|
||||
{
|
||||
return getObjectModule(getObjectAddress(name));
|
||||
}
|
||||
|
||||
unsigned int Environment::getObjectLs(const unsigned int address) const
|
||||
{
|
||||
if (hasRegisteredObject(address))
|
||||
|
@ -106,8 +106,6 @@ public:
|
||||
void createGrid(const unsigned int Ls);
|
||||
GridCartesian * getGrid(const unsigned int Ls = 1) const;
|
||||
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
|
||||
std::vector<int> getDim(void) const;
|
||||
int getDim(const unsigned int mu) const;
|
||||
unsigned int getNd(void) const;
|
||||
// random number generator
|
||||
void setSeed(const std::vector<int> &seed);
|
||||
@ -133,8 +131,6 @@ public:
|
||||
std::string getModuleName(const unsigned int address) const;
|
||||
std::string getModuleType(const unsigned int address) const;
|
||||
std::string getModuleType(const std::string name) const;
|
||||
std::string getModuleNamespace(const unsigned int address) const;
|
||||
std::string getModuleNamespace(const std::string name) const;
|
||||
bool hasModule(const unsigned int address) const;
|
||||
bool hasModule(const std::string name) const;
|
||||
Graph<unsigned int> makeModuleGraph(void) const;
|
||||
@ -175,8 +171,6 @@ public:
|
||||
std::string getObjectType(const std::string name) const;
|
||||
Size getObjectSize(const unsigned int address) const;
|
||||
Size getObjectSize(const std::string name) const;
|
||||
unsigned int getObjectModule(const unsigned int address) const;
|
||||
unsigned int getObjectModule(const std::string name) const;
|
||||
unsigned int getObjectLs(const unsigned int address) const;
|
||||
unsigned int getObjectLs(const std::string name) const;
|
||||
bool hasObject(const unsigned int address) const;
|
||||
@ -187,10 +181,6 @@ public:
|
||||
bool hasCreatedObject(const std::string name) const;
|
||||
bool isObject5d(const unsigned int address) const;
|
||||
bool isObject5d(const std::string name) const;
|
||||
template <typename T>
|
||||
bool isObjectOfType(const unsigned int address) const;
|
||||
template <typename T>
|
||||
bool isObjectOfType(const std::string name) const;
|
||||
Environment::Size getTotalSize(void) const;
|
||||
void addOwnership(const unsigned int owner,
|
||||
const unsigned int property);
|
||||
@ -207,7 +197,6 @@ private:
|
||||
bool dryRun_{false};
|
||||
unsigned int traj_, locVol_;
|
||||
// grids
|
||||
std::vector<int> dim_;
|
||||
GridPt grid4d_;
|
||||
std::map<unsigned int, GridPt> grid5d_;
|
||||
GridRbPt gridRb4d_;
|
||||
@ -354,7 +343,7 @@ T * Environment::getObject(const unsigned int address) const
|
||||
else
|
||||
{
|
||||
HADRON_ERROR("object with address " + std::to_string(address) +
|
||||
" does not have type '" + typeName(&typeid(T)) +
|
||||
" does not have type '" + typeid(T).name() +
|
||||
"' (has type '" + getObjectType(address) + "')");
|
||||
}
|
||||
}
|
||||
@ -391,37 +380,6 @@ T * Environment::createLattice(const std::string name)
|
||||
return createLattice<T>(getObjectAddress(name));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Environment::isObjectOfType(const unsigned int address) const
|
||||
{
|
||||
if (hasRegisteredObject(address))
|
||||
{
|
||||
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (hasObject(address))
|
||||
{
|
||||
HADRON_ERROR("object with address " + std::to_string(address) +
|
||||
" exists but is not registered");
|
||||
}
|
||||
else
|
||||
{
|
||||
HADRON_ERROR("no object with address " + std::to_string(address));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Environment::isObjectOfType(const std::string name) const
|
||||
{
|
||||
return isObjectOfType<T>(getObjectAddress(name));
|
||||
}
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_Environment_hpp_
|
||||
|
@ -51,43 +51,23 @@ using Grid::operator<<;
|
||||
* error with GCC 5 (clang & GCC 6 compile fine without it).
|
||||
*/
|
||||
|
||||
// FIXME: find a way to do that in a more general fashion
|
||||
#ifndef FIMPL
|
||||
#define FIMPL WilsonImplR
|
||||
#endif
|
||||
#ifndef SIMPL
|
||||
#define SIMPL ScalarImplCR
|
||||
#endif
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
// type aliases
|
||||
#define FERM_TYPE_ALIASES(FImpl, suffix)\
|
||||
#define TYPE_ALIASES(FImpl, suffix)\
|
||||
typedef FermionOperator<FImpl> FMat##suffix; \
|
||||
typedef typename FImpl::FermionField FermionField##suffix; \
|
||||
typedef typename FImpl::PropagatorField PropagatorField##suffix; \
|
||||
typedef typename FImpl::SitePropagator SitePropagator##suffix; \
|
||||
typedef std::vector<typename FImpl::SitePropagator::scalar_object> \
|
||||
SlicedPropagator##suffix;
|
||||
|
||||
#define GAUGE_TYPE_ALIASES(FImpl, suffix)\
|
||||
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;
|
||||
|
||||
#define SCALAR_TYPE_ALIASES(SImpl, suffix)\
|
||||
typedef typename SImpl::Field ScalarField##suffix;\
|
||||
typedef typename SImpl::Field PropagatorField##suffix;
|
||||
|
||||
#define SOLVER_TYPE_ALIASES(FImpl, suffix)\
|
||||
typedef std::function<void(FermionField##suffix &,\
|
||||
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\
|
||||
typedef std::function<void(FermionField##suffix &, \
|
||||
const FermionField##suffix &)> SolverFn##suffix;
|
||||
|
||||
#define SINK_TYPE_ALIASES(suffix)\
|
||||
typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix;
|
||||
|
||||
#define FGS_TYPE_ALIASES(FImpl, suffix)\
|
||||
FERM_TYPE_ALIASES(FImpl, suffix)\
|
||||
GAUGE_TYPE_ALIASES(FImpl, suffix)\
|
||||
SOLVER_TYPE_ALIASES(FImpl, suffix)
|
||||
|
||||
// logger
|
||||
class HadronsLogger: public Logger
|
||||
{
|
||||
@ -165,15 +145,6 @@ std::string typeName(void)
|
||||
return typeName(typeIdPt<T>());
|
||||
}
|
||||
|
||||
// default writers/readers
|
||||
#ifdef HAVE_HDF5
|
||||
typedef Hdf5Reader CorrReader;
|
||||
typedef Hdf5Writer CorrWriter;
|
||||
#else
|
||||
typedef XmlReader CorrReader;
|
||||
typedef XmlWriter CorrWriter;
|
||||
#endif
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_Global_hpp_
|
||||
|
@ -1,25 +1,40 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules.hpp
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
|
||||
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
|
||||
#include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp>
|
||||
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
|
||||
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
|
||||
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
|
||||
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
|
||||
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
|
||||
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
|
||||
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
|
||||
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
|
||||
#include <Grid/Hadrons/Modules/MSink/Point.hpp>
|
||||
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
|
||||
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
|
||||
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
|
||||
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
|
||||
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
|
||||
#include <Grid/Hadrons/Modules/Quark.hpp>
|
||||
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MAction_DWF_hpp_
|
||||
#define Hadrons_MAction_DWF_hpp_
|
||||
#ifndef Hadrons_DWF_hpp_
|
||||
#define Hadrons_DWF_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -48,15 +48,14 @@ public:
|
||||
std::string, gauge,
|
||||
unsigned int, Ls,
|
||||
double , mass,
|
||||
double , M5,
|
||||
std::string , boundary);
|
||||
double , M5);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TDWF: public Module<DWFPar>
|
||||
{
|
||||
public:
|
||||
FGS_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TDWF(const std::string name);
|
||||
@ -117,19 +116,14 @@ void TDWF<FImpl>::execute(void)
|
||||
<< par().mass << ", M5= " << par().M5 << " and Ls= "
|
||||
<< par().Ls << " using gauge field '" << par().gauge << "'"
|
||||
<< std::endl;
|
||||
LOG(Message) << "Fermion boundary conditions: " << par().boundary
|
||||
<< std::endl;
|
||||
env().createGrid(par().Ls);
|
||||
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
|
||||
auto &g4 = *env().getGrid();
|
||||
auto &grb4 = *env().getRbGrid();
|
||||
auto &g5 = *env().getGrid(par().Ls);
|
||||
auto &grb5 = *env().getRbGrid(par().Ls);
|
||||
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
|
||||
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
|
||||
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
|
||||
par().mass, par().M5,
|
||||
implParams);
|
||||
par().mass, par().M5);
|
||||
env().setObject(getName(), fMatPt);
|
||||
}
|
||||
|
||||
@ -137,4 +131,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MAction_DWF_hpp_
|
||||
#endif // Hadrons_DWF_hpp_
|
||||
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MAction_Wilson_hpp_
|
||||
#define Hadrons_MAction_Wilson_hpp_
|
||||
#ifndef Hadrons_Wilson_hpp_
|
||||
#define Hadrons_Wilson_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -46,15 +46,14 @@ class WilsonPar: Serializable
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
|
||||
std::string, gauge,
|
||||
double , mass,
|
||||
std::string, boundary);
|
||||
double , mass);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TWilson: public Module<WilsonPar>
|
||||
{
|
||||
public:
|
||||
FGS_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TWilson(const std::string name);
|
||||
@ -113,15 +112,10 @@ void TWilson<FImpl>::execute()
|
||||
{
|
||||
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
|
||||
<< " using gauge field '" << par().gauge << "'" << std::endl;
|
||||
LOG(Message) << "Fermion boundary conditions: " << par().boundary
|
||||
<< std::endl;
|
||||
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
|
||||
auto &grid = *env().getGrid();
|
||||
auto &gridRb = *env().getRbGrid();
|
||||
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
|
||||
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
|
||||
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
|
||||
implParams);
|
||||
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
|
||||
env().setObject(getName(), fMatPt);
|
||||
}
|
||||
|
||||
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_Baryon_hpp_
|
||||
#define Hadrons_MContraction_Baryon_hpp_
|
||||
#ifndef Hadrons_Baryon_hpp_
|
||||
#define Hadrons_Baryon_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
class TBaryon: public Module<BaryonPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl1, 1);
|
||||
FERM_TYPE_ALIASES(FImpl2, 2);
|
||||
FERM_TYPE_ALIASES(FImpl3, 3);
|
||||
TYPE_ALIASES(FImpl1, 1);
|
||||
TYPE_ALIASES(FImpl2, 2);
|
||||
TYPE_ALIASES(FImpl3, 3);
|
||||
class Result: Serializable
|
||||
{
|
||||
public:
|
||||
@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
|
||||
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
|
||||
<< par().q3 << "'" << std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
XmlWriter writer(par().output);
|
||||
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
|
||||
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
|
||||
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
|
||||
@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
|
||||
|
||||
// FIXME: do contractions
|
||||
|
||||
// write(writer, "meson", result);
|
||||
write(writer, "meson", result);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_Baryon_hpp_
|
||||
#endif // Hadrons_Baryon_hpp_
|
||||
|
@ -1,144 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_DiscLoop_hpp_
|
||||
#define Hadrons_MContraction_DiscLoop_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* DiscLoop *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
class DiscLoopPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
|
||||
std::string, q_loop,
|
||||
Gamma::Algebra, gamma,
|
||||
std::string, output);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TDiscLoop: public Module<DiscLoopPar>
|
||||
{
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
class Result: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
|
||||
Gamma::Algebra, gamma,
|
||||
std::vector<Complex>, corr);
|
||||
};
|
||||
public:
|
||||
// constructor
|
||||
TDiscLoop(const std::string name);
|
||||
// destructor
|
||||
virtual ~TDiscLoop(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);
|
||||
|
||||
/******************************************************************************
|
||||
* TDiscLoop implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
|
||||
: Module<DiscLoopPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q_loop};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TDiscLoop<FImpl>::setup(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TDiscLoop<FImpl>::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing disconnected loop contraction '" << getName()
|
||||
<< "' using '" << par().q_loop << "' with " << par().gamma
|
||||
<< " insertion." << std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
|
||||
LatticeComplex c(env().getGrid());
|
||||
Gamma gamma(par().gamma);
|
||||
std::vector<TComplex> buf;
|
||||
Result result;
|
||||
|
||||
c = trace(gamma*q_loop);
|
||||
sliceSum(c, buf, Tp);
|
||||
|
||||
result.gamma = par().gamma;
|
||||
result.corr.resize(buf.size());
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||
{
|
||||
result.corr[t] = TensorRemove(buf[t]);
|
||||
}
|
||||
|
||||
write(writer, "disc", result);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_DiscLoop_hpp_
|
@ -1,170 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_Gamma3pt_hpp_
|
||||
#define Hadrons_MContraction_Gamma3pt_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/*
|
||||
* 3pt contraction with gamma matrix insertion.
|
||||
*
|
||||
* Schematic:
|
||||
*
|
||||
* q2 q3
|
||||
* /----<------*------<----¬
|
||||
* / gamma \
|
||||
* / \
|
||||
* i * * f
|
||||
* \ /
|
||||
* \ /
|
||||
* \----------->----------/
|
||||
* q1
|
||||
*
|
||||
* trace(g5*q1*adj(q2)*g5*gamma*q3)
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
* Gamma3pt *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
class Gamma3ptPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
|
||||
std::string, q1,
|
||||
std::string, q2,
|
||||
std::string, q3,
|
||||
Gamma::Algebra, gamma,
|
||||
std::string, output);
|
||||
};
|
||||
|
||||
template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
class TGamma3pt: public Module<Gamma3ptPar>
|
||||
{
|
||||
FERM_TYPE_ALIASES(FImpl1, 1);
|
||||
FERM_TYPE_ALIASES(FImpl2, 2);
|
||||
FERM_TYPE_ALIASES(FImpl3, 3);
|
||||
class Result: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
|
||||
Gamma::Algebra, gamma,
|
||||
std::vector<Complex>, corr);
|
||||
};
|
||||
public:
|
||||
// constructor
|
||||
TGamma3pt(const std::string name);
|
||||
// destructor
|
||||
virtual ~TGamma3pt(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);
|
||||
|
||||
/******************************************************************************
|
||||
* TGamma3pt implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
|
||||
: Module<Gamma3ptPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q1, par().q2, par().q3};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl1, typename FImpl2, typename FImpl3>
|
||||
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
|
||||
<< " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
|
||||
<< par().q3 << "', with " << par().gamma << " insertion."
|
||||
<< std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
|
||||
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
|
||||
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q3);
|
||||
LatticeComplex c(env().getGrid());
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
Gamma gamma(par().gamma);
|
||||
std::vector<TComplex> buf;
|
||||
Result result;
|
||||
|
||||
c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
|
||||
sliceSum(c, buf, Tp);
|
||||
|
||||
result.gamma = par().gamma;
|
||||
result.corr.resize(buf.size());
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||
{
|
||||
result.corr[t] = TensorRemove(buf[t]);
|
||||
}
|
||||
|
||||
write(writer, "gamma3pt", result);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_Gamma3pt_hpp_
|
@ -6,10 +6,8 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -29,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_Meson_hpp_
|
||||
#define Hadrons_MContraction_Meson_hpp_
|
||||
#ifndef Hadrons_Meson_hpp_
|
||||
#define Hadrons_Meson_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -38,56 +36,32 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/*
|
||||
|
||||
Meson contractions
|
||||
-----------------------------
|
||||
|
||||
* options:
|
||||
- q1: input propagator 1 (string)
|
||||
- q2: input propagator 2 (string)
|
||||
- gammas: gamma products to insert at sink & source, pairs of gamma matrices
|
||||
(space-separated strings) in angled brackets (i.e. <g_sink g_src>),
|
||||
in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
|
||||
|
||||
Special values: "all" - perform all possible contractions.
|
||||
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
|
||||
given as multiples of (2*pi) / L.
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
* TMeson *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair;
|
||||
|
||||
class MesonPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
|
||||
std::string, q1,
|
||||
std::string, q2,
|
||||
std::string, gammas,
|
||||
std::string, sink,
|
||||
std::string, output);
|
||||
std::string, q1,
|
||||
std::string, q2,
|
||||
std::string, output,
|
||||
Gamma::Algebra, gammaSource,
|
||||
Gamma::Algebra, gammaSink);
|
||||
};
|
||||
|
||||
template <typename FImpl1, typename FImpl2>
|
||||
class TMeson: public Module<MesonPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl1, 1);
|
||||
FERM_TYPE_ALIASES(FImpl2, 2);
|
||||
FERM_TYPE_ALIASES(ScalarImplCR, Scalar);
|
||||
SINK_TYPE_ALIASES(Scalar);
|
||||
TYPE_ALIASES(FImpl1, 1);
|
||||
TYPE_ALIASES(FImpl2, 2);
|
||||
class Result: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
|
||||
Gamma::Algebra, gamma_snk,
|
||||
Gamma::Algebra, gamma_src,
|
||||
std::vector<Complex>, corr);
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
|
||||
};
|
||||
public:
|
||||
// constructor
|
||||
@ -97,7 +71,6 @@ public:
|
||||
// dependencies/products
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
virtual void parseGammaString(std::vector<GammaPair> &gammaList);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
@ -117,7 +90,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name)
|
||||
template <typename FImpl1, typename FImpl2>
|
||||
std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> input = {par().q1, par().q2, par().sink};
|
||||
std::vector<std::string> input = {par().q1, par().q2};
|
||||
|
||||
return input;
|
||||
}
|
||||
@ -130,35 +103,7 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
|
||||
return output;
|
||||
}
|
||||
|
||||
template <typename FImpl1, typename FImpl2>
|
||||
void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
|
||||
{
|
||||
gammaList.clear();
|
||||
// Determine gamma matrices to insert at source/sink.
|
||||
if (par().gammas.compare("all") == 0)
|
||||
{
|
||||
// Do all contractions.
|
||||
for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
|
||||
{
|
||||
for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
|
||||
{
|
||||
gammaList.push_back(std::make_pair((Gamma::Algebra)i,
|
||||
(Gamma::Algebra)j));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Parse individual contractions from input string.
|
||||
gammaList = strToVec<GammaPair>(par().gammas);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
#define mesonConnected(q1, q2, gSnk, gSrc) \
|
||||
(g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2)
|
||||
|
||||
template <typename FImpl1, typename FImpl2>
|
||||
void TMeson<FImpl1, FImpl2>::execute(void)
|
||||
{
|
||||
@ -166,73 +111,21 @@ void TMeson<FImpl1, FImpl2>::execute(void)
|
||||
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'"
|
||||
<< std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
std::vector<TComplex> buf;
|
||||
std::vector<Result> result;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
std::vector<GammaPair> gammaList;
|
||||
int nt = env().getDim(Tp);
|
||||
XmlWriter writer(par().output);
|
||||
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
|
||||
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
|
||||
LatticeComplex c(env().getGrid());
|
||||
Gamma gSrc(par().gammaSource), gSnk(par().gammaSink);
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
std::vector<TComplex> buf;
|
||||
Result result;
|
||||
|
||||
parseGammaString(gammaList);
|
||||
result.resize(gammaList.size());
|
||||
for (unsigned int i = 0; i < result.size(); ++i)
|
||||
c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
|
||||
sliceSum(c, buf, Tp);
|
||||
result.corr.resize(buf.size());
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||
{
|
||||
result[i].gamma_snk = gammaList[i].first;
|
||||
result[i].gamma_src = gammaList[i].second;
|
||||
result[i].corr.resize(nt);
|
||||
}
|
||||
if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and
|
||||
env().template isObjectOfType<SlicedPropagator2>(par().q2))
|
||||
{
|
||||
SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1);
|
||||
SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2);
|
||||
|
||||
LOG(Message) << "(propagator already sinked)" << std::endl;
|
||||
for (unsigned int i = 0; i < result.size(); ++i)
|
||||
{
|
||||
Gamma gSnk(gammaList[i].first);
|
||||
Gamma gSrc(gammaList[i].second);
|
||||
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||
{
|
||||
result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc)));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
|
||||
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
|
||||
LatticeComplex c(env().getGrid());
|
||||
|
||||
LOG(Message) << "(using sink '" << par().sink << "')" << std::endl;
|
||||
for (unsigned int i = 0; i < result.size(); ++i)
|
||||
{
|
||||
Gamma gSnk(gammaList[i].first);
|
||||
Gamma gSrc(gammaList[i].second);
|
||||
std::string ns;
|
||||
|
||||
ns = env().getModuleNamespace(env().getObjectModule(par().sink));
|
||||
if (ns == "MSource")
|
||||
{
|
||||
PropagatorField1 &sink =
|
||||
*env().template getObject<PropagatorField1>(par().sink);
|
||||
|
||||
c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink);
|
||||
sliceSum(c, buf, Tp);
|
||||
}
|
||||
else if (ns == "MSink")
|
||||
{
|
||||
SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink);
|
||||
|
||||
c = trace(mesonConnected(q1, q2, gSnk, gSrc));
|
||||
buf = sink(c);
|
||||
}
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||
{
|
||||
result[i].corr[t] = TensorRemove(buf[t]);
|
||||
}
|
||||
}
|
||||
result.corr[t] = TensorRemove(buf[t]);
|
||||
}
|
||||
write(writer, "meson", result);
|
||||
}
|
||||
@ -241,4 +134,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_Meson_hpp_
|
||||
#endif // Hadrons_Meson_hpp_
|
||||
|
@ -1,114 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_WeakHamiltonian_hpp_
|
||||
#define Hadrons_MContraction_WeakHamiltonian_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* WeakHamiltonian *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
/*******************************************************************************
|
||||
* Utilities for contractions involving the Weak Hamiltonian.
|
||||
******************************************************************************/
|
||||
//// Sum and store correlator.
|
||||
#define MAKE_DIAG(exp, buf, res, n)\
|
||||
sliceSum(exp, buf, Tp);\
|
||||
res.name = (n);\
|
||||
res.corr.resize(buf.size());\
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)\
|
||||
{\
|
||||
res.corr[t] = TensorRemove(buf[t]);\
|
||||
}
|
||||
|
||||
//// Contraction of mu index: use 'mu' variable in exp.
|
||||
#define SUM_MU(buf,exp)\
|
||||
buf = zero;\
|
||||
for (unsigned int mu = 0; mu < ndim; ++mu)\
|
||||
{\
|
||||
buf += exp;\
|
||||
}
|
||||
|
||||
enum
|
||||
{
|
||||
i_V = 0,
|
||||
i_A = 1,
|
||||
n_i = 2
|
||||
};
|
||||
|
||||
class WeakHamiltonianPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar,
|
||||
std::string, q1,
|
||||
std::string, q2,
|
||||
std::string, q3,
|
||||
std::string, q4,
|
||||
std::string, output);
|
||||
};
|
||||
|
||||
#define MAKE_WEAK_MODULE(modname)\
|
||||
class T##modname: public Module<WeakHamiltonianPar>\
|
||||
{\
|
||||
public:\
|
||||
FERM_TYPE_ALIASES(FIMPL,)\
|
||||
class Result: Serializable\
|
||||
{\
|
||||
public:\
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
|
||||
std::string, name,\
|
||||
std::vector<Complex>, corr);\
|
||||
};\
|
||||
public:\
|
||||
/* constructor */ \
|
||||
T##modname(const std::string name);\
|
||||
/* destructor */ \
|
||||
virtual ~T##modname(void) = default;\
|
||||
/* dependency relation */ \
|
||||
virtual std::vector<std::string> getInput(void);\
|
||||
virtual std::vector<std::string> getOutput(void);\
|
||||
/* setup */ \
|
||||
virtual void setup(void);\
|
||||
/* execution */ \
|
||||
virtual void execute(void);\
|
||||
std::vector<std::string> VA_label = {"V", "A"};\
|
||||
};\
|
||||
MODULE_REGISTER_NS(modname, T##modname, MContraction);
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_WeakHamiltonian_hpp_
|
@ -1,137 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Hadrons;
|
||||
using namespace MContraction;
|
||||
|
||||
/*
|
||||
* Weak Hamiltonian current-current contractions, Eye-type.
|
||||
*
|
||||
* These contractions are generated by the Q1 and Q2 operators in the physical
|
||||
* basis (see e.g. Fig 3 of arXiv:1507.03094).
|
||||
*
|
||||
* Schematics: q4 |
|
||||
* /-<-¬ |
|
||||
* / \ | q2 q3
|
||||
* \ / | /----<------*------<----¬
|
||||
* q2 \ / q3 | / /-*-¬ \
|
||||
* /-----<-----* *-----<----¬ | / / \ \
|
||||
* i * H_W * f | i * \ / q4 * f
|
||||
* \ / | \ \->-/ /
|
||||
* \ / | \ /
|
||||
* \---------->---------/ | \----------->----------/
|
||||
* q1 | q1
|
||||
* |
|
||||
* Saucer (S) | Eye (E)
|
||||
*
|
||||
* S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
|
||||
* E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
* TWeakHamiltonianEye implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name)
|
||||
: Module<WeakHamiltonianPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
std::vector<std::string> TWeakHamiltonianEye::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
void TWeakHamiltonianEye::setup(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TWeakHamiltonianEye::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"
|
||||
<< getName() << "' using quarks '" << par().q1 << "', '"
|
||||
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
|
||||
<< "'." << std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
|
||||
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
|
||||
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
|
||||
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
|
||||
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
|
||||
LatticeComplex expbuf(env().getGrid());
|
||||
std::vector<TComplex> corrbuf;
|
||||
std::vector<Result> result(n_eye_diag);
|
||||
unsigned int ndim = env().getNd();
|
||||
|
||||
PropagatorField tmp1(env().getGrid());
|
||||
LatticeComplex tmp2(env().getGrid());
|
||||
std::vector<PropagatorField> S_body(ndim, tmp1);
|
||||
std::vector<PropagatorField> S_loop(ndim, tmp1);
|
||||
std::vector<LatticeComplex> E_body(ndim, tmp2);
|
||||
std::vector<LatticeComplex> E_loop(ndim, tmp2);
|
||||
|
||||
// Setup for S-type contractions.
|
||||
for (int mu = 0; mu < ndim; ++mu)
|
||||
{
|
||||
S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
|
||||
S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
|
||||
}
|
||||
|
||||
// Perform S-type contractions.
|
||||
SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))
|
||||
MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")
|
||||
|
||||
// Recycle sub-expressions for E-type contractions.
|
||||
for (unsigned int mu = 0; mu < ndim; ++mu)
|
||||
{
|
||||
E_body[mu] = trace(S_body[mu]);
|
||||
E_loop[mu] = trace(S_loop[mu]);
|
||||
}
|
||||
|
||||
// Perform E-type contractions.
|
||||
SUM_MU(expbuf, E_body[mu]*E_loop[mu])
|
||||
MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E")
|
||||
|
||||
write(writer, "HW_Eye", result);
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_
|
||||
#define Hadrons_MContraction_WeakHamiltonianEye_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* WeakHamiltonianEye *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
enum
|
||||
{
|
||||
S_diag = 0,
|
||||
E_diag = 1,
|
||||
n_eye_diag = 2
|
||||
};
|
||||
|
||||
// Saucer and Eye subdiagram contractions.
|
||||
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
|
||||
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)
|
||||
|
||||
MAKE_WEAK_MODULE(WeakHamiltonianEye)
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_
|
@ -1,139 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Hadrons;
|
||||
using namespace MContraction;
|
||||
|
||||
/*
|
||||
* Weak Hamiltonian current-current contractions, Non-Eye-type.
|
||||
*
|
||||
* These contractions are generated by the Q1 and Q2 operators in the physical
|
||||
* basis (see e.g. Fig 3 of arXiv:1507.03094).
|
||||
*
|
||||
* Schematic:
|
||||
* q2 q3 | q2 q3
|
||||
* /--<--¬ /--<--¬ | /--<--¬ /--<--¬
|
||||
* / \ / \ | / \ / \
|
||||
* / \ / \ | / \ / \
|
||||
* / \ / \ | / \ / \
|
||||
* i * * H_W * f | i * * * H_W * f
|
||||
* \ * | | \ / \ /
|
||||
* \ / \ / | \ / \ /
|
||||
* \ / \ / | \ / \ /
|
||||
* \ / \ / | \-->--/ \-->--/
|
||||
* \-->--/ \-->--/ | q1 q4
|
||||
* q1 q4 |
|
||||
* Connected (C) | Wing (W)
|
||||
*
|
||||
* C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu])
|
||||
* W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu])
|
||||
*
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
* TWeakHamiltonianNonEye implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name)
|
||||
: Module<WeakHamiltonianPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
void TWeakHamiltonianNonEye::setup(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TWeakHamiltonianNonEye::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"
|
||||
<< getName() << "' using quarks '" << par().q1 << "', '"
|
||||
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
|
||||
<< "'." << std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
|
||||
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
|
||||
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
|
||||
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
|
||||
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
|
||||
LatticeComplex expbuf(env().getGrid());
|
||||
std::vector<TComplex> corrbuf;
|
||||
std::vector<Result> result(n_noneye_diag);
|
||||
unsigned int ndim = env().getNd();
|
||||
|
||||
PropagatorField tmp1(env().getGrid());
|
||||
LatticeComplex tmp2(env().getGrid());
|
||||
std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
|
||||
std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
|
||||
std::vector<LatticeComplex> W_i_side_loop(ndim, tmp2);
|
||||
std::vector<LatticeComplex> W_f_side_loop(ndim, tmp2);
|
||||
|
||||
// Setup for C-type contractions.
|
||||
for (int mu = 0; mu < ndim; ++mu)
|
||||
{
|
||||
C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu]));
|
||||
C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu]));
|
||||
}
|
||||
|
||||
// Perform C-type contractions.
|
||||
SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu]))
|
||||
MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C")
|
||||
|
||||
// Recycle sub-expressions for W-type contractions.
|
||||
for (unsigned int mu = 0; mu < ndim; ++mu)
|
||||
{
|
||||
W_i_side_loop[mu] = trace(C_i_side_loop[mu]);
|
||||
W_f_side_loop[mu] = trace(C_f_side_loop[mu]);
|
||||
}
|
||||
|
||||
// Perform W-type contractions.
|
||||
SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu])
|
||||
MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W")
|
||||
|
||||
write(writer, "HW_NonEye", result);
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
|
||||
#define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* WeakHamiltonianNonEye *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
enum
|
||||
{
|
||||
W_diag = 0,
|
||||
C_diag = 1,
|
||||
n_noneye_diag = 2
|
||||
};
|
||||
|
||||
// Wing and Connected subdiagram contractions
|
||||
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
|
||||
|
||||
MAKE_WEAK_MODULE(WeakHamiltonianNonEye)
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
|
@ -1,135 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Hadrons;
|
||||
using namespace MContraction;
|
||||
|
||||
/*
|
||||
* Weak Hamiltonian + current contractions, disconnected topology for neutral
|
||||
* mesons.
|
||||
*
|
||||
* These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
|
||||
* Hamiltonian in the physical basis and an additional current J (see e.g.
|
||||
* Fig 11 of arXiv:1507.03094).
|
||||
*
|
||||
* Schematic:
|
||||
*
|
||||
* q2 q4 q3
|
||||
* /--<--¬ /---<--¬ /---<--¬
|
||||
* / \ / \ / \
|
||||
* i * * H_W | J * * f
|
||||
* \ / \ / \ /
|
||||
* \--->---/ \-------/ \------/
|
||||
* q1
|
||||
*
|
||||
* options
|
||||
* - q1: input propagator 1 (string)
|
||||
* - q2: input propagator 2 (string)
|
||||
* - q3: input propagator 3 (string), assumed to be sequential propagator
|
||||
* - q4: input propagator 4 (string), assumed to be a loop
|
||||
*
|
||||
* type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
|
||||
* type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* TWeakNeutral4ptDisc implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
|
||||
: Module<WeakHamiltonianPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
void TWeakNeutral4ptDisc::setup(void)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TWeakNeutral4ptDisc::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
|
||||
<< getName() << "' using quarks '" << par().q1 << "', '"
|
||||
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
|
||||
<< "'." << std::endl;
|
||||
|
||||
CorrWriter writer(par().output);
|
||||
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
|
||||
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
|
||||
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
|
||||
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
|
||||
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
|
||||
LatticeComplex expbuf(env().getGrid());
|
||||
std::vector<TComplex> corrbuf;
|
||||
std::vector<Result> result(n_neut_disc_diag);
|
||||
unsigned int ndim = env().getNd();
|
||||
|
||||
PropagatorField tmp(env().getGrid());
|
||||
std::vector<PropagatorField> meson(ndim, tmp);
|
||||
std::vector<PropagatorField> loop(ndim, tmp);
|
||||
LatticeComplex curr(env().getGrid());
|
||||
|
||||
// Setup for type 1 contractions.
|
||||
for (int mu = 0; mu < ndim; ++mu)
|
||||
{
|
||||
meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
|
||||
loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
|
||||
}
|
||||
curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));
|
||||
|
||||
// Perform type 1 contractions.
|
||||
SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
|
||||
expbuf *= curr;
|
||||
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")
|
||||
|
||||
// Perform type 2 contractions.
|
||||
SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
|
||||
expbuf *= curr;
|
||||
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")
|
||||
|
||||
write(writer, "HW_disc0", result);
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
|
||||
#define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* WeakNeutral4ptDisc *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MContraction)
|
||||
|
||||
enum
|
||||
{
|
||||
neut_disc_1_diag = 0,
|
||||
neut_disc_2_diag = 1,
|
||||
n_neut_disc_diag = 2
|
||||
};
|
||||
|
||||
// Neutral 4pt disconnected subdiagram contractions.
|
||||
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
|
||||
#define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma)
|
||||
#define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma))
|
||||
|
||||
MAKE_WEAK_MODULE(WeakNeutral4ptDisc)
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
|
@ -65,7 +65,7 @@ void TLoad::setup(void)
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TLoad::execute(void)
|
||||
{
|
||||
FieldMetaData header;
|
||||
NerscField header;
|
||||
std::string fileName = par().file + "."
|
||||
+ std::to_string(env().getTrajectory());
|
||||
|
||||
@ -74,5 +74,5 @@ void TLoad::execute(void)
|
||||
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
|
||||
NerscIO::readConfiguration(U, header, fileName);
|
||||
LOG(Message) << "NERSC header:" << std::endl;
|
||||
dump_meta_data(header, LOG(Message));
|
||||
dump_nersc_header(header, LOG(Message));
|
||||
}
|
||||
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MGauge_Load_hpp_
|
||||
#define Hadrons_MGauge_Load_hpp_
|
||||
#ifndef Hadrons_Load_hpp_
|
||||
#define Hadrons_Load_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -70,4 +70,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MGauge_Load_hpp_
|
||||
#endif // Hadrons_Load_hpp_
|
||||
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MGauge_Random_hpp_
|
||||
#define Hadrons_MGauge_Random_hpp_
|
||||
#ifndef Hadrons_Random_hpp_
|
||||
#define Hadrons_Random_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MGauge_Random_hpp_
|
||||
#endif // Hadrons_Random_hpp_
|
||||
|
@ -1,88 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MGauge/StochEm.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Hadrons;
|
||||
using namespace MGauge;
|
||||
|
||||
/******************************************************************************
|
||||
* TStochEm implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
TStochEm::TStochEm(const std::string name)
|
||||
: Module<StochEmPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
std::vector<std::string> TStochEm::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in;
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::vector<std::string> TStochEm::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
void TStochEm::setup(void)
|
||||
{
|
||||
if (!env().hasRegisteredObject("_" + getName() + "_weight"))
|
||||
{
|
||||
env().registerLattice<EmComp>("_" + getName() + "_weight");
|
||||
}
|
||||
env().registerLattice<EmField>(getName());
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TStochEm::execute(void)
|
||||
{
|
||||
PhotonR photon(par().gauge, par().zmScheme);
|
||||
EmField &a = *env().createLattice<EmField>(getName());
|
||||
EmComp *w;
|
||||
|
||||
if (!env().hasCreatedObject("_" + getName() + "_weight"))
|
||||
{
|
||||
LOG(Message) << "Caching stochatic EM potential weight (gauge: "
|
||||
<< par().gauge << ", zero-mode scheme: "
|
||||
<< par().zmScheme << ")..." << std::endl;
|
||||
w = env().createLattice<EmComp>("_" + getName() + "_weight");
|
||||
photon.StochasticWeight(*w);
|
||||
}
|
||||
else
|
||||
{
|
||||
w = env().getObject<EmComp>("_" + getName() + "_weight");
|
||||
}
|
||||
LOG(Message) << "Generating stochatic EM potential..." << std::endl;
|
||||
photon.StochasticField(a, *env().get4dRng(), *w);
|
||||
}
|
@ -1,75 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef Hadrons_MGauge_StochEm_hpp_
|
||||
#define Hadrons_MGauge_StochEm_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* StochEm *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MGauge)
|
||||
|
||||
class StochEmPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar,
|
||||
PhotonR::Gauge, gauge,
|
||||
PhotonR::ZmScheme, zmScheme);
|
||||
};
|
||||
|
||||
class TStochEm: public Module<StochEmPar>
|
||||
{
|
||||
public:
|
||||
typedef PhotonR::GaugeField EmField;
|
||||
typedef PhotonR::GaugeLinkField EmComp;
|
||||
public:
|
||||
// constructor
|
||||
TStochEm(const std::string name);
|
||||
// destructor
|
||||
virtual ~TStochEm(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(StochEm, TStochEm, MGauge);
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MGauge_StochEm_hpp_
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MGauge_Unit_hpp_
|
||||
#define Hadrons_MGauge_Unit_hpp_
|
||||
#ifndef Hadrons_Unit_hpp_
|
||||
#define Hadrons_Unit_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MGauge_Unit_hpp_
|
||||
#endif // Hadrons_Unit_hpp_
|
||||
|
@ -1,132 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MLoop_NoiseLoop_hpp_
|
||||
#define Hadrons_MLoop_NoiseLoop_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/*
|
||||
|
||||
Noise loop propagator
|
||||
-----------------------------
|
||||
* loop_x = q_x * adj(eta_x)
|
||||
|
||||
* options:
|
||||
- q = Result of inversion on noise source.
|
||||
- eta = noise source.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
* NoiseLoop *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MLoop)
|
||||
|
||||
class NoiseLoopPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar,
|
||||
std::string, q,
|
||||
std::string, eta);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TNoiseLoop: public Module<NoiseLoopPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TNoiseLoop(const std::string name);
|
||||
// destructor
|
||||
virtual ~TNoiseLoop(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop);
|
||||
|
||||
/******************************************************************************
|
||||
* TNoiseLoop implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
TNoiseLoop<FImpl>::TNoiseLoop(const std::string name)
|
||||
: Module<NoiseLoopPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TNoiseLoop<FImpl>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().q, par().eta};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TNoiseLoop<FImpl>::setup(void)
|
||||
{
|
||||
env().template registerLattice<PropagatorField>(getName());
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TNoiseLoop<FImpl>::execute(void)
|
||||
{
|
||||
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
|
||||
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
|
||||
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
|
||||
loop = q*adj(eta);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MLoop_NoiseLoop_hpp_
|
@ -1,226 +0,0 @@
|
||||
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
|
||||
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Hadrons;
|
||||
using namespace MScalar;
|
||||
|
||||
/******************************************************************************
|
||||
* TChargedProp implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
TChargedProp::TChargedProp(const std::string name)
|
||||
: Module<ChargedPropPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
std::vector<std::string> TChargedProp::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().source, par().emField};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::vector<std::string> TChargedProp::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
void TChargedProp::setup(void)
|
||||
{
|
||||
freeMomPropName_ = FREEMOMPROP(par().mass);
|
||||
phaseName_.clear();
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
phaseName_.push_back("_shiftphase_" + std::to_string(mu));
|
||||
}
|
||||
GFSrcName_ = "_" + getName() + "_DinvSrc";
|
||||
if (!env().hasRegisteredObject(freeMomPropName_))
|
||||
{
|
||||
env().registerLattice<ScalarField>(freeMomPropName_);
|
||||
}
|
||||
if (!env().hasRegisteredObject(phaseName_[0]))
|
||||
{
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
env().registerLattice<ScalarField>(phaseName_[mu]);
|
||||
}
|
||||
}
|
||||
if (!env().hasRegisteredObject(GFSrcName_))
|
||||
{
|
||||
env().registerLattice<ScalarField>(GFSrcName_);
|
||||
}
|
||||
env().registerLattice<ScalarField>(getName());
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TChargedProp::execute(void)
|
||||
{
|
||||
// CACHING ANALYTIC EXPRESSIONS
|
||||
ScalarField &source = *env().getObject<ScalarField>(par().source);
|
||||
Complex ci(0.0,1.0);
|
||||
FFT fft(env().getGrid());
|
||||
|
||||
// cache free scalar propagator
|
||||
if (!env().hasCreatedObject(freeMomPropName_))
|
||||
{
|
||||
LOG(Message) << "Caching momentum space free scalar propagator"
|
||||
<< " (mass= " << par().mass << ")..." << std::endl;
|
||||
freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_);
|
||||
SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass);
|
||||
}
|
||||
else
|
||||
{
|
||||
freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_);
|
||||
}
|
||||
// cache G*F*src
|
||||
if (!env().hasCreatedObject(GFSrcName_))
|
||||
|
||||
{
|
||||
GFSrc_ = env().createLattice<ScalarField>(GFSrcName_);
|
||||
fft.FFT_all_dim(*GFSrc_, source, FFT::forward);
|
||||
*GFSrc_ = (*freeMomProp_)*(*GFSrc_);
|
||||
}
|
||||
else
|
||||
{
|
||||
GFSrc_ = env().getObject<ScalarField>(GFSrcName_);
|
||||
}
|
||||
// cache phases
|
||||
if (!env().hasCreatedObject(phaseName_[0]))
|
||||
{
|
||||
std::vector<int> &l = env().getGrid()->_fdimensions;
|
||||
|
||||
LOG(Message) << "Caching shift phases..." << std::endl;
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
Real twoPiL = M_PI*2./l[mu];
|
||||
|
||||
phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu]));
|
||||
LatticeCoordinate(*(phase_[mu]), mu);
|
||||
*(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu])));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
phase_.push_back(env().getObject<ScalarField>(phaseName_[mu]));
|
||||
}
|
||||
}
|
||||
|
||||
// PROPAGATOR CALCULATION
|
||||
LOG(Message) << "Computing charged scalar propagator"
|
||||
<< " (mass= " << par().mass
|
||||
<< ", charge= " << par().charge << ")..." << std::endl;
|
||||
|
||||
ScalarField &prop = *env().createLattice<ScalarField>(getName());
|
||||
ScalarField buf(env().getGrid());
|
||||
ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_;
|
||||
double q = par().charge;
|
||||
|
||||
// G*F*Src
|
||||
prop = GFSrc;
|
||||
|
||||
// - q*G*momD1*G*F*Src (momD1 = F*D1*Finv)
|
||||
buf = GFSrc;
|
||||
momD1(buf, fft);
|
||||
buf = G*buf;
|
||||
prop = prop - q*buf;
|
||||
|
||||
// + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src)
|
||||
momD1(buf, fft);
|
||||
prop = prop + q*q*G*buf;
|
||||
|
||||
// - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv)
|
||||
buf = GFSrc;
|
||||
momD2(buf, fft);
|
||||
prop = prop - q*q*G*buf;
|
||||
|
||||
// final FT
|
||||
fft.FFT_all_dim(prop, prop, FFT::backward);
|
||||
|
||||
// OUTPUT IF NECESSARY
|
||||
if (!par().output.empty())
|
||||
{
|
||||
std::string filename = par().output + "." +
|
||||
std::to_string(env().getTrajectory());
|
||||
|
||||
LOG(Message) << "Saving zero-momentum projection to '"
|
||||
<< filename << "'..." << std::endl;
|
||||
|
||||
CorrWriter writer(filename);
|
||||
std::vector<TComplex> vecBuf;
|
||||
std::vector<Complex> result;
|
||||
|
||||
sliceSum(prop, vecBuf, Tp);
|
||||
result.resize(vecBuf.size());
|
||||
for (unsigned int t = 0; t < vecBuf.size(); ++t)
|
||||
{
|
||||
result[t] = TensorRemove(vecBuf[t]);
|
||||
}
|
||||
write(writer, "charge", q);
|
||||
write(writer, "prop", result);
|
||||
}
|
||||
}
|
||||
|
||||
void TChargedProp::momD1(ScalarField &s, FFT &fft)
|
||||
{
|
||||
EmField &A = *env().getObject<EmField>(par().emField);
|
||||
ScalarField buf(env().getGrid()), result(env().getGrid()),
|
||||
Amu(env().getGrid());
|
||||
Complex ci(0.0,1.0);
|
||||
|
||||
result = zero;
|
||||
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
Amu = peekLorentz(A, mu);
|
||||
buf = (*phase_[mu])*s;
|
||||
fft.FFT_all_dim(buf, buf, FFT::backward);
|
||||
buf = Amu*buf;
|
||||
fft.FFT_all_dim(buf, buf, FFT::forward);
|
||||
result = result - ci*buf;
|
||||
}
|
||||
fft.FFT_all_dim(s, s, FFT::backward);
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
Amu = peekLorentz(A, mu);
|
||||
buf = Amu*s;
|
||||
fft.FFT_all_dim(buf, buf, FFT::forward);
|
||||
result = result + ci*adj(*phase_[mu])*buf;
|
||||
}
|
||||
|
||||
s = result;
|
||||
}
|
||||
|
||||
void TChargedProp::momD2(ScalarField &s, FFT &fft)
|
||||
{
|
||||
EmField &A = *env().getObject<EmField>(par().emField);
|
||||
ScalarField buf(env().getGrid()), result(env().getGrid()),
|
||||
Amu(env().getGrid());
|
||||
|
||||
result = zero;
|
||||
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
Amu = peekLorentz(A, mu);
|
||||
buf = (*phase_[mu])*s;
|
||||
fft.FFT_all_dim(buf, buf, FFT::backward);
|
||||
buf = Amu*Amu*buf;
|
||||
fft.FFT_all_dim(buf, buf, FFT::forward);
|
||||
result = result + .5*buf;
|
||||
}
|
||||
fft.FFT_all_dim(s, s, FFT::backward);
|
||||
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
|
||||
{
|
||||
Amu = peekLorentz(A, mu);
|
||||
buf = Amu*Amu*s;
|
||||
fft.FFT_all_dim(buf, buf, FFT::forward);
|
||||
result = result + .5*adj(*phase_[mu])*buf;
|
||||
}
|
||||
|
||||
s = result;
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
#ifndef Hadrons_MScalar_ChargedProp_hpp_
|
||||
#define Hadrons_MScalar_ChargedProp_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* Charged scalar propagator *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MScalar)
|
||||
|
||||
class ChargedPropPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar,
|
||||
std::string, emField,
|
||||
std::string, source,
|
||||
double, mass,
|
||||
double, charge,
|
||||
std::string, output);
|
||||
};
|
||||
|
||||
class TChargedProp: public Module<ChargedPropPar>
|
||||
{
|
||||
public:
|
||||
SCALAR_TYPE_ALIASES(SIMPL,);
|
||||
typedef PhotonR::GaugeField EmField;
|
||||
typedef PhotonR::GaugeLinkField EmComp;
|
||||
public:
|
||||
// constructor
|
||||
TChargedProp(const std::string name);
|
||||
// destructor
|
||||
virtual ~TChargedProp(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
private:
|
||||
void momD1(ScalarField &s, FFT &fft);
|
||||
void momD2(ScalarField &s, FFT &fft);
|
||||
private:
|
||||
std::string freeMomPropName_, GFSrcName_;
|
||||
std::vector<std::string> phaseName_;
|
||||
ScalarField *freeMomProp_, *GFSrc_;
|
||||
std::vector<ScalarField *> phase_;
|
||||
EmField *A;
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar);
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MScalar_ChargedProp_hpp_
|
@ -1,79 +0,0 @@
|
||||
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
|
||||
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Hadrons;
|
||||
using namespace MScalar;
|
||||
|
||||
/******************************************************************************
|
||||
* TFreeProp implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
TFreeProp::TFreeProp(const std::string name)
|
||||
: Module<FreePropPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
std::vector<std::string> TFreeProp::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().source};
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::vector<std::string> TFreeProp::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
void TFreeProp::setup(void)
|
||||
{
|
||||
freeMomPropName_ = FREEMOMPROP(par().mass);
|
||||
|
||||
if (!env().hasRegisteredObject(freeMomPropName_))
|
||||
{
|
||||
env().registerLattice<ScalarField>(freeMomPropName_);
|
||||
}
|
||||
env().registerLattice<ScalarField>(getName());
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
void TFreeProp::execute(void)
|
||||
{
|
||||
ScalarField &prop = *env().createLattice<ScalarField>(getName());
|
||||
ScalarField &source = *env().getObject<ScalarField>(par().source);
|
||||
ScalarField *freeMomProp;
|
||||
|
||||
if (!env().hasCreatedObject(freeMomPropName_))
|
||||
{
|
||||
LOG(Message) << "Caching momentum space free scalar propagator"
|
||||
<< " (mass= " << par().mass << ")..." << std::endl;
|
||||
freeMomProp = env().createLattice<ScalarField>(freeMomPropName_);
|
||||
SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass);
|
||||
}
|
||||
else
|
||||
{
|
||||
freeMomProp = env().getObject<ScalarField>(freeMomPropName_);
|
||||
}
|
||||
LOG(Message) << "Computing free scalar propagator..." << std::endl;
|
||||
SIMPL::FreePropagator(source, prop, *freeMomProp);
|
||||
|
||||
if (!par().output.empty())
|
||||
{
|
||||
TextWriter writer(par().output + "." +
|
||||
std::to_string(env().getTrajectory()));
|
||||
std::vector<TComplex> buf;
|
||||
std::vector<Complex> result;
|
||||
|
||||
sliceSum(prop, buf, Tp);
|
||||
result.resize(buf.size());
|
||||
for (unsigned int t = 0; t < buf.size(); ++t)
|
||||
{
|
||||
result[t] = TensorRemove(buf[t]);
|
||||
}
|
||||
write(writer, "prop", result);
|
||||
}
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
#ifndef Hadrons_MScalar_FreeProp_hpp_
|
||||
#define Hadrons_MScalar_FreeProp_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* FreeProp *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MScalar)
|
||||
|
||||
class FreePropPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar,
|
||||
std::string, source,
|
||||
double, mass,
|
||||
std::string, output);
|
||||
};
|
||||
|
||||
class TFreeProp: public Module<FreePropPar>
|
||||
{
|
||||
public:
|
||||
SCALAR_TYPE_ALIASES(SIMPL,);
|
||||
public:
|
||||
// constructor
|
||||
TFreeProp(const std::string name);
|
||||
// destructor
|
||||
virtual ~TFreeProp(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
private:
|
||||
std::string freeMomPropName_;
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar);
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MScalar_FreeProp_hpp_
|
@ -1,6 +0,0 @@
|
||||
#ifndef Hadrons_Scalar_hpp_
|
||||
#define Hadrons_Scalar_hpp_
|
||||
|
||||
#define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m)
|
||||
|
||||
#endif // Hadrons_Scalar_hpp_
|
@ -1,114 +0,0 @@
|
||||
#ifndef Hadrons_MSink_Point_hpp_
|
||||
#define Hadrons_MSink_Point_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* Point *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MSink)
|
||||
|
||||
class PointPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar,
|
||||
std::string, mom);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TPoint: public Module<PointPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
SINK_TYPE_ALIASES();
|
||||
public:
|
||||
// constructor
|
||||
TPoint(const std::string name);
|
||||
// destructor
|
||||
virtual ~TPoint(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSink);
|
||||
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink);
|
||||
|
||||
/******************************************************************************
|
||||
* TPoint implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
TPoint<FImpl>::TPoint(const std::string name)
|
||||
: Module<PointPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TPoint<FImpl>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in;
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TPoint<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TPoint<FImpl>::setup(void)
|
||||
{
|
||||
unsigned int size;
|
||||
|
||||
size = env().template lattice4dSize<LatticeComplex>();
|
||||
env().registerObject(getName(), size);
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TPoint<FImpl>::execute(void)
|
||||
{
|
||||
std::vector<Real> p = strToVec<Real>(par().mom);
|
||||
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
|
||||
Complex i(0.0,1.0);
|
||||
|
||||
LOG(Message) << "Setting up point sink function for momentum ["
|
||||
<< par().mom << "]" << std::endl;
|
||||
ph = zero;
|
||||
for(unsigned int mu = 0; mu < env().getNd(); mu++)
|
||||
{
|
||||
LatticeCoordinate(coor, mu);
|
||||
ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
|
||||
}
|
||||
ph = exp((Real)(2*M_PI)*i*ph);
|
||||
auto sink = [ph](const PropagatorField &field)
|
||||
{
|
||||
SlicedPropagator res;
|
||||
PropagatorField tmp = ph*field;
|
||||
|
||||
sliceSum(tmp, res, Tp);
|
||||
|
||||
return res;
|
||||
};
|
||||
env().setObject(getName(), new SinkFn(sink));
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MSink_Point_hpp_
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MSolver_RBPrecCG_hpp_
|
||||
#define Hadrons_MSolver_RBPrecCG_hpp_
|
||||
#ifndef Hadrons_RBPrecCG_hpp_
|
||||
#define Hadrons_RBPrecCG_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -53,7 +53,7 @@ template <typename FImpl>
|
||||
class TRBPrecCG: public Module<RBPrecCGPar>
|
||||
{
|
||||
public:
|
||||
FGS_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TRBPrecCG(const std::string name);
|
||||
@ -129,4 +129,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MSolver_RBPrecCG_hpp_
|
||||
#endif // Hadrons_RBPrecCG_hpp_
|
||||
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MSource_Point_hpp_
|
||||
#define Hadrons_MSource_Point_hpp_
|
||||
#ifndef Hadrons_Point_hpp_
|
||||
#define Hadrons_Point_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -63,7 +63,7 @@ template <typename FImpl>
|
||||
class TPoint: public Module<PointPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TPoint(const std::string name);
|
||||
@ -78,8 +78,7 @@ public:
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
|
||||
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource);
|
||||
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
|
||||
|
||||
/******************************************************************************
|
||||
* TPoint template implementation *
|
||||
@ -133,4 +132,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MSource_Point_hpp_
|
||||
#endif // Hadrons_Point_hpp_
|
||||
|
@ -6,7 +6,6 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
|
||||
@ -28,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MSource_SeqGamma_hpp_
|
||||
#define Hadrons_MSource_SeqGamma_hpp_
|
||||
#ifndef Hadrons_SeqGamma_hpp_
|
||||
#define Hadrons_SeqGamma_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -72,7 +71,7 @@ template <typename FImpl>
|
||||
class TSeqGamma: public Module<SeqGammaPar>
|
||||
{
|
||||
public:
|
||||
FGS_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TSeqGamma(const std::string name);
|
||||
@ -150,9 +149,9 @@ void TSeqGamma<FImpl>::execute(void)
|
||||
for(unsigned int mu = 0; mu < env().getNd(); mu++)
|
||||
{
|
||||
LatticeCoordinate(coor, mu);
|
||||
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
|
||||
ph = ph + p[mu]*coor;
|
||||
}
|
||||
ph = exp((Real)(2*M_PI)*i*ph);
|
||||
ph = exp(i*ph);
|
||||
LatticeCoordinate(t, Tp);
|
||||
src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
|
||||
}
|
||||
@ -161,4 +160,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MSource_SeqGamma_hpp_
|
||||
#endif // Hadrons_SeqGamma_hpp_
|
||||
|
@ -1,147 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/MSource/Wall.hpp
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MSource_WallSource_hpp_
|
||||
#define Hadrons_MSource_WallSource_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
#include <Grid/Hadrons/ModuleFactory.hpp>
|
||||
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/*
|
||||
|
||||
Wall source
|
||||
-----------------------------
|
||||
* src_x = delta(x_3 - tW) * exp(i x.mom)
|
||||
|
||||
* options:
|
||||
- tW: source timeslice (integer)
|
||||
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
|
||||
|
||||
*/
|
||||
|
||||
/******************************************************************************
|
||||
* Wall *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MSource)
|
||||
|
||||
class WallPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar,
|
||||
unsigned int, tW,
|
||||
std::string, mom);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TWall: public Module<WallPar>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TWall(const std::string name);
|
||||
// destructor
|
||||
virtual ~TWall(void) = default;
|
||||
// dependency relation
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
virtual void setup(void);
|
||||
// execution
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
|
||||
|
||||
/******************************************************************************
|
||||
* TWall implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
TWall<FImpl>::TWall(const std::string name)
|
||||
: Module<WallPar>(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TWall<FImpl>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in;
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TWall<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName()};
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TWall<FImpl>::setup(void)
|
||||
{
|
||||
env().template registerLattice<PropagatorField>(getName());
|
||||
}
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TWall<FImpl>::execute(void)
|
||||
{
|
||||
LOG(Message) << "Generating wall source at t = " << par().tW
|
||||
<< " with momentum " << par().mom << std::endl;
|
||||
|
||||
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
|
||||
Lattice<iScalar<vInteger>> t(env().getGrid());
|
||||
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
|
||||
std::vector<Real> p;
|
||||
Complex i(0.0,1.0);
|
||||
|
||||
p = strToVec<Real>(par().mom);
|
||||
ph = zero;
|
||||
for(unsigned int mu = 0; mu < Nd; mu++)
|
||||
{
|
||||
LatticeCoordinate(coor, mu);
|
||||
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
|
||||
}
|
||||
ph = exp((Real)(2*M_PI)*i*ph);
|
||||
LatticeCoordinate(t, Tp);
|
||||
src = 1.;
|
||||
src = where((t == par().tW), src*ph, 0.*src);
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MSource_WallSource_hpp_
|
@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_MSource_Z2_hpp_
|
||||
#define Hadrons_MSource_Z2_hpp_
|
||||
#ifndef Hadrons_Z2_hpp_
|
||||
#define Hadrons_Z2_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -67,7 +67,7 @@ template <typename FImpl>
|
||||
class TZ2: public Module<Z2Par>
|
||||
{
|
||||
public:
|
||||
FERM_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TZ2(const std::string name);
|
||||
@ -82,8 +82,7 @@ public:
|
||||
virtual void execute(void);
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
|
||||
MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource);
|
||||
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
|
||||
|
||||
/******************************************************************************
|
||||
* TZ2 template implementation *
|
||||
@ -149,4 +148,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MSource_Z2_hpp_
|
||||
#endif // Hadrons_Z2_hpp_
|
||||
|
@ -1,5 +1,34 @@
|
||||
#ifndef Hadrons_MFermion_GaugeProp_hpp_
|
||||
#define Hadrons_MFermion_GaugeProp_hpp_
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: extras/Hadrons/Modules/Quark.hpp
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Hadrons_Quark_hpp_
|
||||
#define Hadrons_Quark_hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -8,29 +37,27 @@
|
||||
BEGIN_HADRONS_NAMESPACE
|
||||
|
||||
/******************************************************************************
|
||||
* GaugeProp *
|
||||
* TQuark *
|
||||
******************************************************************************/
|
||||
BEGIN_MODULE_NAMESPACE(MFermion)
|
||||
|
||||
class GaugePropPar: Serializable
|
||||
class QuarkPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar,
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar,
|
||||
std::string, source,
|
||||
std::string, solver);
|
||||
};
|
||||
|
||||
template <typename FImpl>
|
||||
class TGaugeProp: public Module<GaugePropPar>
|
||||
class TQuark: public Module<QuarkPar>
|
||||
{
|
||||
public:
|
||||
FGS_TYPE_ALIASES(FImpl,);
|
||||
TYPE_ALIASES(FImpl,);
|
||||
public:
|
||||
// constructor
|
||||
TGaugeProp(const std::string name);
|
||||
TQuark(const std::string name);
|
||||
// destructor
|
||||
virtual ~TGaugeProp(void) = default;
|
||||
// dependency relation
|
||||
virtual ~TQuark(void) = default;
|
||||
// dependencies/products
|
||||
virtual std::vector<std::string> getInput(void);
|
||||
virtual std::vector<std::string> getOutput(void);
|
||||
// setup
|
||||
@ -42,20 +69,20 @@ private:
|
||||
SolverFn *solver_{nullptr};
|
||||
};
|
||||
|
||||
MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion);
|
||||
MODULE_REGISTER(Quark, TQuark<FIMPL>);
|
||||
|
||||
/******************************************************************************
|
||||
* TGaugeProp implementation *
|
||||
* TQuark implementation *
|
||||
******************************************************************************/
|
||||
// constructor /////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
TGaugeProp<FImpl>::TGaugeProp(const std::string name)
|
||||
: Module<GaugePropPar>(name)
|
||||
TQuark<FImpl>::TQuark(const std::string name)
|
||||
: Module(name)
|
||||
{}
|
||||
|
||||
// dependencies/products ///////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TGaugeProp<FImpl>::getInput(void)
|
||||
std::vector<std::string> TQuark<FImpl>::getInput(void)
|
||||
{
|
||||
std::vector<std::string> in = {par().source, par().solver};
|
||||
|
||||
@ -63,7 +90,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getInput(void)
|
||||
}
|
||||
|
||||
template <typename FImpl>
|
||||
std::vector<std::string> TGaugeProp<FImpl>::getOutput(void)
|
||||
std::vector<std::string> TQuark<FImpl>::getOutput(void)
|
||||
{
|
||||
std::vector<std::string> out = {getName(), getName() + "_5d"};
|
||||
|
||||
@ -72,7 +99,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getOutput(void)
|
||||
|
||||
// setup ///////////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TGaugeProp<FImpl>::setup(void)
|
||||
void TQuark<FImpl>::setup(void)
|
||||
{
|
||||
Ls_ = env().getObjectLs(par().solver);
|
||||
env().template registerLattice<PropagatorField>(getName());
|
||||
@ -84,13 +111,13 @@ void TGaugeProp<FImpl>::setup(void)
|
||||
|
||||
// execution ///////////////////////////////////////////////////////////////////
|
||||
template <typename FImpl>
|
||||
void TGaugeProp<FImpl>::execute(void)
|
||||
void TQuark<FImpl>::execute(void)
|
||||
{
|
||||
LOG(Message) << "Computing quark propagator '" << getName() << "'"
|
||||
<< std::endl;
|
||||
<< std::endl;
|
||||
|
||||
FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)),
|
||||
tmp(env().getGrid());
|
||||
tmp(env().getGrid());
|
||||
std::string propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
|
||||
PropagatorField &prop = *env().template createLattice<PropagatorField>(propName);
|
||||
PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source);
|
||||
@ -101,7 +128,7 @@ void TGaugeProp<FImpl>::execute(void)
|
||||
}
|
||||
|
||||
LOG(Message) << "Inverting using solver '" << par().solver
|
||||
<< "' on source '" << par().source << "'" << std::endl;
|
||||
<< "' on source '" << par().source << "'" << std::endl;
|
||||
for (unsigned int s = 0; s < Ns; ++s)
|
||||
for (unsigned int c = 0; c < Nc; ++c)
|
||||
{
|
||||
@ -143,18 +170,16 @@ void TGaugeProp<FImpl>::execute(void)
|
||||
if (Ls_ > 1)
|
||||
{
|
||||
PropagatorField &p4d =
|
||||
*env().template getObject<PropagatorField>(getName());
|
||||
*env().template getObject<PropagatorField>(getName());
|
||||
|
||||
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
|
||||
axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
|
||||
axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
|
||||
ExtractSlice(tmp, sol, 0, 0);
|
||||
FermToProp(p4d, tmp, s, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons_MFermion_GaugeProp_hpp_
|
||||
#endif // Hadrons_Quark_hpp_
|
@ -1,5 +1,5 @@
|
||||
#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
|
||||
#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
|
||||
#ifndef Hadrons____FILEBASENAME____hpp_
|
||||
#define Hadrons____FILEBASENAME____hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -41,4 +41,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_
|
||||
#endif // Hadrons____FILEBASENAME____hpp_
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
|
||||
#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
|
||||
#ifndef Hadrons____FILEBASENAME____hpp_
|
||||
#define Hadrons____FILEBASENAME____hpp_
|
||||
|
||||
#include <Grid/Hadrons/Global.hpp>
|
||||
#include <Grid/Hadrons/Module.hpp>
|
||||
@ -82,4 +82,4 @@ END_MODULE_NAMESPACE
|
||||
|
||||
END_HADRONS_NAMESPACE
|
||||
|
||||
#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_
|
||||
#endif // Hadrons____FILEBASENAME____hpp_
|
||||
|
@ -1,38 +1,19 @@
|
||||
modules_cc =\
|
||||
Modules/MContraction/WeakHamiltonianEye.cc \
|
||||
Modules/MContraction/WeakHamiltonianNonEye.cc \
|
||||
Modules/MContraction/WeakNeutral4ptDisc.cc \
|
||||
Modules/MGauge/Load.cc \
|
||||
Modules/MGauge/Random.cc \
|
||||
Modules/MGauge/StochEm.cc \
|
||||
Modules/MGauge/Unit.cc \
|
||||
Modules/MScalar/ChargedProp.cc \
|
||||
Modules/MScalar/FreeProp.cc
|
||||
Modules/MGauge/Unit.cc
|
||||
|
||||
modules_hpp =\
|
||||
Modules/MAction/DWF.hpp \
|
||||
Modules/MAction/Wilson.hpp \
|
||||
Modules/MContraction/Baryon.hpp \
|
||||
Modules/MContraction/DiscLoop.hpp \
|
||||
Modules/MContraction/Gamma3pt.hpp \
|
||||
Modules/MContraction/Meson.hpp \
|
||||
Modules/MContraction/WeakHamiltonian.hpp \
|
||||
Modules/MContraction/WeakHamiltonianEye.hpp \
|
||||
Modules/MContraction/WeakHamiltonianNonEye.hpp \
|
||||
Modules/MContraction/WeakNeutral4ptDisc.hpp \
|
||||
Modules/MFermion/GaugeProp.hpp \
|
||||
Modules/MGauge/Load.hpp \
|
||||
Modules/MGauge/Random.hpp \
|
||||
Modules/MGauge/StochEm.hpp \
|
||||
Modules/MGauge/Unit.hpp \
|
||||
Modules/MLoop/NoiseLoop.hpp \
|
||||
Modules/MScalar/ChargedProp.hpp \
|
||||
Modules/MScalar/FreeProp.hpp \
|
||||
Modules/MScalar/Scalar.hpp \
|
||||
Modules/MSink/Point.hpp \
|
||||
Modules/MSolver/RBPrecCG.hpp \
|
||||
Modules/MSource/Point.hpp \
|
||||
Modules/MSource/SeqGamma.hpp \
|
||||
Modules/MSource/Wall.hpp \
|
||||
Modules/MSource/Z2.hpp
|
||||
Modules/MSource/Z2.hpp \
|
||||
Modules/Quark.hpp
|
||||
|
||||
|
@ -1,11 +0,0 @@
|
||||
#include <qed-fvol/Global.hpp>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace QCD;
|
||||
using namespace QedFVol;
|
||||
|
||||
QedFVolLogger QedFVol::QedFVolLogError(1,"Error");
|
||||
QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning");
|
||||
QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message");
|
||||
QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative");
|
||||
QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug");
|
@ -1,42 +0,0 @@
|
||||
#ifndef QedFVol_Global_hpp_
|
||||
#define QedFVol_Global_hpp_
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#define BEGIN_QEDFVOL_NAMESPACE \
|
||||
namespace Grid {\
|
||||
using namespace QCD;\
|
||||
namespace QedFVol {\
|
||||
using Grid::operator<<;
|
||||
#define END_QEDFVOL_NAMESPACE }}
|
||||
|
||||
/* the 'using Grid::operator<<;' statement prevents a very nasty compilation
|
||||
* error with GCC (clang compiles fine without it).
|
||||
*/
|
||||
|
||||
BEGIN_QEDFVOL_NAMESPACE
|
||||
|
||||
class QedFVolLogger: public Logger
|
||||
{
|
||||
public:
|
||||
QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm,
|
||||
GridLogColours, "BLACK"){};
|
||||
};
|
||||
|
||||
#define LOG(channel) std::cout << QedFVolLog##channel
|
||||
#define QEDFVOL_ERROR(msg)\
|
||||
LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
|
||||
<< __LINE__ << ")" << std::endl;\
|
||||
abort();
|
||||
|
||||
#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
|
||||
|
||||
extern QedFVolLogger QedFVolLogError;
|
||||
extern QedFVolLogger QedFVolLogWarning;
|
||||
extern QedFVolLogger QedFVolLogMessage;
|
||||
extern QedFVolLogger QedFVolLogIterative;
|
||||
extern QedFVolLogger QedFVolLogDebug;
|
||||
|
||||
END_QEDFVOL_NAMESPACE
|
||||
|
||||
#endif // QedFVol_Global_hpp_
|
@ -1,9 +0,0 @@
|
||||
AM_CXXFLAGS += -I$(top_srcdir)/extras
|
||||
|
||||
bin_PROGRAMS = qed-fvol
|
||||
|
||||
qed_fvol_SOURCES = \
|
||||
qed-fvol.cc \
|
||||
Global.cc
|
||||
|
||||
qed_fvol_LDADD = -lGrid
|
@ -1,265 +0,0 @@
|
||||
#ifndef QEDFVOL_WILSONLOOPS_H
|
||||
#define QEDFVOL_WILSONLOOPS_H
|
||||
|
||||
#include <Global.hpp>
|
||||
|
||||
BEGIN_QEDFVOL_NAMESPACE
|
||||
|
||||
template <class Gimpl> class NewWilsonLoops : public Gimpl {
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
typedef typename Gimpl::GaugeLinkField GaugeMat;
|
||||
typedef typename Gimpl::GaugeField GaugeLorentz;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// directed plaquette oriented in mu,nu plane
|
||||
//////////////////////////////////////////////////
|
||||
static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U,
|
||||
const int mu, const int nu) {
|
||||
// Annoyingly, must use either scope resolution to find dependent base
|
||||
// class,
|
||||
// or this-> ; there is no "this" in a static method. This forces explicit
|
||||
// Gimpl scope
|
||||
// resolution throughout the usage in this file, and rather defeats the
|
||||
// purpose of deriving
|
||||
// from Gimpl.
|
||||
plaq = Gimpl::CovShiftBackward(
|
||||
U[mu], mu, Gimpl::CovShiftBackward(
|
||||
U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu])));
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// trace of directed plaquette oriented in mu,nu plane
|
||||
//////////////////////////////////////////////////
|
||||
static void traceDirPlaquette(LatticeComplex &plaq,
|
||||
const std::vector<GaugeMat> &U, const int mu,
|
||||
const int nu) {
|
||||
GaugeMat sp(U[0]._grid);
|
||||
dirPlaquette(sp, U, mu, nu);
|
||||
plaq = trace(sp);
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static void sitePlaquette(LatticeComplex &Plaq,
|
||||
const std::vector<GaugeMat> &U) {
|
||||
LatticeComplex sitePlaq(U[0]._grid);
|
||||
Plaq = zero;
|
||||
for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
|
||||
for (int nu = 0; nu < mu; nu++) {
|
||||
traceDirPlaquette(sitePlaq, U, mu, nu);
|
||||
Plaq = Plaq + sitePlaq;
|
||||
}
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumPlaquette(const GaugeLorentz &Umu) {
|
||||
std::vector<GaugeMat> U(4, Umu._grid);
|
||||
|
||||
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
|
||||
LatticeComplex Plaq(Umu._grid);
|
||||
|
||||
sitePlaquette(Plaq, U);
|
||||
|
||||
TComplex Tp = sum(Plaq);
|
||||
Complex p = TensorRemove(Tp);
|
||||
return p.real();
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static Real avgPlaquette(const GaugeLorentz &Umu) {
|
||||
int ndim = Umu._grid->_ndimension;
|
||||
Real sumplaq = sumPlaquette(Umu);
|
||||
Real vol = Umu._grid->gSites();
|
||||
Real faces = (1.0 * ndim * (ndim - 1)) / 2.0;
|
||||
return sumplaq / vol / faces / Nc; // Nc dependent... FIXME
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Wilson loop of size (R1, R2), oriented in mu,nu plane
|
||||
//////////////////////////////////////////////////
|
||||
static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U,
|
||||
const int Rmu, const int Rnu,
|
||||
const int mu, const int nu) {
|
||||
wl = U[nu];
|
||||
|
||||
for(int i = 0; i < Rnu-1; i++){
|
||||
wl = Gimpl::CovShiftForward(U[nu], nu, wl);
|
||||
}
|
||||
|
||||
for(int i = 0; i < Rmu; i++){
|
||||
wl = Gimpl::CovShiftForward(U[mu], mu, wl);
|
||||
}
|
||||
|
||||
for(int i = 0; i < Rnu; i++){
|
||||
wl = Gimpl::CovShiftBackward(U[nu], nu, wl);
|
||||
}
|
||||
|
||||
for(int i = 0; i < Rmu; i++){
|
||||
wl = Gimpl::CovShiftBackward(U[mu], mu, wl);
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// trace of Wilson Loop oriented in mu,nu plane
|
||||
//////////////////////////////////////////////////
|
||||
static void traceWilsonLoop(LatticeComplex &wl,
|
||||
const std::vector<GaugeMat> &U,
|
||||
const int Rmu, const int Rnu,
|
||||
const int mu, const int nu) {
|
||||
GaugeMat sp(U[0]._grid);
|
||||
wilsonLoop(sp, U, Rmu, Rnu, mu, nu);
|
||||
wl = trace(sp);
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all planes of Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static void siteWilsonLoop(LatticeComplex &Wl,
|
||||
const std::vector<GaugeMat> &U,
|
||||
const int R1, const int R2) {
|
||||
LatticeComplex siteWl(U[0]._grid);
|
||||
Wl = zero;
|
||||
for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) {
|
||||
for (int nu = 0; nu < mu; nu++) {
|
||||
traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
|
||||
Wl = Wl + siteWl;
|
||||
traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
|
||||
Wl = Wl + siteWl;
|
||||
}
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over planes of Wilson loop with length R1
|
||||
// in the time direction
|
||||
//////////////////////////////////////////////////
|
||||
static void siteTimelikeWilsonLoop(LatticeComplex &Wl,
|
||||
const std::vector<GaugeMat> &U,
|
||||
const int R1, const int R2) {
|
||||
LatticeComplex siteWl(U[0]._grid);
|
||||
|
||||
int ndim = U[0]._grid->_ndimension;
|
||||
|
||||
Wl = zero;
|
||||
for (int nu = 0; nu < ndim - 1; nu++) {
|
||||
traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu);
|
||||
Wl = Wl + siteWl;
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum Wilson loop over all planes orthogonal to the time direction
|
||||
//////////////////////////////////////////////////
|
||||
static void siteSpatialWilsonLoop(LatticeComplex &Wl,
|
||||
const std::vector<GaugeMat> &U,
|
||||
const int R1, const int R2) {
|
||||
LatticeComplex siteWl(U[0]._grid);
|
||||
|
||||
Wl = zero;
|
||||
for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) {
|
||||
for (int nu = 0; nu < mu; nu++) {
|
||||
traceWilsonLoop(siteWl, U, R1, R2, mu, nu);
|
||||
Wl = Wl + siteWl;
|
||||
traceWilsonLoop(siteWl, U, R2, R1, mu, nu);
|
||||
Wl = Wl + siteWl;
|
||||
}
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all x,y,z,t and over all planes of Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(4, Umu._grid);
|
||||
|
||||
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
|
||||
LatticeComplex Wl(Umu._grid);
|
||||
|
||||
siteWilsonLoop(Wl, U, R1, R2);
|
||||
|
||||
TComplex Tp = sum(Wl);
|
||||
Complex p = TensorRemove(Tp);
|
||||
return p.real();
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all x,y,z,t and over all planes of timelike Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(4, Umu._grid);
|
||||
|
||||
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
|
||||
LatticeComplex Wl(Umu._grid);
|
||||
|
||||
siteTimelikeWilsonLoop(Wl, U, R1, R2);
|
||||
|
||||
TComplex Tp = sum(Wl);
|
||||
Complex p = TensorRemove(Tp);
|
||||
return p.real();
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all x,y,z,t and over all planes of spatial Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
std::vector<GaugeMat> U(4, Umu._grid);
|
||||
|
||||
for (int mu = 0; mu < Umu._grid->_ndimension; mu++) {
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
}
|
||||
|
||||
LatticeComplex Wl(Umu._grid);
|
||||
|
||||
siteSpatialWilsonLoop(Wl, U, R1, R2);
|
||||
|
||||
TComplex Tp = sum(Wl);
|
||||
Complex p = TensorRemove(Tp);
|
||||
return p.real();
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real avgWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
int ndim = Umu._grid->_ndimension;
|
||||
Real sumWl = sumWilsonLoop(Umu, R1, R2);
|
||||
Real vol = Umu._grid->gSites();
|
||||
Real faces = 1.0 * ndim * (ndim - 1);
|
||||
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of timelike Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
int ndim = Umu._grid->_ndimension;
|
||||
Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2);
|
||||
Real vol = Umu._grid->gSites();
|
||||
Real faces = 1.0 * (ndim - 1);
|
||||
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of spatial Wilson loop
|
||||
//////////////////////////////////////////////////
|
||||
static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu,
|
||||
const int R1, const int R2) {
|
||||
int ndim = Umu._grid->_ndimension;
|
||||
Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2);
|
||||
Real vol = Umu._grid->gSites();
|
||||
Real faces = 1.0 * (ndim - 1) * (ndim - 2);
|
||||
return sumWl / vol / faces / Nc; // Nc dependent... FIXME
|
||||
}
|
||||
};
|
||||
|
||||
END_QEDFVOL_NAMESPACE
|
||||
|
||||
#endif // QEDFVOL_WILSONLOOPS_H
|
@ -1,88 +0,0 @@
|
||||
#include <Global.hpp>
|
||||
#include <WilsonLoops.h>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace QCD;
|
||||
using namespace QedFVol;
|
||||
|
||||
typedef PeriodicGaugeImpl<QedGimplR> QedPeriodicGimplR;
|
||||
typedef PhotonR::GaugeField EmField;
|
||||
typedef PhotonR::GaugeLinkField EmComp;
|
||||
|
||||
const int NCONFIGS = 10;
|
||||
const int NWILSON = 10;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// parse command line
|
||||
std::string parameterFileName;
|
||||
|
||||
if (argc < 2)
|
||||
{
|
||||
std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]";
|
||||
std::cerr << std::endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
parameterFileName = argv[1];
|
||||
|
||||
// initialization
|
||||
Grid_init(&argc, &argv);
|
||||
QedFVolLogError.Active(GridLogError.isActive());
|
||||
QedFVolLogWarning.Active(GridLogWarning.isActive());
|
||||
QedFVolLogMessage.Active(GridLogMessage.isActive());
|
||||
QedFVolLogIterative.Active(GridLogIterative.isActive());
|
||||
QedFVolLogDebug.Active(GridLogDebug.isActive());
|
||||
LOG(Message) << "Grid initialized" << std::endl;
|
||||
|
||||
// QED stuff
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd());
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
GridCartesian grid(latt_size,simd_layout,mpi_layout);
|
||||
GridParallelRNG pRNG(&grid);
|
||||
PhotonR photon(PhotonR::Gauge::feynman,
|
||||
PhotonR::ZmScheme::qedL);
|
||||
EmField a(&grid);
|
||||
EmField expA(&grid);
|
||||
|
||||
Complex imag_unit(0, 1);
|
||||
|
||||
Real wlA;
|
||||
std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0);
|
||||
|
||||
pRNG.SeedRandomDevice();
|
||||
|
||||
LOG(Message) << "Wilson loop calculation beginning" << std::endl;
|
||||
for(int ic = 0; ic < NCONFIGS; ic++){
|
||||
LOG(Message) << "Configuration " << ic <<std::endl;
|
||||
photon.StochasticField(a, pRNG);
|
||||
|
||||
// Exponentiate photon field
|
||||
expA = exp(imag_unit*a);
|
||||
|
||||
// Calculate Wilson loops
|
||||
for(int iw=1; iw<=NWILSON; iw++){
|
||||
wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3;
|
||||
logWlAvg[iw-1] -= 2*log(wlA);
|
||||
wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3;
|
||||
logWlTime[iw-1] -= 2*log(wlA);
|
||||
wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3;
|
||||
logWlSpace[iw-1] -= 2*log(wlA);
|
||||
}
|
||||
}
|
||||
LOG(Message) << "Wilson loop calculation completed" << std::endl;
|
||||
|
||||
// Calculate Wilson loops
|
||||
for(int iw=1; iw<=10; iw++){
|
||||
LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl;
|
||||
LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl;
|
||||
LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl;
|
||||
LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl;
|
||||
}
|
||||
|
||||
// epilogue
|
||||
LOG(Message) << "Grid is finalizing now" << std::endl;
|
||||
Grid_finalize();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
@ -20,17 +20,4 @@ The simple testcase in this directory is the submitted bug report that encapsula
|
||||
problem. The test case works with icpc and with clang++, but fails consistently on g++
|
||||
current variants.
|
||||
|
||||
Peter
|
||||
|
||||
|
||||
************
|
||||
|
||||
Second GCC bug reported, see Issue 100.
|
||||
|
||||
https://wandbox.org/permlink/tzssJza6R9XnqANw
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
|
||||
|
||||
Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the
|
||||
CI test suite. The limitations of Travis runtime limits & weak cores are being shown.
|
||||
|
||||
Travis uses 5.4.1 for g++-5.
|
||||
Peter
|
@ -1,86 +0,0 @@
|
||||
#! /bin/sh
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
includedir=@includedir@
|
||||
|
||||
usage()
|
||||
{
|
||||
cat <<EOF
|
||||
Usage: grid-config [OPTION]
|
||||
|
||||
Known values for OPTION are:
|
||||
|
||||
--prefix show Grid installation prefix
|
||||
--cxxflags print pre-processor and compiler flags
|
||||
--ldflags print library linking flags
|
||||
--libs print library linking information
|
||||
--summary print full build summary
|
||||
--help display this help and exit
|
||||
--version output version information
|
||||
--git print git revision
|
||||
|
||||
EOF
|
||||
|
||||
exit $1
|
||||
}
|
||||
|
||||
if test $# -eq 0; then
|
||||
usage 1
|
||||
fi
|
||||
|
||||
cflags=false
|
||||
libs=false
|
||||
|
||||
while test $# -gt 0; do
|
||||
case "$1" in
|
||||
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
|
||||
*) optarg= ;;
|
||||
esac
|
||||
|
||||
case "$1" in
|
||||
--prefix)
|
||||
echo $prefix
|
||||
;;
|
||||
|
||||
--version)
|
||||
echo @VERSION@
|
||||
exit 0
|
||||
;;
|
||||
|
||||
--git)
|
||||
echo "@GRID_BRANCH@ @GRID_SHA@"
|
||||
exit 0
|
||||
;;
|
||||
|
||||
--help)
|
||||
usage 0
|
||||
;;
|
||||
|
||||
--cxxflags)
|
||||
echo @GRID_CXXFLAGS@
|
||||
;;
|
||||
|
||||
--ldflags)
|
||||
echo @GRID_LDFLAGS@
|
||||
;;
|
||||
|
||||
--libs)
|
||||
echo @GRID_LIBS@
|
||||
;;
|
||||
|
||||
--summary)
|
||||
echo ""
|
||||
echo "@GRID_SUMMARY@"
|
||||
echo ""
|
||||
;;
|
||||
|
||||
*)
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
exit 0
|
@ -1,6 +1,6 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Algorithms.h
|
||||
|
||||
@ -37,30 +37,39 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||
#include <Grid/algorithms/approx/Remez.h>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
#include <Grid/algorithms/approx/Forecast.h>
|
||||
|
||||
#include <Grid/algorithms/densematrix/DenseMatrix.h>
|
||||
#include <Grid/algorithms/densematrix/Francis.h>
|
||||
#include <Grid/algorithms/densematrix/Householder.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientShifted.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h>
|
||||
#include <Grid/algorithms/iterative/SimpleLanczos.h>
|
||||
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||
#include <Grid/algorithms/FFT.h>
|
||||
|
||||
// Lanczos support
|
||||
#include <Grid/algorithms/iterative/MatrixUtils.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
|
||||
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||
|
||||
// Eigen/lanczos
|
||||
// EigCg
|
||||
// MCR
|
||||
// Pcg
|
||||
// Multishift CG
|
||||
// Hdcg
|
||||
// GCR
|
||||
// etc..
|
||||
|
||||
// integrator/Leapfrog
|
||||
// integrator/Omelyan
|
||||
// integrator/ForceGradient
|
||||
|
||||
// montecarlo/hmc
|
||||
// montecarlo/rhmc
|
||||
// montecarlo/metropolis
|
||||
// etc...
|
||||
|
||||
|
||||
#endif
|
65
lib/AlignedAllocator.cc
Normal file
65
lib/AlignedAllocator.cc
Normal file
@ -0,0 +1,65 @@
|
||||
|
||||
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
int PointerCache::victim;
|
||||
|
||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
||||
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return NULL;
|
||||
|
||||
#ifdef _OPENMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
void * ret = NULL;
|
||||
int v = -1;
|
||||
|
||||
for(int e=0;e<Ncache;e++) {
|
||||
if ( Entries[e].valid==0 ) {
|
||||
v=e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( v==-1 ) {
|
||||
v=victim;
|
||||
victim = (victim+1)%Ncache;
|
||||
}
|
||||
|
||||
if ( Entries[v].valid ) {
|
||||
ret = Entries[v].address;
|
||||
Entries[v].valid = 0;
|
||||
Entries[v].address = NULL;
|
||||
Entries[v].bytes = 0;
|
||||
}
|
||||
|
||||
Entries[v].address=ptr;
|
||||
Entries[v].bytes =bytes;
|
||||
Entries[v].valid =1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *PointerCache::Lookup(size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return NULL;
|
||||
|
||||
#ifdef _OPENMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
for(int e=0;e<Ncache;e++){
|
||||
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
||||
Entries[e].valid = 0;
|
||||
return Entries[e].address;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
}
|
@ -64,8 +64,6 @@ namespace Grid {
|
||||
|
||||
};
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES);
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// A lattice of something, but assume the something is SIMDized.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
@ -94,34 +92,18 @@ public:
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
|
||||
// if ( ptr != NULL )
|
||||
// std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;
|
||||
|
||||
//////////////////
|
||||
// Hack 2MB align; could make option probably doesn't need configurability
|
||||
//////////////////
|
||||
//define GRID_ALLOC_ALIGN (128)
|
||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128);
|
||||
#else
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes);
|
||||
#endif
|
||||
// std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl;
|
||||
// First touch optimise in threaded loop
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
|
||||
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
|
||||
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
@ -200,19 +182,10 @@ public:
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN);
|
||||
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
||||
#else
|
||||
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp));
|
||||
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||
#endif
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
if ( ptr ) {
|
||||
// One touch per 4k page, static OMP loop to catch same loop order
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
void deallocate(pointer __p, size_type) {
|
@ -42,7 +42,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/cshift/Cshift_mpi.h>
|
||||
#endif
|
||||
|
||||
#ifdef GRID_COMMS_MPIT
|
||||
#ifdef GRID_COMMS_MPI3L
|
||||
#include <Grid/cshift/Cshift_mpi.h>
|
||||
#endif
|
||||
|
@ -1,37 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/DisableWarnings.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef DISABLE_WARNINGS_H
|
||||
#define DISABLE_WARNINGS_H
|
||||
|
||||
//disables and intel compiler specific warning (in json.hpp)
|
||||
#pragma warning disable 488
|
||||
|
||||
|
||||
#endif
|
@ -230,7 +230,6 @@ namespace Grid {
|
||||
// Barrel shift and collect global pencil
|
||||
std::vector<int> lcoor(Nd), gcoor(Nd);
|
||||
result = source;
|
||||
int pc = processor_coor[dim];
|
||||
for(int p=0;p<processors[dim];p++) {
|
||||
PARALLEL_REGION
|
||||
{
|
||||
@ -241,8 +240,7 @@ namespace Grid {
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||
peekLocalSite(s,result,cbuf);
|
||||
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
||||
// cbuf[dim]+=p*L;
|
||||
cbuf[dim]+=p*L;
|
||||
pokeLocalSite(s,pgbuf,cbuf);
|
||||
}
|
||||
}
|
||||
@ -280,6 +278,7 @@ namespace Grid {
|
||||
flops+= flops_call*NN;
|
||||
|
||||
// writing out result
|
||||
int pc = processor_coor[dim];
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
54
lib/Grid.h
54
lib/Grid.h
@ -38,12 +38,52 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#ifndef GRID_H
|
||||
#define GRID_H
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/GridQCDcore.h>
|
||||
#include <Grid/qcd/action/Action.h>
|
||||
#include <Grid/qcd/utils/GaugeFix.h>
|
||||
#include <Grid/qcd/smearing/Smearing.h>
|
||||
#include <Grid/parallelIO/MetaData.h>
|
||||
#include <Grid/qcd/hmc/HMC_aggregate.h>
|
||||
///////////////////
|
||||
// Std C++ dependencies
|
||||
///////////////////
|
||||
#include <cassert>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <random>
|
||||
#include <functional>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <ctime>
|
||||
#include <sys/time.h>
|
||||
#include <chrono>
|
||||
|
||||
///////////////////
|
||||
// Grid headers
|
||||
///////////////////
|
||||
#include "Config.h"
|
||||
#include <Grid/Timer.h>
|
||||
#include <Grid/PerfCount.h>
|
||||
#include <Grid/Log.h>
|
||||
#include <Grid/AlignedAllocator.h>
|
||||
#include <Grid/Simd.h>
|
||||
#include <Grid/serialisation/Serialisation.h>
|
||||
#include <Grid/Threads.h>
|
||||
#include <Grid/Lexicographic.h>
|
||||
#include <Grid/Init.h>
|
||||
#include <Grid/Communicator.h>
|
||||
#include <Grid/Cartesian.h>
|
||||
#include <Grid/Tensors.h>
|
||||
#include <Grid/Lattice.h>
|
||||
#include <Grid/Cshift.h>
|
||||
#include <Grid/Stencil.h>
|
||||
#include <Grid/Algorithms.h>
|
||||
#include <Grid/parallelIO/BinaryIO.h>
|
||||
#include <Grid/FFT.h>
|
||||
|
||||
#include <Grid/qcd/QCD.h>
|
||||
#include <Grid/parallelIO/NerscIO.h>
|
||||
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/HmcRunner.h>
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -1,29 +0,0 @@
|
||||
#ifndef GRID_STD_H
|
||||
#define GRID_STD_H
|
||||
|
||||
///////////////////
|
||||
// Std C++ dependencies
|
||||
///////////////////
|
||||
#include <cassert>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <random>
|
||||
#include <functional>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <ctime>
|
||||
#include <sys/time.h>
|
||||
#include <chrono>
|
||||
#include <zlib.h>
|
||||
|
||||
///////////////////
|
||||
// Grid config
|
||||
///////////////////
|
||||
#include "Config.h"
|
||||
|
||||
#endif /* GRID_STD_H */
|
@ -1,9 +0,0 @@
|
||||
#pragma once
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
#include <Grid/Eigen/Dense>
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
@ -1,6 +1,6 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Init.cc
|
||||
|
||||
@ -36,20 +36,17 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <Grid/Grid.h>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#include <Grid/util/CompilerCompatible.h>
|
||||
|
||||
|
||||
#include <fenv.h>
|
||||
#ifdef __APPLE__
|
||||
@ -95,14 +92,14 @@ const std::vector<int> GridDefaultSimd(int dims,int nsimd)
|
||||
if ( nn>=2) {
|
||||
layout[d]=2;
|
||||
nn/=2;
|
||||
} else {
|
||||
} else {
|
||||
layout[d]=1;
|
||||
}
|
||||
}
|
||||
assert(nn==1);
|
||||
return layout;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Command line parsing assist for stock controls
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -146,7 +143,7 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
||||
vec.push_back(i);
|
||||
if(std::ispunct(ss.peek()))
|
||||
ss.ignore();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -219,74 +216,11 @@ void Grid_init(int *argc,char ***argv)
|
||||
int MB;
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
|
||||
GridCmdOptionInt(arg,MB);
|
||||
uint64_t MB64 = MB;
|
||||
CartesianCommunicator::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
|
||||
CartesianCommunicator::Hugepages = 1;
|
||||
}
|
||||
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||
Grid_debug_handler_init();
|
||||
CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
|
||||
}
|
||||
|
||||
CartesianCommunicator::Init(argc,argv);
|
||||
|
||||
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
||||
Grid_quiesce_nodes();
|
||||
} else {
|
||||
FILE *fp;
|
||||
std::ostringstream fname;
|
||||
fname<<"Grid.stdout.";
|
||||
fname<<CartesianCommunicator::RankWorld();
|
||||
fp=freopen(fname.str().c_str(),"w",stdout);
|
||||
assert(fp!=(FILE *)NULL);
|
||||
|
||||
std::ostringstream ename;
|
||||
ename<<"Grid.stderr.";
|
||||
ename<<CartesianCommunicator::RankWorld();
|
||||
fp=freopen(ename.str().c_str(),"w",stderr);
|
||||
assert(fp!=(FILE *)NULL);
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Banner
|
||||
////////////////////////////////////
|
||||
if ( CartesianCommunicator::RankWorld() == 0 ) {
|
||||
std::cout <<std::endl;
|
||||
std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
|
||||
std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
|
||||
std::cout << "__|_ | | | | | | | | | | | | _|__"<<std::endl;
|
||||
std::cout << "__|_ _|__"<<std::endl;
|
||||
std::cout << "__|_ GGGG RRRR III DDDD _|__"<<std::endl;
|
||||
std::cout << "__|_ G R R I D D _|__"<<std::endl;
|
||||
std::cout << "__|_ G R R I D D _|__"<<std::endl;
|
||||
std::cout << "__|_ G GG RRRR I D D _|__"<<std::endl;
|
||||
std::cout << "__|_ G G R R I D D _|__"<<std::endl;
|
||||
std::cout << "__|_ GGGG R R III DDDD _|__"<<std::endl;
|
||||
std::cout << "__|_ _|__"<<std::endl;
|
||||
std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
|
||||
std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
|
||||
std::cout << " | | | | | | | | | | | | | | "<<std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
||||
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
||||
std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
|
||||
std::cout << "(at your option) any later version."<<std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
|
||||
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
||||
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
||||
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////
|
||||
// Logging
|
||||
////////////////////////////////////
|
||||
@ -296,6 +230,9 @@ void Grid_init(int *argc,char ***argv)
|
||||
GridCmdOptionCSL(defaultLog,logstreams);
|
||||
GridLogConfigure(logstreams);
|
||||
|
||||
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
||||
Grid_quiesce_nodes();
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
|
||||
arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log");
|
||||
@ -311,80 +248,101 @@ void Grid_init(int *argc,char ***argv)
|
||||
std::cout<<GridLogMessage<<" --help : this message"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Geometry:"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --log list : comma separated list from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --debug-stdout : print stdout from EVERY node"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --notimestamp : suppress millisecond resolution stamps"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --debug-stdout : print stdout from EVERY node"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --notimestamp : suppress millisecond resolution stamps"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Performance:"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --comms-overlap : Overlap comms with compute "<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --dslash-generic: Wilson kernel for generic Nc"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --dslash-asm : Wilson kernel for AVX512"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --lebesgue : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;
|
||||
std::cout<<GridLogMessage<<" --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;
|
||||
std::cout<<GridLogMessage<<std::endl;
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
////////////////////////////////////
|
||||
// Banner
|
||||
////////////////////////////////////
|
||||
|
||||
std::string COL_RED = GridLogColours.colour["RED"];
|
||||
std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
|
||||
std::string COL_BLACK = GridLogColours.colour["BLACK"];
|
||||
std::string COL_GREEN = GridLogColours.colour["GREEN"];
|
||||
std::string COL_BLUE = GridLogColours.colour["BLUE"];
|
||||
std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
|
||||
std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
|
||||
|
||||
std::cout <<std::endl;
|
||||
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||
std::cout <<COL_RED << "__|_ | | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
|
||||
std::cout <<COL_RED << "__|_ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl;
|
||||
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << "__|_ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||
std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout <<COL_YELLOW<< std::endl;
|
||||
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
||||
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
||||
std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
|
||||
std::cout << "(at your option) any later version."<<std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
|
||||
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
||||
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
||||
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||
std::cout << COL_BACKGROUND <<std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
////////////////////////////////////
|
||||
// Debug and performance options
|
||||
////////////////////////////////////
|
||||
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||
Grid_debug_handler_init();
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
|
||||
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
|
||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
|
||||
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
|
||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
|
||||
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
|
||||
QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptGeneric;
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
|
||||
QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsAndCompute;
|
||||
} else {
|
||||
QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-concurrent") ){
|
||||
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicyConcurrent);
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
|
||||
CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||
LebesgueOrder::UseLebesgueOrder=1;
|
||||
}
|
||||
CartesianCommunicator::nCommThreads = -1;
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads");
|
||||
GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads);
|
||||
}
|
||||
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
|
||||
GridLogTimestamp(0);
|
||||
} else {
|
||||
} else {
|
||||
GridLogTimestamp(1);
|
||||
}
|
||||
|
||||
@ -393,12 +351,9 @@ void Grid_init(int *argc,char ***argv)
|
||||
Grid_default_mpi);
|
||||
|
||||
std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
|
||||
if ( CartesianCommunicator::Hugepages) {
|
||||
std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
||||
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
|
||||
std::cout<<GridLogMessage<<"Grid Decomposition\n";
|
||||
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
||||
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
||||
@ -411,39 +366,30 @@ void Grid_init(int *argc,char ***argv)
|
||||
Grid_is_initialised = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Grid_finalize(void)
|
||||
{
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT)
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||
MPI_Finalize();
|
||||
Grid_unquiesce_nodes();
|
||||
#endif
|
||||
#if defined (GRID_COMMS_SHMEM)
|
||||
shmem_finalize();
|
||||
#endif
|
||||
}
|
||||
|
||||
void GridLogLayout() {
|
||||
std::cout << GridLogMessage << "Grid Layout\n";
|
||||
std::cout << GridLogMessage << "\tGlobal lattice size : "<< GridCmdVectorIntToString(GridDefaultLatt()) << std::endl;
|
||||
std::cout << GridLogMessage << "\tOpenMP threads : "<< GridThread::GetThreads() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMPI tasks : "<< GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
|
||||
}
|
||||
|
||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
{
|
||||
fprintf(stderr,"Caught signal %d\n",si->si_signo);
|
||||
fprintf(stderr," mem address %llx\n",(unsigned long long)si->si_addr);
|
||||
fprintf(stderr," code %d\n",si->si_code);
|
||||
printf("Caught signal %d\n",si->si_signo);
|
||||
printf(" mem address %llx\n",(unsigned long long)si->si_addr);
|
||||
printf(" code %d\n",si->si_code);
|
||||
|
||||
// Linux/Posix
|
||||
#ifdef __linux__
|
||||
// And x86 64bit
|
||||
#ifdef __x86_64__
|
||||
ucontext_t * uc= (ucontext_t *)ptr;
|
||||
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
||||
fprintf(stderr," instruction %llx\n",(unsigned long long)sc->rip);
|
||||
printf(" instruction %llx\n",(unsigned long long)sc->rip);
|
||||
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
||||
REG(rdi);
|
||||
REG(rsi);
|
||||
@ -466,11 +412,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
REG(r15);
|
||||
#endif
|
||||
#endif
|
||||
fflush(stderr);
|
||||
BACKTRACEFP(stderr);
|
||||
fprintf(stderr,"Called backtrace\n");
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
BACKTRACE();
|
||||
exit(0);
|
||||
return;
|
||||
};
|
||||
@ -483,12 +425,9 @@ void Grid_debug_handler_init(void)
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sigaction(SIGSEGV,&sa,NULL);
|
||||
sigaction(SIGTRAP,&sa,NULL);
|
||||
sigaction(SIGBUS,&sa,NULL);
|
||||
|
||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
|
||||
sigaction(SIGFPE,&sa,NULL);
|
||||
sigaction(SIGKILL,&sa,NULL);
|
||||
sigaction(SIGILL,&sa,NULL);
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Init.h
|
||||
|
||||
@ -46,7 +46,6 @@ namespace Grid {
|
||||
const int &GridThreads(void) ;
|
||||
void GridSetThreads(int t) ;
|
||||
void GridLogTimestamp(int);
|
||||
void GridLogLayout();
|
||||
|
||||
// Common parsing chores
|
||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
@ -7,7 +7,7 @@ namespace Grid{
|
||||
class Lexicographic {
|
||||
public:
|
||||
|
||||
static inline void CoorFromIndex (std::vector<int>& coor,int index,const std::vector<int> &dims){
|
||||
static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
|
||||
int nd= dims.size();
|
||||
coor.resize(nd);
|
||||
for(int d=0;d<nd;d++){
|
||||
@ -16,12 +16,8 @@ namespace Grid{
|
||||
}
|
||||
}
|
||||
|
||||
static inline void IndexFromCoor (const std::vector<int>& coor,int &index,const std::vector<int> &dims){
|
||||
static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
|
||||
int nd=dims.size();
|
||||
if(nd > coor.size()) {
|
||||
std::cout<< "coor.size "<<coor.size()<<" >dims.size "<<dims.size()<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
int stride=1;
|
||||
index=0;
|
||||
for(int d=0;d<nd;d++){
|
@ -29,11 +29,9 @@ See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/util/CompilerCompatible.h>
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <memory>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@ -95,7 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||
////////////////////////////////////////////////////////////
|
||||
void Grid_quiesce_nodes(void) {
|
||||
int me = 0;
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
#endif
|
||||
#ifdef GRID_COMMS_SHMEM
|
@ -110,8 +110,8 @@ public:
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : ";
|
||||
stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : ";
|
||||
if ( log.timestamp ) {
|
||||
StopWatch.Stop();
|
||||
GridTime now = StopWatch.Elapsed();
|
@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_MPIT
|
||||
extra_sources+=communicator/Communicator_mpit.cc
|
||||
if BUILD_COMMS_MPI3L
|
||||
extra_sources+=communicator/Communicator_mpi3_leader.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
|
BIN
lib/Old/Endeavour.tgz
Normal file
BIN
lib/Old/Endeavour.tgz
Normal file
Binary file not shown.
154
lib/Old/Tensor_peek.h
Normal file
154
lib/Old/Tensor_peek.h
Normal file
@ -0,0 +1,154 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Old/Tensor_peek.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_MATH_PEEK_H
|
||||
#define GRID_MATH_PEEK_H
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Peek on a specific index; returns a scalar in that index, tensor inherits rest
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// If we hit the right index, return scalar with no further recursion
|
||||
|
||||
//template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;}
|
||||
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
|
||||
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
|
||||
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
|
||||
#if 0
|
||||
// Scalar peek, no indices
|
||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
// Vector peek, one index
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iScalar<vtype> // Index matches
|
||||
{
|
||||
iScalar<vtype> ret; // return scalar
|
||||
ret._internal = arg._internal[i];
|
||||
return ret;
|
||||
}
|
||||
// Matrix peek, two indices
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iScalar<vtype>
|
||||
{
|
||||
iScalar<vtype> ret; // return scalar
|
||||
ret._internal = arg._internal[i][j];
|
||||
return ret;
|
||||
}
|
||||
|
||||
/////////////
|
||||
// No match peek for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
|
||||
/////////////
|
||||
// scalar
|
||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<decltype(peekIndex<Level>(arg._internal))>
|
||||
{
|
||||
iScalar<decltype(peekIndex<Level>(arg._internal))> ret;
|
||||
ret._internal= peekIndex<Level>(arg._internal);
|
||||
return ret;
|
||||
}
|
||||
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iScalar<vtype> &arg,int i) -> iScalar<decltype(peekIndex<Level>(arg._internal,i))>
|
||||
{
|
||||
iScalar<decltype(peekIndex<Level>(arg._internal,i))> ret;
|
||||
ret._internal=peekIndex<Level>(arg._internal,i);
|
||||
return ret;
|
||||
}
|
||||
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iScalar<vtype> &arg,int i,int j) -> iScalar<decltype(peekIndex<Level>(arg._internal,i,j))>
|
||||
{
|
||||
iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> ret;
|
||||
ret._internal=peekIndex<Level>(arg._internal,i,j);
|
||||
return ret;
|
||||
}
|
||||
// vector
|
||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iVector<vtype,N> &arg) -> iVector<decltype(peekIndex<Level>(arg._internal[0])),N>
|
||||
{
|
||||
iVector<decltype(peekIndex<Level>(arg._internal[0])),N> ret;
|
||||
for(int ii=0;ii<N;ii++){
|
||||
ret._internal[ii]=peekIndex<Level>(arg._internal[ii]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N>
|
||||
{
|
||||
iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret;
|
||||
for(int ii=0;ii<N;ii++){
|
||||
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iVector<vtype,N> &arg,int i,int j) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N>
|
||||
{
|
||||
iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> ret;
|
||||
for(int ii=0;ii<N;ii++){
|
||||
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i,j);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// matrix
|
||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
|
||||
{
|
||||
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> ret;
|
||||
for(int ii=0;ii<N;ii++){
|
||||
for(int jj=0;jj<N;jj++){
|
||||
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iMatrix<vtype,N> &arg,int i) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N>
|
||||
{
|
||||
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N> ret;
|
||||
for(int ii=0;ii<N;ii++){
|
||||
for(int jj=0;jj<N;jj++){
|
||||
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N>
|
||||
{
|
||||
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> ret;
|
||||
for(int ii=0;ii<N;ii++){
|
||||
for(int jj=0;jj<N;jj++){
|
||||
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i,j);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#endif
|
127
lib/Old/Tensor_poke.h
Normal file
127
lib/Old/Tensor_poke.h
Normal file
@ -0,0 +1,127 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Old/Tensor_poke.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_MATH_POKE_H
|
||||
#define GRID_MATH_POKE_H
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Poke a specific index;
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
#if 0
|
||||
// Scalar poke
|
||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
|
||||
{
|
||||
ret._internal = arg._internal;
|
||||
}
|
||||
// Vector poke, one index
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||
void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
|
||||
{
|
||||
ret._internal[i] = arg._internal;
|
||||
}
|
||||
//Matrix poke, two indices
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
|
||||
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
|
||||
{
|
||||
ret._internal[i][j] = arg._internal;
|
||||
}
|
||||
|
||||
/////////////
|
||||
// No match poke for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
|
||||
/////////////
|
||||
// scalar
|
||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
|
||||
{
|
||||
pokeIndex<Level>(ret._internal,arg._internal);
|
||||
}
|
||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0))> &arg, int i)
|
||||
|
||||
{
|
||||
pokeIndex<Level>(ret._internal,arg._internal,i);
|
||||
}
|
||||
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0,0))> &arg,int i,int j)
|
||||
{
|
||||
pokeIndex<Level>(ret._internal,arg._internal,i,j);
|
||||
}
|
||||
|
||||
// Vector
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iVector<vtype,N> &ret, iVector<decltype(peekIndex<Level>(ret._internal)),N> &arg)
|
||||
{
|
||||
for(int ii=0;ii<N;ii++){
|
||||
pokeIndex<Level>(ret._internal[ii],arg._internal[ii]);
|
||||
}
|
||||
}
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
|
||||
{
|
||||
for(int ii=0;ii<N;ii++){
|
||||
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i);
|
||||
}
|
||||
}
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,int i,int j)
|
||||
{
|
||||
for(int ii=0;ii<N;ii++){
|
||||
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i,j);
|
||||
}
|
||||
}
|
||||
|
||||
// Matrix
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal)),N> &arg)
|
||||
{
|
||||
for(int ii=0;ii<N;ii++){
|
||||
for(int jj=0;jj<N;jj++){
|
||||
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj]);
|
||||
}}
|
||||
}
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
|
||||
{
|
||||
for(int ii=0;ii<N;ii++){
|
||||
for(int jj=0;jj<N;jj++){
|
||||
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i);
|
||||
}}
|
||||
}
|
||||
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
|
||||
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg, int i,int j)
|
||||
{
|
||||
for(int ii=0;ii<N;ii++){
|
||||
for(int jj=0;jj<N;jj++){
|
||||
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
|
||||
}}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
@ -26,8 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/PerfCount.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@ -40,7 +40,7 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." , CPUCYCLES },
|
||||
// 4
|
||||
#ifdef KNL
|
||||
#ifdef AVX512
|
||||
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS },
|
@ -172,7 +172,7 @@ public:
|
||||
const char * name = PerformanceCounterConfigs[PCT].name;
|
||||
fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
|
||||
perror("Error is");
|
||||
}
|
||||
int norm = PerformanceCounterConfigs[PCT].normalisation;
|
||||
@ -181,7 +181,7 @@ public:
|
||||
name = PerformanceCounterConfigs[norm].name;
|
||||
cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||
if (cyclefd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
|
||||
perror("Error is");
|
||||
}
|
||||
#endif
|
||||
@ -205,14 +205,13 @@ public:
|
||||
void Stop(void) {
|
||||
count=0;
|
||||
cycles=0;
|
||||
size_t ign;
|
||||
#ifdef __linux__
|
||||
ssize_t ign;
|
||||
if ( fd!= -1) {
|
||||
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
ign=::read(fd, &count, sizeof(long long));
|
||||
ign+=::read(cyclefd, &cycles, sizeof(long long));
|
||||
assert(ign=2*sizeof(long long));
|
||||
ign=::read(cyclefd, &cycles, sizeof(long long));
|
||||
}
|
||||
elapsed = cyclecount() - begin;
|
||||
#else
|
@ -172,8 +172,8 @@ namespace Grid {
|
||||
|
||||
};
|
||||
|
||||
#include <Grid/simd/Grid_vector_types.h>
|
||||
#include <Grid/simd/Grid_vector_unops.h>
|
||||
#include "simd/Grid_vector_types.h"
|
||||
#include "simd/Grid_vector_unops.h"
|
||||
|
||||
namespace Grid {
|
||||
// Default precision
|
@ -1,9 +1,11 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/perfmon/Stat.h>
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/PerfCount.h>
|
||||
#include <Grid/Stat.h>
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
bool PmuStat::pmu_initialized=false;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -37,9 +37,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#include <omp.h>
|
||||
|
||||
#ifdef GRID_NUMA
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
||||
#else
|
||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
|
||||
#endif
|
||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||
#define PARALLEL_REGION _Pragma("omp parallel")
|
||||
#define PARALLEL_CRITICAL _Pragma("omp critical")
|
||||
@ -51,9 +55,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#define PARALLEL_CRITICAL
|
||||
#endif
|
||||
|
||||
#define parallel_for PARALLEL_FOR_LOOP for
|
||||
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Introduce a class to gain deterministic bit reproducible reduction.
|
@ -267,7 +267,8 @@ namespace Grid {
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
|
||||
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
siteVector res = zero;
|
||||
siteVector nbr;
|
||||
int ptype;
|
||||
@ -379,7 +380,8 @@ namespace Grid {
|
||||
Subspace.ProjectToSubspace(oProj,oblock);
|
||||
// blockProject(iProj,iblock,Subspace.subspace);
|
||||
// blockProject(oProj,oblock,Subspace.subspace);
|
||||
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
for(int j=0;j<nbasis;j++){
|
||||
if( disp!= 0 ) {
|
||||
A[p]._odata[ss](j,i) = oProj._odata[ss](j);
|
||||
@ -425,7 +427,7 @@ namespace Grid {
|
||||
A[p]=zero;
|
||||
}
|
||||
|
||||
GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
|
||||
GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice();
|
||||
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
|
||||
|
||||
Complex one(1.0);
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -163,10 +162,15 @@ namespace Grid {
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
ComplexD dot;
|
||||
|
||||
_Mat.M(in,out);
|
||||
|
||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||
n2=norm2(out);
|
||||
dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
|
||||
dot = innerProduct(out,out);
|
||||
n2=real(dot);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
@ -188,10 +192,10 @@ namespace Grid {
|
||||
ni=Mpc(in,tmp);
|
||||
no=MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
MpcDagMpc(in,out,n1,n2);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
@ -208,6 +212,7 @@ namespace Grid {
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
@ -230,7 +235,7 @@ namespace Grid {
|
||||
Field tmp(in._grid);
|
||||
|
||||
_Mat.MeooeDag(in,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
_Mat.MooeeDag(in,out);
|
||||
@ -265,6 +270,7 @@ namespace Grid {
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
@ -293,168 +299,6 @@ namespace Grid {
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Staggered use
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
n2 = Mpc(in,out);
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.Mooee(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
return Mpc(in,out);
|
||||
}
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
assert(0);// Never need with staggered
|
||||
}
|
||||
};
|
||||
// template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
||||
template<class Matrix,class Field>
|
||||
// class SchurStagOperator : public LinearOperatorBase<Field> {
|
||||
class SchurStagOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurStagOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
Field tmp2(in._grid);
|
||||
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp2);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp2,tmp);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
|
||||
return Mpc(in,out);
|
||||
}
|
||||
#if 0
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp(in._grid);
|
||||
ni=Mpc(in,tmp);
|
||||
no=MpcDag(tmp,out);
|
||||
}
|
||||
#endif
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
n2 = Mpc(in,out);
|
||||
ComplexD dot = innerProduct(in,out);
|
||||
n1 = real(dot);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#if 0
|
||||
// This is specific to (Z)mobius fermions
|
||||
template<class Matrix, class Field>
|
||||
class KappaSimilarityTransform {
|
||||
public:
|
||||
// INHERIT_IMPL_TYPES(Matrix);
|
||||
typedef typename Matrix::Coeff_t Coeff_t;
|
||||
std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
|
||||
|
||||
KappaSimilarityTransform (Matrix &zmob) {
|
||||
for (int i=0;i<(int)zmob.bs.size();i++) {
|
||||
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
|
||||
kappa.push_back( k );
|
||||
kappaDag.push_back( conj(k) );
|
||||
kappaInv.push_back( 1.0 / k );
|
||||
kappaInvDag.push_back( 1.0 / conj(k) );
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj>
|
||||
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
|
||||
GridBase *grid=out._grid;
|
||||
out.checkerboard = in.checkerboard;
|
||||
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
|
||||
int Ls = grid->_rdimensions[0];
|
||||
parallel_for(int ss=0;ss<grid->oSites();ss++){
|
||||
vobj tmp = s[ss % Ls]*in._odata[ss];
|
||||
vstream(out._odata[ss],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
|
||||
sscale(in,out,s);
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); }
|
||||
virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);}
|
||||
virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
|
||||
virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}
|
||||
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
|
||||
public:
|
||||
KappaSimilarityTransform<Matrix, Field> _S;
|
||||
SchurDiagTwoOperator<Matrix, Field> _Mat;
|
||||
|
||||
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
|
||||
_S.MInv(in,out);
|
||||
_Mat.Mpc(out,tmp);
|
||||
return _S.M(tmp,out);
|
||||
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
|
||||
_S.MDag(in,out);
|
||||
_Mat.MpcDag(out,tmp);
|
||||
return _S.MInvDag(tmp,out);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
0
lib/algorithms/approx/.dirstamp
Normal file
0
lib/algorithms/approx/.dirstamp
Normal file
@ -8,7 +8,6 @@
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -194,54 +193,12 @@ namespace Grid {
|
||||
return sum;
|
||||
};
|
||||
|
||||
RealD approxD(RealD x)
|
||||
{
|
||||
RealD Un;
|
||||
RealD Unm;
|
||||
RealD Unp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD U0=1;
|
||||
RealD U1=2*y;
|
||||
|
||||
RealD sum;
|
||||
sum = Coeffs[1]*U0;
|
||||
sum+= Coeffs[2]*U1*2.0;
|
||||
|
||||
Un =U1;
|
||||
Unm=U0;
|
||||
for(int i=2;i<order-1;i++){
|
||||
Unp=2*y*Un-Unm;
|
||||
Unm=Un;
|
||||
Un =Unp;
|
||||
sum+= Un*Coeffs[i+1]*(i+1.0);
|
||||
}
|
||||
return sum/(0.5*(hi-lo));
|
||||
};
|
||||
|
||||
RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
|
||||
RealD x = x0;
|
||||
RealD eps;
|
||||
|
||||
int i;
|
||||
for (i=0;i<maxiter;i++) {
|
||||
eps = approx(x) - z;
|
||||
if (fabs(eps / z) < resid)
|
||||
return x;
|
||||
x = x - eps / approxD(x);
|
||||
}
|
||||
|
||||
return std::numeric_limits<double>::quiet_NaN();
|
||||
}
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in._grid;
|
||||
|
||||
// std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
|
||||
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
||||
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
|
||||
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user