mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 12:04:33 +00:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			dirac-ITT-
			...
			feature/mi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 1e3fb32572 | ||
|  | 0d5af667d8 | ||
|  | e9712bc7fb | 
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -92,7 +92,6 @@ build*/* | |||||||
| ##################### | ##################### | ||||||
| *.xcodeproj/* | *.xcodeproj/* | ||||||
| build.sh | build.sh | ||||||
| .vscode |  | ||||||
|  |  | ||||||
| # Eigen source # | # Eigen source # | ||||||
| ################ | ################ | ||||||
| @@ -107,10 +106,6 @@ lib/fftw/* | |||||||
| m4/lt* | m4/lt* | ||||||
| m4/libtool.m4 | m4/libtool.m4 | ||||||
|  |  | ||||||
| # github pages # |  | ||||||
| ################ |  | ||||||
| gh-pages/ |  | ||||||
|  |  | ||||||
| # Buck files # | # Buck files # | ||||||
| ############## | ############## | ||||||
| .buck* | .buck* | ||||||
| @@ -122,4 +117,3 @@ make-bin-BUCK.sh | |||||||
| ##################### | ##################### | ||||||
| lib/qcd/spin/gamma-gen/*.h | lib/qcd/spin/gamma-gen/*.h | ||||||
| lib/qcd/spin/gamma-gen/*.cc | lib/qcd/spin/gamma-gen/*.cc | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										76
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										76
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -7,8 +7,64 @@ cache: | |||||||
| matrix: | matrix: | ||||||
|   include: |   include: | ||||||
|     - os:        osx |     - os:        osx | ||||||
|       osx_image: xcode8.3 |       osx_image: xcode7.2 | ||||||
|       compiler: clang |       compiler: clang | ||||||
|  |     - compiler: gcc | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.9 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: VERSION=-4.9 | ||||||
|  |     - compiler: gcc | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-5 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: VERSION=-5 | ||||||
|  |     - compiler: clang | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.8 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|  |     - compiler: clang | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.8 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|        |        | ||||||
| before_install: | before_install: | ||||||
|     - export GRIDDIR=`pwd` |     - export GRIDDIR=`pwd` | ||||||
| @@ -17,15 +73,13 @@ before_install: | |||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi | ||||||
|      |      | ||||||
| install: | install: | ||||||
|     - export CC=$CC$VERSION |     - export CC=$CC$VERSION | ||||||
|     - export CXX=$CXX$VERSION |     - export CXX=$CXX$VERSION | ||||||
|     - echo $PATH |     - echo $PATH | ||||||
|     - which autoconf |  | ||||||
|     - autoconf  --version |  | ||||||
|     - which automake |  | ||||||
|     - automake  --version |  | ||||||
|     - which $CC |     - which $CC | ||||||
|     - $CC  --version |     - $CC  --version | ||||||
|     - which $CXX |     - which $CXX | ||||||
| @@ -38,9 +92,15 @@ script: | |||||||
|     - cd build |     - cd build | ||||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4  |     - make -j4  | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |     - ./benchmarks/Benchmark_dwf --threads 1 | ||||||
|     - echo make clean |     - echo make clean | ||||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4 |     - make -j4 | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |     - ./benchmarks/Benchmark_dwf --threads 1 | ||||||
|     - make check |     - echo make clean | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi | ||||||
|  |     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto | ||||||
|  |     - make -j4 | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							| @@ -3,15 +3,10 @@ SUBDIRS = lib benchmarks tests extras | |||||||
|  |  | ||||||
| include $(top_srcdir)/doxygen.inc | include $(top_srcdir)/doxygen.inc | ||||||
|  |  | ||||||
| bin_SCRIPTS=grid-config | tests: all | ||||||
|  | 	$(MAKE) -C tests tests | ||||||
|  |  | ||||||
|  | .PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||||
| .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) |  | ||||||
|  |  | ||||||
| tests-local: all |  | ||||||
| bench-local: all |  | ||||||
| check-local: all |  | ||||||
|  |  | ||||||
| AM_CXXFLAGS += -I$(top_builddir)/include | AM_CXXFLAGS += -I$(top_builddir)/include | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I m4 | ACLOCAL_AMFLAGS = -I m4 | ||||||
|   | |||||||
							
								
								
									
										298
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										298
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,13 +1,41 @@ | |||||||
| # Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid) | # Grid | ||||||
|  | <table> | ||||||
|  | <tr> | ||||||
|  |     <td>Last stable release</td> | ||||||
|  |     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||||
|  |     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> | ||||||
|  |     </td> | ||||||
|  | </tr> | ||||||
|  | <tr> | ||||||
|  |     <td>Development branch</td> | ||||||
|  |     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||||
|  |     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> | ||||||
|  |     </td> | ||||||
|  | </tr> | ||||||
|  | </table> | ||||||
|  |  | ||||||
| **Data parallel C++ mathematical object library.** | **Data parallel C++ mathematical object library.** | ||||||
|  |  | ||||||
| License: GPL v2. | License: GPL v2. | ||||||
|  |  | ||||||
| Last update June 2017. | Last update Nov 2016. | ||||||
|  |  | ||||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||||
|  |  | ||||||
|  | ### Bug report | ||||||
|  |  | ||||||
|  | _To help us track and resolve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._ | ||||||
|  |  | ||||||
|  | When you file an issue, please go through the following checklist: | ||||||
|  |  | ||||||
|  | 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  | ||||||
|  | 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. | ||||||
|  | 3. Give the exact `configure` command used. | ||||||
|  | 4. Attach `config.log`. | ||||||
|  | 5. Attach `config.summary`. | ||||||
|  | 6. Attach the output of `make V=1`. | ||||||
|  | 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Description | ### Description | ||||||
| @@ -30,68 +58,13 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi | |||||||
| for most programmers. | for most programmers. | ||||||
|  |  | ||||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||||
| Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. | Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). | ||||||
|  |  | ||||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.  | These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | ||||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||||
|  |  | ||||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | MPI, OpenMP, and SIMD parallelism are present in the library. | ||||||
| Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. | Please see https://arxiv.org/abs/1512.03487 for more detail. | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Compilers |  | ||||||
|  |  | ||||||
| Intel ICPC v16.0.3 and later |  | ||||||
|  |  | ||||||
| Clang v3.5 and later (need 3.8 and later for OpenMP) |  | ||||||
|  |  | ||||||
| GCC   v4.9.x (recommended) |  | ||||||
|  |  | ||||||
| GCC   v6.3 and later |  | ||||||
|  |  | ||||||
| ### Important:  |  | ||||||
|  |  | ||||||
| Some versions of GCC appear to have a bug under high optimisation (-O2, -O3). |  | ||||||
|  |  | ||||||
| The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates. |  | ||||||
|  |  | ||||||
| GCC   v5.x |  | ||||||
|  |  | ||||||
| GCC   v6.1, v6.2 |  | ||||||
|  |  | ||||||
| ### Bug report |  | ||||||
|  |  | ||||||
| _To help us track and resolve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._ |  | ||||||
|  |  | ||||||
| When you file an issue, please go through the following checklist: |  | ||||||
|  |  | ||||||
| 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  |  | ||||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. |  | ||||||
| 3. Give the exact `configure` command used. |  | ||||||
| 4. Attach `config.log`. |  | ||||||
| 5. Attach `grid.config.summary`. |  | ||||||
| 6. Attach the output of `make V=1`. |  | ||||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. |  | ||||||
|  |  | ||||||
| ### Required libraries |  | ||||||
| Grid requires: |  | ||||||
|  |  | ||||||
| [GMP](https://gmplib.org/),  |  | ||||||
|  |  | ||||||
| [MPFR](http://www.mpfr.org/)  |  | ||||||
|  |  | ||||||
| Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. |  | ||||||
|  |  | ||||||
| Grid optionally uses: |  | ||||||
|  |  | ||||||
| [HDF5](https://support.hdfgroup.org/HDF5/)   |  | ||||||
|  |  | ||||||
| [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.  |  | ||||||
|  |  | ||||||
| [FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. |  | ||||||
|  |  | ||||||
| LAPACK either generic version or Intel MKL library. |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Quick start | ### Quick start | ||||||
| First, start by cloning the repository: | First, start by cloning the repository: | ||||||
| @@ -122,10 +95,10 @@ install Grid. Other options are detailed in the next section, you can also use ` | |||||||
| `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | ||||||
| customise the build. | customise the build. | ||||||
|  |  | ||||||
| Finally, you can build, check, and install Grid: | Finally, you can build and install Grid: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| make; make check; make install | make; make install | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||||
| @@ -148,7 +121,7 @@ If you want to build all the tests at once just use `make tests`. | |||||||
| - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | ||||||
| - `--enable-precision={single|double}`: set the default precision (default: `double`). | - `--enable-precision={single|double}`: set the default precision (default: `double`). | ||||||
| - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below. | - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below. | ||||||
| - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). | - `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). | ||||||
| - `--disable-timers`: disable system dependent high-resolution timers. | - `--disable-timers`: disable system dependent high-resolution timers. | ||||||
| - `--enable-chroma`: enable Chroma regression tests. | - `--enable-chroma`: enable Chroma regression tests. | ||||||
| - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | ||||||
| @@ -162,6 +135,7 @@ The following options can be use with the `--enable-comms=` option to target dif | |||||||
| | `none`         | no communications                                             | | | `none`         | no communications                                             | | ||||||
| | `mpi[-auto]`   | MPI communications                                            | | | `mpi[-auto]`   | MPI communications                                            | | ||||||
| | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | ||||||
|  | | `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | ||||||
| | `shmem `       | Cray SHMEM communications                                     | | | `shmem `       | Cray SHMEM communications                                     | | ||||||
|  |  | ||||||
| For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | ||||||
| @@ -179,13 +153,13 @@ The following options can be use with the `--enable-simd=` option to target diff | |||||||
| | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | ||||||
| | `AVX2`      | AVX 2 (256 bit)                        | | | `AVX2`      | AVX 2 (256 bit)                        | | ||||||
| | `AVX512`    | AVX 512 bit                            | | | `AVX512`    | AVX 512 bit                            | | ||||||
| | `NEONv8`    | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit)                     | | | `QPX`       | QPX (256 bit)                          | | ||||||
| | `QPX`       | IBM QPX (256 bit)                      | |  | ||||||
|  |  | ||||||
| Alternatively, some CPU codenames can be directly used: | Alternatively, some CPU codenames can be directly used: | ||||||
|  |  | ||||||
| | `<code>`    | Description                            | | | `<code>`    | Description                            | | ||||||
| | ----------- | -------------------------------------- | | | ----------- | -------------------------------------- | | ||||||
|  | | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | | ||||||
| | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||||
| | `BGQ`       | Blue Gene/Q                            | | | `BGQ`       | Blue Gene/Q                            | | ||||||
|  |  | ||||||
| @@ -203,204 +177,20 @@ The following configuration is recommended for the Intel Knights Landing platfor | |||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi-auto \ |              --enable-comms=mpi-auto \ | ||||||
|  |              --with-gmp=<path>        \ | ||||||
|  |              --with-mpfr=<path>       \ | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=icpc MPICXX=mpiicpc |              CXX=icpc MPICXX=mpiicpc | ||||||
| ``` | ``` | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi       \ |              --enable-comms=mpi       \ | ||||||
|  |              --with-gmp=<path>        \ | ||||||
|  |              --with-mpfr=<path>       \ | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=CC CC=cc |              CXX=CC CC=cc | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| Knight's Landing with Intel Omnipath adapters with two adapters per node  |  | ||||||
| presently performs better with use of more than one rank per node, using shared memory  |  | ||||||
| for interior communication. This is the mpi3 communications implementation.  |  | ||||||
| We recommend four ranks per node for best performance, but optimum is local volume dependent. |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=KNL        \ |  | ||||||
|              --enable-comms=mpi3-auto \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CC=icpc MPICXX=mpiicpc  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| ### Build setup for Intel Haswell Xeon platform |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the Intel Haswell platform: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3-auto \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=icpc MPICXX=mpiicpc |  | ||||||
| ``` |  | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=CC CC=cc |  | ||||||
| ``` |  | ||||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  |  | ||||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using |  | ||||||
| ``` |  | ||||||
|         export I_MPI_PIN=1 |  | ||||||
| ``` |  | ||||||
| This is the default. |  | ||||||
|  |  | ||||||
| ### Build setup for Intel Skylake Xeon platform |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the Intel Skylake platform: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX512     \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=mpiicpc |  | ||||||
| ``` |  | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX512     \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=CC CC=cc |  | ||||||
| ``` |  | ||||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  |  | ||||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using |  | ||||||
| ```  |  | ||||||
|         export I_MPI_PIN=1 |  | ||||||
| ``` |  | ||||||
| This is the default.  |  | ||||||
|  |  | ||||||
| #### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping):  |  | ||||||
|  |  | ||||||
| mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18  |  | ||||||
|  |  | ||||||
| TBA |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Build setup for AMD EPYC / RYZEN |  | ||||||
|  |  | ||||||
| The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. |  | ||||||
| So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total |  | ||||||
| are common. Each chip within the module exposes a separate NUMA domain. |  | ||||||
| There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. |  | ||||||
| MPI-3 is recommended with the use of four ranks per socket, |  | ||||||
| and 8 threads per rank.  |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the AMD EPYC platform. |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3 \ |  | ||||||
|              CXX=mpicxx  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. |  | ||||||
| This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.  |  | ||||||
|  |  | ||||||
| It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and |  | ||||||
| shared memory to communicate within this node: |  | ||||||
|  |  | ||||||
| mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4  |  | ||||||
|  |  | ||||||
| Where omp_bind.sh does the following: |  | ||||||
| ``` |  | ||||||
| #!/bin/bash |  | ||||||
|  |  | ||||||
| numanode=` expr $PMI_RANK % 8 ` |  | ||||||
| basecore=`expr $numanode \* 16` |  | ||||||
| core0=`expr $basecore + 0 ` |  | ||||||
| core1=`expr $basecore + 2 ` |  | ||||||
| core2=`expr $basecore + 4 ` |  | ||||||
| core3=`expr $basecore + 6 ` |  | ||||||
| core4=`expr $basecore + 8 ` |  | ||||||
| core5=`expr $basecore + 10 ` |  | ||||||
| core6=`expr $basecore + 12 ` |  | ||||||
| core7=`expr $basecore + 14 ` |  | ||||||
|  |  | ||||||
| export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" |  | ||||||
| echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY |  | ||||||
|  |  | ||||||
| $@ |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| Performance: |  | ||||||
|  |  | ||||||
| #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping):  |  | ||||||
|  |  | ||||||
| mpirun  -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 |  | ||||||
|  |  | ||||||
| TBA |  | ||||||
|  |  | ||||||
| ### Build setup for BlueGene/Q |  | ||||||
|  |  | ||||||
| To be written... |  | ||||||
|  |  | ||||||
| ### Build setup for ARM Neon |  | ||||||
|  |  | ||||||
| To be written... |  | ||||||
|  |  | ||||||
| ### Build setup for laptops, other compilers, non-cluster builds |  | ||||||
|  |  | ||||||
| Many versions of g++ and clang++ work with Grid; using them merely involves replacing CXX (and MPICXX), |  | ||||||
| and omitting the `--enable-mkl` flag.  |  | ||||||
|  |  | ||||||
| Single node builds are enabled with  |  | ||||||
| ``` |  | ||||||
|             --enable-comms=none |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| FFTW support that is not in the default search path may then be enabled with |  | ||||||
| ``` |  | ||||||
|     --with-fftw=<installpath> |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										70
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										70
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,35 +1,6 @@ | |||||||
| TODO: | TODO: | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
| Large item work list: |  | ||||||
|  |  | ||||||
| 1)- BG/Q port and check |  | ||||||
| 2)- Christoph's local basis expansion Lanczos |  | ||||||
| 3)- Precision conversion and sort out localConvert      <-- partial |  | ||||||
|  |  | ||||||
|   - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet |  | ||||||
| 4)- Physical propagator interface |  | ||||||
| 5)- Conserved currents |  | ||||||
| 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations |  | ||||||
| 7)- HDCR resume |  | ||||||
|  |  | ||||||
| Recent DONE  |  | ||||||
|  |  | ||||||
| -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O.  <--- DONE |  | ||||||
| -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE |  | ||||||
| -- GaugeFix into central location                      <-- DONE |  | ||||||
| -- Scidac and Ildg metadata handling                   <-- DONE |  | ||||||
| -- Binary I/O MPI2 IO                                  <-- DONE |  | ||||||
| -- Binary I/O speed up & x-strips                      <-- DONE |  | ||||||
| -- Cut down the exterior overhead                      <-- DONE |  | ||||||
| -- Interior legs from SHM comms                        <-- DONE |  | ||||||
| -- Half-precision comms                                <-- DONE |  | ||||||
| -- Merge high precision reduction into develop         <-- DONE |  | ||||||
| -- BlockCG, BCGrQ                                      <-- DONE |  | ||||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE |  | ||||||
|    -- slice* linalg routines for multiRHS, BlockCG     |  | ||||||
|  |  | ||||||
| ----- |  | ||||||
| * Forces; the UdSdU  term in gauge force term is half of what I think it should | * Forces; the UdSdU  term in gauge force term is half of what I think it should | ||||||
|   be. This is a consequence of taking ONLY the first term in: |   be. This is a consequence of taking ONLY the first term in: | ||||||
|  |  | ||||||
| @@ -50,8 +21,16 @@ Recent DONE | |||||||
|   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. |   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. | ||||||
|   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. |   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Policies: | ||||||
|  |  | ||||||
|  | * Link smearing/boundary conds; Policy class based implementation ; framework more in place | ||||||
|  |  | ||||||
| * Support different boundary conditions (finite temp, chem. potential ... ) | * Support different boundary conditions (finite temp, chem. potential ... ) | ||||||
|  |  | ||||||
|  | * Support different fermion representations?  | ||||||
|  |   - contained entirely within the integrator presently | ||||||
|  |  | ||||||
| - Sign of force term. | - Sign of force term. | ||||||
|  |  | ||||||
| - Reversibility test. | - Reversibility test. | ||||||
| @@ -62,6 +41,11 @@ Recent DONE | |||||||
|  |  | ||||||
| - Audit oIndex usage for cb behaviour | - Audit oIndex usage for cb behaviour | ||||||
|  |  | ||||||
|  | - Rectangle gauge actions. | ||||||
|  |   Iwasaki, | ||||||
|  |   Symanzik, | ||||||
|  |   ... etc... | ||||||
|  |  | ||||||
| - Prepare multigrid for HMC. - Alternate setup schemes. | - Prepare multigrid for HMC. - Alternate setup schemes. | ||||||
|  |  | ||||||
| - Support for ILDG --- ugly, not done | - Support for ILDG --- ugly, not done | ||||||
| @@ -71,11 +55,9 @@ Recent DONE | |||||||
| - FFTnD ? | - FFTnD ? | ||||||
|  |  | ||||||
| - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | ||||||
|  |  | ||||||
| - Gparity force term; Gparity (R)HMC. | - Gparity force term; Gparity (R)HMC. | ||||||
|  | - Random number state save restore | ||||||
| - Mobius implementation clean up to remove #if 0 stale code sequences | - Mobius implementation clean up to remove #if 0 stale code sequences | ||||||
|  |  | ||||||
| - CG -- profile carefully, kernel fusion, whole CG performance measurements. | - CG -- profile carefully, kernel fusion, whole CG performance measurements. | ||||||
|  |  | ||||||
| ================================================================ | ================================================================ | ||||||
| @@ -108,7 +90,6 @@ Insert/Extract | |||||||
| Not sure of status of this -- reverify. Things are working nicely now though. | Not sure of status of this -- reverify. Things are working nicely now though. | ||||||
|  |  | ||||||
| * Make the Tensor types and Complex etc... play more nicely. | * Make the Tensor types and Complex etc... play more nicely. | ||||||
|  |  | ||||||
|   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > |   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > | ||||||
|     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I |     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I | ||||||
|     want to introduce a syntax that does not require this. |     want to introduce a syntax that does not require this. | ||||||
| @@ -131,8 +112,6 @@ Not sure of status of this -- reverify. Things are working nicely now though. | |||||||
| RECENT | RECENT | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
|   - Support different fermion representations? -- DONE |  | ||||||
|   - contained entirely within the integrator presently |  | ||||||
|   - Clean up HMC                                                             -- DONE |   - Clean up HMC                                                             -- DONE | ||||||
|   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE |   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE | ||||||
|   - Simplified the integrators a bit.                                        -- DONE |   - Simplified the integrators a bit.                                        -- DONE | ||||||
| @@ -144,26 +123,6 @@ RECENT | |||||||
|   - Parallel io improvements                                  -- DONE |   - Parallel io improvements                                  -- DONE | ||||||
|   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE |   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE | ||||||
|  |  | ||||||
|  |  | ||||||
| DONE: |  | ||||||
| - MultiArray -- MultiRHS done |  | ||||||
| - ConjugateGradientMultiShift -- DONE |  | ||||||
| - MCR                         -- DONE |  | ||||||
| - Remez -- Mike or Boost?     -- DONE |  | ||||||
| - Proto (ET)                  -- DONE |  | ||||||
| - uBlas                       -- DONE ; Eigen |  | ||||||
| - Potentially Useful Boost libraries -- DONE ; Eigen |  | ||||||
| - Aligned allocator; memory pool -- DONE |  | ||||||
| - Multiprecision              -- DONE |  | ||||||
| - Serialization               -- DONE |  | ||||||
| - Regex -- Not needed |  | ||||||
| - Tokenize -- Why? |  | ||||||
|  |  | ||||||
| - Random number state save restore -- DONE |  | ||||||
| - Rectangle gauge actions. -- DONE |  | ||||||
|   Iwasaki, |  | ||||||
|   Symanzik, |  | ||||||
|   ... etc... |  | ||||||
| Done: Cayley, Partial , ContFrac force terms. | Done: Cayley, Partial , ContFrac force terms. | ||||||
|  |  | ||||||
| DONE | DONE | ||||||
| @@ -248,7 +207,6 @@ Done | |||||||
| FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | ||||||
| ====================================================================================================== | ====================================================================================================== | ||||||
|  |  | ||||||
| * Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE |  | ||||||
| * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | ||||||
|   user pass these? Is this a QCD specific? |   user pass these? Is this a QCD specific? | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								VERSION
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								VERSION
									
									
									
									
									
								
							| @@ -1,5 +1,6 @@ | |||||||
| Version : 0.7.0 | Version : 0.6.0 | ||||||
|  |  | ||||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | - AVX512, AVX2, AVX, SSE good | ||||||
| - MPI and MPI3 comms optimisations for KNL and OPA finished | - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | ||||||
| - Half precision comms | - MPI and MPI3 | ||||||
|  | - HiRep, Smearing, Generic gauge group | ||||||
|   | |||||||
| @@ -1,800 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_memory_bandwidth.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| std::vector<int> L_list; |  | ||||||
| std::vector<int> Ls_list; |  | ||||||
| std::vector<double> mflop_list; |  | ||||||
|  |  | ||||||
| double mflop_ref; |  | ||||||
| double mflop_ref_err; |  | ||||||
|  |  | ||||||
| int NN_global; |  | ||||||
|  |  | ||||||
/// Summary statistics of a set of timing samples.
///
/// The members are only meaningful after a call to statistics(); they are
/// not initialised by construction.
struct time_statistics{
  double mean;  ///< arithmetic mean of the samples
  double err;   ///< standard error of the mean, sqrt( sum (x-mean)^2 / (n (n-1)) )
  double min;   ///< smallest sample
  double max;   ///< largest sample

  /// Compute mean, err, min and max of the samples in @p v.
  ///
  /// An empty sample set yields all zeros, and a single sample yields err = 0,
  /// instead of dereferencing end() or dividing by zero.
  void statistics(const std::vector<double> &v){
    const std::size_t n = v.size();
    if ( n == 0 ) {
      mean = err = min = max = 0.0;
      return;
    }

    const double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / n;

    // Sum of squared deviations, accumulated in sample order.
    double sq_sum = 0.0;
    for (double x : v) {
      const double d = x - mean;
      sq_sum += d*d;
    }
    // The n(n-1) denominator vanishes for a single sample; report no spread.
    err = ( n > 1 ) ? std::sqrt(sq_sum / (static_cast<double>(n)*(n - 1))) : 0.0;

    const auto extremes = std::minmax_element(v.begin(), v.end());
    min = *extremes.first;
    max = *extremes.second;
  }
};
|  |  | ||||||
| void comms_header(){ |  | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| Gamma::Algebra Gmu [] = { |  | ||||||
|   Gamma::Algebra::GammaX, |  | ||||||
|   Gamma::Algebra::GammaY, |  | ||||||
|   Gamma::Algebra::GammaZ, |  | ||||||
|   Gamma::Algebra::GammaT |  | ||||||
| }; |  | ||||||
| struct controls { |  | ||||||
|   int Opt; |  | ||||||
|   int CommsOverlap; |  | ||||||
|   Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; |  | ||||||
|   //  int HugePages; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class Benchmark { |  | ||||||
| public: |  | ||||||
|   static void Decomposition (void ) { |  | ||||||
|  |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; |  | ||||||
|     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static void Comms(void) |  | ||||||
|   { |  | ||||||
|     int Nloop=200; |  | ||||||
|     int nmu=0; |  | ||||||
|     int maxlat=32; |  | ||||||
|  |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|  |  | ||||||
|     for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |  | ||||||
|  |  | ||||||
|     std::vector<double> t_time(Nloop); |  | ||||||
|     time_statistics timestat; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|     comms_header(); |  | ||||||
|  |  | ||||||
|     for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|       for(int Ls=8;Ls<=8;Ls*=2){ |  | ||||||
|  |  | ||||||
| 	std::vector<int> latt_size  ({lat*mpi_layout[0], |  | ||||||
| 	      lat*mpi_layout[1], |  | ||||||
| 	      lat*mpi_layout[2], |  | ||||||
| 	      lat*mpi_layout[3]}); |  | ||||||
|  |  | ||||||
| 	GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
| 	RealD Nrank = Grid._Nprocessors; |  | ||||||
| 	RealD Nnode = Grid.NodeCount(); |  | ||||||
| 	RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
| 	std::vector<HalfSpinColourVectorD *> xbuf(8); |  | ||||||
| 	std::vector<HalfSpinColourVectorD *> rbuf(8); |  | ||||||
| 	Grid.ShmBufferFreeAll(); |  | ||||||
| 	for(int d=0;d<8;d++){ |  | ||||||
| 	  xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |  | ||||||
| 	int ncomm; |  | ||||||
| 	double dbytes; |  | ||||||
| 	std::vector<double> times(Nloop); |  | ||||||
| 	for(int i=0;i<Nloop;i++){ |  | ||||||
|  |  | ||||||
| 	  double start=usecond(); |  | ||||||
|  |  | ||||||
| 	  dbytes=0; |  | ||||||
| 	  ncomm=0; |  | ||||||
|  |  | ||||||
| 	  parallel_for(int dir=0;dir<8;dir++){ |  | ||||||
|  |  | ||||||
| 	    double tbytes; |  | ||||||
| 	    int mu =dir % 4; |  | ||||||
|  |  | ||||||
| 	    if (mpi_layout[mu]>1 ) { |  | ||||||
| 	         |  | ||||||
| 	      int xmit_to_rank; |  | ||||||
| 	      int recv_from_rank; |  | ||||||
| 	      if ( dir == mu ) {  |  | ||||||
| 		int comm_proc=1; |  | ||||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	      } else {  |  | ||||||
| 		int comm_proc = mpi_layout[mu]-1; |  | ||||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	      } |  | ||||||
| 	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, |  | ||||||
| 						 (void *)&rbuf[dir][0], recv_from_rank, |  | ||||||
| 						 bytes,dir); |  | ||||||
| 	   |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp atomic |  | ||||||
| #endif |  | ||||||
| 	      ncomm++; |  | ||||||
|  |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp atomic |  | ||||||
| #endif |  | ||||||
| 	      dbytes+=tbytes; |  | ||||||
| 	    } |  | ||||||
| 	  } |  | ||||||
| 	  Grid.Barrier(); |  | ||||||
| 	  double stop=usecond(); |  | ||||||
| 	  t_time[i] = stop-start; // microseconds |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	//	for(int i=0;i<t_time.size();i++){ |  | ||||||
| 	//	  std::cout << i<<" "<<t_time[i]<<std::endl; |  | ||||||
| 	//	} |  | ||||||
|  |  | ||||||
| 	dbytes=dbytes*ppn; |  | ||||||
| 	double xbytes    = dbytes*0.5; |  | ||||||
| 	double rbytes    = dbytes*0.5; |  | ||||||
| 	double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
| 		 <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
| 		 <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
| 		 <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
| 		 << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
| 		 << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|   |  | ||||||
| 	 |  | ||||||
| 	    } |  | ||||||
|     }     |  | ||||||
|  |  | ||||||
|     return; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static void Memory(void) |  | ||||||
|   { |  | ||||||
|     const int Nvec=8; |  | ||||||
|     typedef Lattice< iVector< vReal,Nvec> > LatticeVec; |  | ||||||
|     typedef iVector<vReal,Nvec> Vec; |  | ||||||
|  |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |  | ||||||
|    |  | ||||||
|     uint64_t NP; |  | ||||||
|     uint64_t NN; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   uint64_t lmax=48; |  | ||||||
| #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) |  | ||||||
|  |  | ||||||
|     GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |  | ||||||
|     for(int lat=8;lat<=lmax;lat+=4){ |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |  | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|  |  | ||||||
|       NP= Grid.RankCount(); |  | ||||||
|       NN =Grid.NodeCount(); |  | ||||||
|  |  | ||||||
|       Vec rn ; random(sRNG,rn); |  | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); z=rn; |  | ||||||
|       LatticeVec x(&Grid); x=rn; |  | ||||||
|       LatticeVec y(&Grid); y=rn; |  | ||||||
|       double a=2.0; |  | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |  | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
| 	z=a*x-y; |  | ||||||
|         x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away |  | ||||||
|         y._odata[4]=z._odata[4]; |  | ||||||
|       } |  | ||||||
|       double stop=usecond(); |  | ||||||
|       double time = (stop-start)/Nloop*1000; |  | ||||||
|       |  | ||||||
|       double flops=vol*Nvec*2;// mul,add |  | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |  | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3)  |  | ||||||
| 	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000. |  | ||||||
| 	       << "\t\t"<< bytes/time/NN <<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   static double DWF5(int Ls,int L) |  | ||||||
|   { |  | ||||||
|     RealD mass=0.1; |  | ||||||
|     RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|     double mflops; |  | ||||||
|     double mflops_best = 0; |  | ||||||
|     double mflops_worst= 0; |  | ||||||
|     std::vector<double> mflops_all; |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     // Set/Get the layout & grid size |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); |  | ||||||
|     std::vector<int> local({L,L,L,L}); |  | ||||||
|  |  | ||||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  |  | ||||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     uint64_t NP = TmpGrid->RankCount(); |  | ||||||
|     uint64_t NN = TmpGrid->NodeCount(); |  | ||||||
|     NN_global=NN; |  | ||||||
|     uint64_t SHM=NP/NN; |  | ||||||
|  |  | ||||||
|     std::vector<int> internal; |  | ||||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); |  | ||||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); |  | ||||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); |  | ||||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); |  | ||||||
|     else assert(0); |  | ||||||
|  |  | ||||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); |  | ||||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); |  | ||||||
|  |  | ||||||
|     ///////// Welcome message //////////// |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Lattice Init //////////// |  | ||||||
|     GridCartesian         * UGrid    = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * UrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|     GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); |  | ||||||
|     GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); |  | ||||||
|     GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|     ///////// RNG Init //////////// |  | ||||||
|     std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|     std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|     GridParallelRNG          RNG5(sFGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Source preparation //////////// |  | ||||||
|     LatticeFermion src   (sFGrid); random(RNG5,src); |  | ||||||
|     LatticeFermion tmp   (sFGrid); |  | ||||||
|  |  | ||||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|     src = src*N2; |  | ||||||
|      |  | ||||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|  |  | ||||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); |  | ||||||
|     LatticeFermion src_e (sFrbGrid); |  | ||||||
|     LatticeFermion src_o (sFrbGrid); |  | ||||||
|     LatticeFermion r_e   (sFrbGrid); |  | ||||||
|     LatticeFermion r_o   (sFrbGrid); |  | ||||||
|     LatticeFermion r_eo  (sFGrid); |  | ||||||
|     LatticeFermion err   (sFGrid); |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd,src_o,src); |  | ||||||
|  |  | ||||||
| #if defined(AVX512)  |  | ||||||
|       const int num_cases = 6; |  | ||||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #else |  | ||||||
|       const int num_cases = 4; |  | ||||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #endif |  | ||||||
|       controls Cases [] = { |  | ||||||
| #ifdef AVX512 |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| #endif |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } |  | ||||||
|       };  |  | ||||||
|  |  | ||||||
|       for(int c=0;c<num_cases;c++) { |  | ||||||
|  |  | ||||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; |  | ||||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; |  | ||||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
| 	int nwarm = 100; |  | ||||||
| 	uint64_t ncall = 1000; |  | ||||||
|  |  | ||||||
| 	double t0=usecond(); |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	for(int i=0;i<nwarm;i++){ |  | ||||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	} |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	double t1=usecond(); |  | ||||||
|  |  | ||||||
| 	sDw.ZeroCounters(); |  | ||||||
| 	time_statistics timestat; |  | ||||||
| 	std::vector<double> t_time(ncall); |  | ||||||
| 	for(uint64_t i=0;i<ncall;i++){ |  | ||||||
| 	  t0=usecond(); |  | ||||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	  t1=usecond(); |  | ||||||
| 	  t_time[i] = t1-t0; |  | ||||||
| 	} |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	 |  | ||||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
| 	double flops=(1344.0*volume)/2; |  | ||||||
| 	double mf_hi, mf_lo, mf_err; |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	mf_hi = flops/timestat.min; |  | ||||||
| 	mf_lo = flops/timestat.max; |  | ||||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; |  | ||||||
|  |  | ||||||
| 	mflops = flops/timestat.mean; |  | ||||||
| 	mflops_all.push_back(mflops); |  | ||||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; |  | ||||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; |  | ||||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; |  | ||||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank   "<< mflops/NP<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node   "<< mflops/NN<<std::endl; |  | ||||||
|  |  | ||||||
| 	sDw.Report(); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|       double robust = mflops_worst/mflops_best;; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage <<fmt << std::endl; |  | ||||||
|       std::cout<<GridLogMessage; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<mflops_all.size();i++){ |  | ||||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; |  | ||||||
|       } |  | ||||||
|       std::cout<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     return mflops_best; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static double DWF(int Ls,int L, double & robust) |  | ||||||
|   { |  | ||||||
|     RealD mass=0.1; |  | ||||||
|     RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|     double mflops; |  | ||||||
|     double mflops_best = 0; |  | ||||||
|     double mflops_worst= 0; |  | ||||||
|     std::vector<double> mflops_all; |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     // Set/Get the layout & grid size |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); |  | ||||||
|     std::vector<int> local({L,L,L,L}); |  | ||||||
|  |  | ||||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  |  | ||||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     uint64_t NP = TmpGrid->RankCount(); |  | ||||||
|     uint64_t NN = TmpGrid->NodeCount(); |  | ||||||
|     NN_global=NN; |  | ||||||
|     uint64_t SHM=NP/NN; |  | ||||||
|  |  | ||||||
|     std::vector<int> internal; |  | ||||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); |  | ||||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); |  | ||||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); |  | ||||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); |  | ||||||
|     else assert(0); |  | ||||||
|  |  | ||||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); |  | ||||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); |  | ||||||
|  |  | ||||||
|     ///////// Welcome message //////////// |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     ///////// Lattice Init //////////// |  | ||||||
|     GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|      |  | ||||||
|     ///////// RNG Init //////////// |  | ||||||
|     std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|     std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Source preparation //////////// |  | ||||||
|     LatticeFermion src   (FGrid); random(RNG5,src); |  | ||||||
|     LatticeFermion ref   (FGrid); |  | ||||||
|     LatticeFermion tmp   (FGrid); |  | ||||||
|  |  | ||||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|     src = src*N2; |  | ||||||
|      |  | ||||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|  |  | ||||||
|     DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|  |  | ||||||
|     //////////////////////////////////// |  | ||||||
|     // Naive wilson implementation |  | ||||||
|     //////////////////////////////////// |  | ||||||
|     { |  | ||||||
|       LatticeGaugeField Umu5d(FGrid);  |  | ||||||
|       std::vector<LatticeColourMatrix> U(4,FGrid); |  | ||||||
|       for(int ss=0;ss<Umu._grid->oSites();ss++){ |  | ||||||
| 	for(int s=0;s<Ls;s++){ |  | ||||||
| 	  Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       ref = zero; |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); |  | ||||||
|       } |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	 |  | ||||||
| 	tmp = U[mu]*Cshift(src,mu+1,1); |  | ||||||
| 	ref=ref + tmp - Gamma(Gmu[mu])*tmp; |  | ||||||
| 	 |  | ||||||
| 	tmp =adj(U[mu])*src; |  | ||||||
| 	tmp =Cshift(tmp,mu+1,-1); |  | ||||||
| 	ref=ref + tmp + Gamma(Gmu[mu])*tmp; |  | ||||||
|       } |  | ||||||
|       ref = -0.5*ref; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeFermion src_e (FrbGrid); |  | ||||||
|     LatticeFermion src_o (FrbGrid); |  | ||||||
|     LatticeFermion r_e   (FrbGrid); |  | ||||||
|     LatticeFermion r_o   (FrbGrid); |  | ||||||
|     LatticeFermion r_eo  (FGrid); |  | ||||||
|     LatticeFermion err   (FGrid); |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd,src_o,src); |  | ||||||
|  |  | ||||||
| #if defined(AVX512)  |  | ||||||
|       const int num_cases = 6; |  | ||||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #else |  | ||||||
|       const int num_cases = 4; |  | ||||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #endif |  | ||||||
|       controls Cases [] = { |  | ||||||
| #ifdef AVX512 |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| #endif |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } |  | ||||||
|       };  |  | ||||||
|  |  | ||||||
|       for(int c=0;c<num_cases;c++) { |  | ||||||
|  |  | ||||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; |  | ||||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; |  | ||||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
| 	int nwarm = 200; |  | ||||||
| 	double t0=usecond(); |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	for(int i=0;i<nwarm;i++){ |  | ||||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	} |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	double t1=usecond(); |  | ||||||
| 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); |  | ||||||
| 	//	if (ncall < 500) ncall = 500; |  | ||||||
| 	uint64_t ncall = 1000; |  | ||||||
|  |  | ||||||
| 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); |  | ||||||
|  |  | ||||||
| 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; |  | ||||||
| 	Dw.ZeroCounters(); |  | ||||||
|  |  | ||||||
| 	time_statistics timestat; |  | ||||||
| 	std::vector<double> t_time(ncall); |  | ||||||
| 	for(uint64_t i=0;i<ncall;i++){ |  | ||||||
| 	  t0=usecond(); |  | ||||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	  t1=usecond(); |  | ||||||
| 	  t_time[i] = t1-t0; |  | ||||||
| 	} |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	 |  | ||||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
| 	double flops=(1344.0*volume)/2; |  | ||||||
| 	double mf_hi, mf_lo, mf_err; |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	mf_hi = flops/timestat.min; |  | ||||||
| 	mf_lo = flops/timestat.max; |  | ||||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; |  | ||||||
|  |  | ||||||
| 	mflops = flops/timestat.mean; |  | ||||||
| 	mflops_all.push_back(mflops); |  | ||||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; |  | ||||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; |  | ||||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; |  | ||||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl; |  | ||||||
|  |  | ||||||
| 	Dw.Report(); |  | ||||||
|  |  | ||||||
| 	Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	Dw.DhopOE(src_e,r_o,DaggerNo); |  | ||||||
| 	setCheckerboard(r_eo,r_o); |  | ||||||
| 	setCheckerboard(r_eo,r_e); |  | ||||||
| 	err = r_eo-ref;  |  | ||||||
| 	std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |  | ||||||
| 	assert((norm2(err)<1.0e-4)); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|       robust = mflops_worst/mflops_best; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust  <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage <<fmt << std::endl; |  | ||||||
|       std::cout<<GridLogMessage ; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<mflops_all.size();i++){ |  | ||||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; |  | ||||||
|       } |  | ||||||
|       std::cout<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     return mflops_best; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); |  | ||||||
| #ifdef KNL |  | ||||||
|   LebesgueOrder::Block = std::vector<int>({8,2,2,2}); |  | ||||||
| #else |  | ||||||
|   LebesgueOrder::Block = std::vector<int>({2,2,2,2}); |  | ||||||
| #endif |  | ||||||
|   Benchmark::Decomposition(); |  | ||||||
|  |  | ||||||
|   int do_memory=1; |  | ||||||
|   int do_comms =1; |  | ||||||
|   int do_su3   =0; |  | ||||||
|   int do_wilson=1; |  | ||||||
|   int do_dwf   =1; |  | ||||||
|  |  | ||||||
|   if ( do_su3 ) { |  | ||||||
|     // empty for now |  | ||||||
|   } |  | ||||||
| #if 1 |  | ||||||
|   int sel=2; |  | ||||||
|   std::vector<int> L_list({8,12,16,24}); |  | ||||||
| #else |  | ||||||
|   int sel=1; |  | ||||||
|   std::vector<int> L_list({8,12}); |  | ||||||
| #endif |  | ||||||
|   int selm1=sel-1; |  | ||||||
|   std::vector<double> robust_list; |  | ||||||
|  |  | ||||||
|   std::vector<double> wilson; |  | ||||||
|   std::vector<double> dwf4; |  | ||||||
|   std::vector<double> dwf5; |  | ||||||
|  |  | ||||||
|   if ( do_wilson ) { |  | ||||||
|     int Ls=1; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       double robust; |  | ||||||
|       wilson.push_back(Benchmark::DWF(1,L_list[l],robust)); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int Ls=16; |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       double robust; |  | ||||||
|       double result = Benchmark::DWF(Ls,L_list[l],robust) ; |  | ||||||
|       dwf4.push_back(result); |  | ||||||
|       robust_list.push_back(robust); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl; |  | ||||||
|   for(int l=0;l<L_list.size();l++){ |  | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl; |  | ||||||
|   } |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int NN=NN_global; |  | ||||||
|   if ( do_memory ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     Benchmark::Memory(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_comms && (NN>1) ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     Benchmark::Comms(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl; |  | ||||||
|   for(int l=0;l<L_list.size();l++){ |  | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl; |  | ||||||
|   } |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point     result: "  << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl; |  | ||||||
|   std::cout<<std::setprecision(3); |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point robustness: "  << robust_list[sel] <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
| @@ -31,32 +31,6 @@ using namespace std; | |||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace Grid::QCD; | using namespace Grid::QCD; | ||||||
|  |  | ||||||
| struct time_statistics{ |  | ||||||
|   double mean; |  | ||||||
|   double err; |  | ||||||
|   double min; |  | ||||||
|   double max; |  | ||||||
|  |  | ||||||
|   void statistics(std::vector<double> v){ |  | ||||||
|       double sum = std::accumulate(v.begin(), v.end(), 0.0); |  | ||||||
|       mean = sum / v.size(); |  | ||||||
|  |  | ||||||
|       std::vector<double> diff(v.size()); |  | ||||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); |  | ||||||
|       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); |  | ||||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); |  | ||||||
|  |  | ||||||
|       auto result = std::minmax_element(v.begin(), v.end()); |  | ||||||
|       min = *result.first; |  | ||||||
|       max = *result.second; |  | ||||||
| } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| void header(){ |  | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| @@ -66,21 +40,17 @@ int main (int argc, char ** argv) | |||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   int Nloop=100; |   int Nloop=10; | ||||||
|   int nmu=0; |   int nmu=0; | ||||||
|   int maxlat=32; |  | ||||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; |  | ||||||
|   std::vector<double> t_time(Nloop); |  | ||||||
|   time_statistics timestat; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   int maxlat=16; | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |   for(int lat=4;lat<=maxlat;lat+=2){ | ||||||
|  |     for(int Ls=1;Ls<=16;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -88,23 +58,15 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	 |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|       for(int mu=0;mu<8;mu++){ |  | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| @@ -117,6 +79,7 @@ int main (int argc, char ** argv) | |||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
|  | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    Grid.SendToRecvFromBegin(requests, | 	    Grid.SendToRecvFromBegin(requests, | ||||||
| 				   (void *)&xbuf[mu][0], | 				   (void *)&xbuf[mu][0], | ||||||
| @@ -139,24 +102,18 @@ int main (int argc, char ** argv) | |||||||
| 	} | 	} | ||||||
| 	Grid.SendToRecvFromComplete(requests); | 	Grid.SendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       double dbytes    = bytes*ppn; |  | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |       double time = stop-start; // microseconds | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
| @@ -164,32 +121,25 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |   for(int lat=4;lat<=maxlat;lat+=2){ | ||||||
|  |     for(int Ls=1;Ls<=16;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); |       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|  |  | ||||||
|       for(int mu=0;mu<8;mu++){ |  | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|      |      | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| @@ -228,37 +178,30 @@ int main (int argc, char ** argv) | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double stop=usecond(); | ||||||
|        |        | ||||||
|       double dbytes    = bytes*ppn; |       double dbytes    = bytes; | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |       double time = stop-start; | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }   |   }   | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Nloop=100; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=2){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=1;Ls<=16;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -266,9 +209,6 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -276,86 +216,73 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double dbytes; |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
|  | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { | 	  if (mpi_layout[mu]>1 ) { | ||||||
| 	   | 	   | ||||||
| 	    ncomm++; | 	    ncomm++; | ||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
|  | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= |  | ||||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					    (void *)&xbuf[mu][0], | 					    (void *)&xbuf[mu][0], | ||||||
| 					    xmit_to_rank, | 					    xmit_to_rank, | ||||||
| 					    (void *)&rbuf[mu][0], | 					    (void *)&rbuf[mu][0], | ||||||
| 					    recv_from_rank, | 					    recv_from_rank, | ||||||
| 					      bytes,mu); | 					    bytes); | ||||||
| 	 | 	 | ||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= |  | ||||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					    (void *)&xbuf[mu+4][0], | 					    (void *)&xbuf[mu+4][0], | ||||||
| 					    xmit_to_rank, | 					    xmit_to_rank, | ||||||
| 					    (void *)&rbuf[mu+4][0], | 					    (void *)&rbuf[mu+4][0], | ||||||
| 					    recv_from_rank, | 					    recv_from_rank, | ||||||
| 					      bytes,mu+4); | 					    bytes); | ||||||
| 	   | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.StencilSendToRecvFromComplete(requests,0); | 	Grid.StencilSendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|  |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       } |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       timestat.statistics(t_time); |       double rbytes    = xbytes; | ||||||
|  |       double bidibytes = xbytes+rbytes; | ||||||
|       dbytes=dbytes*ppn; |  | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |       double time = stop-start; // microseconds | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Nloop=100; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=2){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=1;Ls<=16;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -363,9 +290,6 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -373,18 +297,16 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|       double dbytes; |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
| @@ -396,146 +318,44 @@ int main (int argc, char ** argv) | |||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= |  | ||||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					    (void *)&xbuf[mu][0], | 					    (void *)&xbuf[mu][0], | ||||||
| 					    xmit_to_rank, | 					    xmit_to_rank, | ||||||
| 					    (void *)&rbuf[mu][0], | 					    (void *)&rbuf[mu][0], | ||||||
| 					    recv_from_rank, | 					    recv_from_rank, | ||||||
| 					      bytes,mu); | 					    bytes); | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu); | 	    //	    Grid.StencilSendToRecvFromComplete(requests); | ||||||
| 	    requests.resize(0); | 	    //	    requests.resize(0); | ||||||
|  |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= |  | ||||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					    (void *)&xbuf[mu+4][0], | 					    (void *)&xbuf[mu+4][0], | ||||||
| 					    xmit_to_rank, | 					    xmit_to_rank, | ||||||
| 					    (void *)&rbuf[mu+4][0], | 					    (void *)&rbuf[mu+4][0], | ||||||
| 					    recv_from_rank, | 					    recv_from_rank, | ||||||
| 					      bytes,mu+4); | 					    bytes); | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu+4); | 	    Grid.StencilSendToRecvFromComplete(requests); | ||||||
| 	    requests.resize(0); | 	    requests.resize(0); | ||||||
| 	   | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|  |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       } |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|  |       double rbytes    = xbytes; | ||||||
|  |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double time = stop-start; // microseconds | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |  | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   header(); |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |  | ||||||
|       				    lat*mpi_layout[1], |  | ||||||
|       				    lat*mpi_layout[2], |  | ||||||
|       				    lat*mpi_layout[3]}); |  | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |  | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |  | ||||||
|       Grid.ShmBufferFreeAll(); |  | ||||||
|       for(int d=0;d<8;d++){ |  | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int ncomm; |  | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |  | ||||||
|       double dbytes; |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; |  | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; |  | ||||||
|  |  | ||||||
| 	parallel_for(int dir=0;dir<8;dir++){ |  | ||||||
|  |  | ||||||
| 	  double tbytes; |  | ||||||
| 	  int mu =dir % 4; |  | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { |  | ||||||
| 	   |  | ||||||
| 	    ncomm++; |  | ||||||
| 	    int xmit_to_rank; |  | ||||||
| 	    int recv_from_rank; |  | ||||||
| 	    if ( dir == mu ) {  |  | ||||||
| 	      int comm_proc=1; |  | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	    } else {  |  | ||||||
| 	      int comm_proc = mpi_layout[mu]-1; |  | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	    } |  | ||||||
|  |  | ||||||
| 	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, |  | ||||||
| 					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); |  | ||||||
|  |  | ||||||
| #pragma omp atomic |  | ||||||
| 	    dbytes+=tbytes; |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
| 	Grid.Barrier(); |  | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |  | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |  | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|   |  | ||||||
|     } |  | ||||||
|   }     |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,6 +1,9 @@ | |||||||
|     /************************************************************************************* |     /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_dwf.cc |     Source file: ./benchmarks/Benchmark_dwf.cc | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| @@ -10,13 +13,16 @@ | |||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| @@ -42,16 +48,16 @@ typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; | |||||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; | typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; | typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; | ||||||
|  |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |  | ||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   std::vector<int> latt4 = GridDefaultLatt(); |   std::vector<int> latt4 = GridDefaultLatt(); | ||||||
|   const int Ls=16; |   const int Ls=8; | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
| @@ -66,65 +72,34 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<int> seeds4({1,2,3,4}); |   std::vector<int> seeds4({1,2,3,4}); | ||||||
|   std::vector<int> seeds5({5,6,7,8}); |   std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; |  | ||||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||||
|   std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; |  | ||||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); |   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||||
|   std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|   LatticeFermion src   (FGrid); random(RNG5,src); |   LatticeFermion src   (FGrid); random(RNG5,src); | ||||||
| #if 0 |  | ||||||
|   src = zero; |  | ||||||
|   { |  | ||||||
|     std::vector<int> origin({0,0,0,latt4[2]-1,0}); |  | ||||||
|     SpinColourVectorF tmp; |  | ||||||
|     tmp=zero; |  | ||||||
|     tmp()(0)(0)=Complex(-2.0,0.0); |  | ||||||
|     std::cout << " source site 0 " << tmp<<std::endl; |  | ||||||
|     pokeSite(tmp,src,origin); |  | ||||||
|   } |  | ||||||
| #else |  | ||||||
|   RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|   src = src*N2; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   LatticeFermion result(FGrid); result=zero; |   LatticeFermion result(FGrid); result=zero; | ||||||
|   LatticeFermion    ref(FGrid);    ref=zero; |   LatticeFermion    ref(FGrid);    ref=zero; | ||||||
|   LatticeFermion    tmp(FGrid); |   LatticeFermion    tmp(FGrid); | ||||||
|   LatticeFermion    err(FGrid); |   LatticeFermion    err(FGrid); | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Drawing gauge field" << std::endl; |  | ||||||
|   LatticeGaugeField Umu(UGrid);  |   LatticeGaugeField Umu(UGrid);  | ||||||
|   SU3::HotConfiguration(RNG4,Umu);  |   random(RNG4,Umu); | ||||||
|   std::cout << GridLogMessage << "Random gauge initialised " << std::endl; |  | ||||||
| #if 0 |  | ||||||
|   Umu=1.0; |  | ||||||
|   for(int mu=0;mu<Nd;mu++){ |  | ||||||
|     LatticeColourMatrix ttmp(UGrid); |  | ||||||
|     ttmp = PeekIndex<LorentzIndex>(Umu,mu); |  | ||||||
|     //    if (mu !=2 ) ttmp = 0; |  | ||||||
|     //    ttmp = ttmp* pow(10.0,mu); |  | ||||||
|     PokeIndex<LorentzIndex>(Umu,ttmp,mu); |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "Forced to diagonal " << std::endl; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
|   //////////////////////////////////// |  | ||||||
|   // Naive wilson implementation |  | ||||||
|   //////////////////////////////////// |  | ||||||
|   // replicate across fifth dimension |  | ||||||
|   LatticeGaugeField Umu5d(FGrid);  |   LatticeGaugeField Umu5d(FGrid);  | ||||||
|   std::vector<LatticeColourMatrix> U(4,FGrid); |  | ||||||
|  |   // replicate across fifth dimension | ||||||
|   for(int ss=0;ss<Umu._grid->oSites();ss++){ |   for(int ss=0;ss<Umu._grid->oSites();ss++){ | ||||||
|     for(int s=0;s<Ls;s++){ |     for(int s=0;s<Ls;s++){ | ||||||
|       Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; |       Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   // Naive wilson implementation | ||||||
|  |   //////////////////////////////////// | ||||||
|  |   std::vector<LatticeColourMatrix> U(4,FGrid); | ||||||
|   for(int mu=0;mu<Nd;mu++){ |   for(int mu=0;mu<Nd;mu++){ | ||||||
|     U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); |     U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); | ||||||
|   } |   } | ||||||
|   std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; |  | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { |   { | ||||||
| @@ -145,7 +120,8 @@ int main (int argc, char ** argv) | |||||||
|   RealD M5  =1.8; |   RealD M5  =1.8; | ||||||
|  |  | ||||||
|   RealD NP = UGrid->_Nprocessors; |   RealD NP = UGrid->_Nprocessors; | ||||||
|   RealD NN = UGrid->NodeCount(); |  | ||||||
|  |   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; |   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||||
| @@ -155,22 +131,15 @@ int main (int argc, char ** argv) | |||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|  |  | ||||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |   int ncall =100; | ||||||
|   int ncall =500; |  | ||||||
|   if (1) { |   if (1) { | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     Dw.ZeroCounters(); |     Dw.ZeroCounters(); | ||||||
|     Dw.Dhop(src,result,0); |  | ||||||
|     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; |  | ||||||
|     double t0=usecond(); |     double t0=usecond(); | ||||||
|     for(int i=0;i<ncall;i++){ |     for(int i=0;i<ncall;i++){ | ||||||
|       __SSC_START; |       __SSC_START; | ||||||
| @@ -184,55 +153,16 @@ int main (int argc, char ** argv) | |||||||
|     double flops=1344*volume*ncall; |     double flops=1344*volume*ncall; | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; |     std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; | ||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |     std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
|     /* |  | ||||||
|     if(( norm2(err)>1.0e-4) ) {  |  | ||||||
|       std::cout << "RESULT\n " << result<<std::endl; |  | ||||||
|       std::cout << "REF   \n " << ref   <<std::endl; |  | ||||||
|       std::cout << "ERR   \n " << err   <<std::endl; |  | ||||||
|       FGrid->Barrier(); |  | ||||||
|       exit(-1); |  | ||||||
|     } |  | ||||||
|     */ |  | ||||||
|     assert (norm2(err)< 1.0e-4 ); |     assert (norm2(err)< 1.0e-4 ); | ||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|   if (1) { |  | ||||||
|     FGrid->Barrier(); |  | ||||||
|     DwH.ZeroCounters(); |  | ||||||
|     DwH.Dhop(src,result,0); |  | ||||||
|     double t0=usecond(); |  | ||||||
|     for(int i=0;i<ncall;i++){ |  | ||||||
|       __SSC_START; |  | ||||||
|       DwH.Dhop(src,result,0); |  | ||||||
|       __SSC_STOP; |  | ||||||
|     } |  | ||||||
|     double t1=usecond(); |  | ||||||
|     FGrid->Barrier(); |  | ||||||
|      |  | ||||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
|     double flops=1344*volume*ncall; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     err = ref-result;  |  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |  | ||||||
|  |  | ||||||
|     assert (norm2(err)< 1.0e-3 ); |  | ||||||
|     DwH.Report(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { |   { | ||||||
|  |  | ||||||
| @@ -241,10 +171,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -257,12 +183,20 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); |     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); | ||||||
|    |    | ||||||
|     localConvert(src,ssrc); |     for(int x=0;x<latt4[0];x++){ | ||||||
|  |     for(int y=0;y<latt4[1];y++){ | ||||||
|  |     for(int z=0;z<latt4[2];z++){ | ||||||
|  |     for(int t=0;t<latt4[3];t++){ | ||||||
|  |     for(int s=0;s<Ls;s++){ | ||||||
|  |       std::vector<int> site({s,x,y,z,t}); | ||||||
|  |       SpinColourVector tmp; | ||||||
|  |       peekSite(tmp,src,site); | ||||||
|  |       pokeSite(tmp,ssrc,site); | ||||||
|  |     }}}}} | ||||||
|     std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl; |     std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl; | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     sDw.Dhop(ssrc,sresult,0); |  | ||||||
|     sDw.ZeroCounters(); |  | ||||||
|     double t0=usecond(); |     double t0=usecond(); | ||||||
|  |     sDw.ZeroCounters(); | ||||||
|     for(int i=0;i<ncall;i++){ |     for(int i=0;i<ncall;i++){ | ||||||
|       __SSC_START; |       __SSC_START; | ||||||
|       sDw.Dhop(ssrc,sresult,0); |       sDw.Dhop(ssrc,sresult,0); | ||||||
| @@ -276,115 +210,9 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; |  | ||||||
|     sDw.Report(); |     sDw.Report(); | ||||||
|     RealD sum=0; |  | ||||||
|  |  | ||||||
|     err=zero; |  | ||||||
|     localConvert(sresult,err); |  | ||||||
|     err = err - ref; |  | ||||||
|     sum = norm2(err); |  | ||||||
|     std::cout<<GridLogMessage<<" difference between normal ref and simd is "<<sum<<std::endl; |  | ||||||
|     if(sum > 1.0e-4 ){ |  | ||||||
|       std::cout<< "sD REF\n " <<ref << std::endl; |  | ||||||
|       std::cout<< "sD ERR   \n " <<err  <<std::endl; |  | ||||||
|     } |  | ||||||
|     //    assert(sum < 1.0e-4); |  | ||||||
|  |  | ||||||
|     err=zero; |  | ||||||
|     localConvert(sresult,err); |  | ||||||
|     err = err - result; |  | ||||||
|     sum = norm2(err); |  | ||||||
|     std::cout<<GridLogMessage<<" difference between normal result and simd is "<<sum<<std::endl; |  | ||||||
|     if(sum > 1.0e-4 ){ |  | ||||||
|       std::cout<< "sD REF\n " <<result << std::endl; |  | ||||||
|       std::cout<< "sD ERR   \n " << err  <<std::endl; |  | ||||||
|     } |  | ||||||
|     assert(sum < 1.0e-4); |  | ||||||
|  |  | ||||||
|      |  | ||||||
|     if(1){ |  | ||||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |  | ||||||
|       std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |  | ||||||
|       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
|       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  |  | ||||||
| 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  |  | ||||||
| 	std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm )  |  | ||||||
| 	std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |  | ||||||
|  |  | ||||||
|       LatticeFermion sr_eo(sFGrid); |  | ||||||
|       LatticeFermion ssrc_e (sFrbGrid); |  | ||||||
|       LatticeFermion ssrc_o (sFrbGrid); |  | ||||||
|       LatticeFermion sr_e   (sFrbGrid); |  | ||||||
|       LatticeFermion sr_o   (sFrbGrid); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,ssrc_e,ssrc); |  | ||||||
|       pickCheckerboard(Odd,ssrc_o,ssrc); |  | ||||||
|       //      setCheckerboard(sr_eo,ssrc_o); |  | ||||||
|       //      setCheckerboard(sr_eo,ssrc_e); |  | ||||||
|  |  | ||||||
|       sr_e = zero; |  | ||||||
|       sr_o = zero; |  | ||||||
|  |  | ||||||
|       FGrid->Barrier(); |  | ||||||
|       sDw.DhopEO(ssrc_o, sr_e, DaggerNo); |  | ||||||
|       sDw.ZeroCounters(); |  | ||||||
|       //      sDw.stat.init("DhopEO"); |  | ||||||
|       double t0=usecond(); |  | ||||||
|       for (int i = 0; i < ncall; i++) { |  | ||||||
|         sDw.DhopEO(ssrc_o, sr_e, DaggerNo); |  | ||||||
|       } |  | ||||||
|       double t1=usecond(); |  | ||||||
|       FGrid->Barrier(); |  | ||||||
|       //      sDw.stat.print(); |  | ||||||
|  |  | ||||||
|       double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
|       double flops=(1344.0*volume*ncall)/2; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|       sDw.Report(); |  | ||||||
|  |  | ||||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); |  | ||||||
|       sDw.DhopOE(ssrc_e,sr_o,DaggerNo); |  | ||||||
|       sDw.Dhop  (ssrc  ,sresult,DaggerNo); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,ssrc_e,sresult); |  | ||||||
|       pickCheckerboard(Odd ,ssrc_o,sresult); |  | ||||||
|  |  | ||||||
|       ssrc_e = ssrc_e - sr_e; |  | ||||||
|       RealD error = norm2(ssrc_e); |  | ||||||
|       std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl; |  | ||||||
|  |  | ||||||
|       ssrc_o = ssrc_o - sr_o; |  | ||||||
|       error+= norm2(ssrc_o); |  | ||||||
|       std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<< "  vec nrm"<<norm2(sr_o) <<std::endl; |  | ||||||
|  |  | ||||||
|       if(( error>1.0e-4) ) {  |  | ||||||
| 	setCheckerboard(ssrc,ssrc_o); |  | ||||||
| 	setCheckerboard(ssrc,ssrc_e); |  | ||||||
| 	std::cout<< "DIFF\n " <<ssrc << std::endl; |  | ||||||
| 	setCheckerboard(ssrc,sr_o); |  | ||||||
| 	setCheckerboard(ssrc,sr_e); |  | ||||||
| 	std::cout<< "CBRESULT\n " <<ssrc << std::endl; |  | ||||||
| 	std::cout<< "RESULT\n " <<sresult<< std::endl; |  | ||||||
|       } |  | ||||||
|       assert(error<1.0e-4); |  | ||||||
|     } |  | ||||||
|    |    | ||||||
|     if(0){ |     if(0){ | ||||||
|     std::cout << "Single cache warm call to sDw.Dhop " <<std::endl; |  | ||||||
|       for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ |       for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ | ||||||
| 	sDw.Dhop(ssrc,sresult,0); | 	sDw.Dhop(ssrc,sresult,0); | ||||||
| 	PerformanceCounter Counter(i); | 	PerformanceCounter Counter(i); | ||||||
| @@ -395,9 +223,98 @@ int main (int argc, char ** argv) | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||||
|  |  | ||||||
|  |     RealD sum=0; | ||||||
|  |     for(int x=0;x<latt4[0];x++){ | ||||||
|  |     for(int y=0;y<latt4[1];y++){ | ||||||
|  |     for(int z=0;z<latt4[2];z++){ | ||||||
|  |     for(int t=0;t<latt4[3];t++){ | ||||||
|  |     for(int s=0;s<Ls;s++){ | ||||||
|  |       std::vector<int> site({s,x,y,z,t}); | ||||||
|  |       SpinColourVector normal, simd; | ||||||
|  |       peekSite(normal,result,site); | ||||||
|  |       peekSite(simd,sresult,site); | ||||||
|  |       sum=sum+norm2(normal-simd); | ||||||
|  |       if (norm2(normal-simd) > 1.0e-6 ) { | ||||||
|  | 	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl; | ||||||
|  | 	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl; | ||||||
|  | 	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd   "<<simd<<std::endl; | ||||||
|  |       } | ||||||
|  |     }}}}} | ||||||
|  |     std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl; | ||||||
|  |     assert (sum< 1.0e-4 ); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     if (1) { | ||||||
|  |  | ||||||
|  |       LatticeFermion sr_eo(sFGrid); | ||||||
|  |  | ||||||
|  |       LatticeFermion ssrc_e (sFrbGrid); | ||||||
|  |       LatticeFermion ssrc_o (sFrbGrid); | ||||||
|  |       LatticeFermion sr_e   (sFrbGrid); | ||||||
|  |       LatticeFermion sr_o   (sFrbGrid); | ||||||
|  |  | ||||||
|  |       pickCheckerboard(Even,ssrc_e,ssrc); | ||||||
|  |       pickCheckerboard(Odd,ssrc_o,ssrc); | ||||||
|  |  | ||||||
|  |       setCheckerboard(sr_eo,ssrc_o); | ||||||
|  |       setCheckerboard(sr_eo,ssrc_e); | ||||||
|  |  | ||||||
|  |       sr_e = zero; | ||||||
|  |       sr_o = zero; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|  |       std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl; | ||||||
|  |       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|  |       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|  |       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|  |       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|  |  | ||||||
|  |       FGrid->Barrier(); | ||||||
|  |       sDw.ZeroCounters(); | ||||||
|  |       sDw.stat.init("DhopEO"); | ||||||
|  |       double t0=usecond(); | ||||||
|  |       for (int i = 0; i < ncall; i++) { | ||||||
|  |         sDw.DhopEO(ssrc_o, sr_e, DaggerNo); | ||||||
|  |       } | ||||||
|  |       double t1=usecond(); | ||||||
|  |       FGrid->Barrier(); | ||||||
|  |       sDw.stat.print(); | ||||||
|  |  | ||||||
|  |       double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||||
|  |       double flops=(1344.0*volume*ncall)/2; | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |       sDw.Report(); | ||||||
|  |  | ||||||
|  |       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||||
|  |       sDw.DhopOE(ssrc_e,sr_o,DaggerNo); | ||||||
|  |       sDw.Dhop  (ssrc  ,sresult,DaggerNo); | ||||||
|  |  | ||||||
|  |       pickCheckerboard(Even,ssrc_e,sresult); | ||||||
|  |       pickCheckerboard(Odd ,ssrc_o,sresult); | ||||||
|  |       ssrc_e = ssrc_e - sr_e; | ||||||
|  |       RealD error = norm2(ssrc_e); | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl; | ||||||
|  |       ssrc_o = ssrc_o - sr_o; | ||||||
|  |  | ||||||
|  |       error+= norm2(ssrc_o); | ||||||
|  |       std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<< "  vec nrm"<<norm2(sr_o) <<std::endl; | ||||||
|  |       if(error>1.0e-4) {  | ||||||
|  | 	setCheckerboard(ssrc,ssrc_o); | ||||||
|  | 	setCheckerboard(ssrc,ssrc_e); | ||||||
|  | 	std::cout<< ssrc << std::endl; | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { // Naive wilson dag implementation |   { // Naive wilson dag implementation | ||||||
| @@ -418,19 +335,14 @@ int main (int argc, char ** argv) | |||||||
|     } |     } | ||||||
|     ref = -0.5*ref; |     ref = -0.5*ref; | ||||||
|   } |   } | ||||||
|   //  dump=1; |  | ||||||
|   Dw.Dhop(src,result,1); |   Dw.Dhop(src,result,1); | ||||||
|   std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl; |   std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl; | ||||||
|   std::cout<<GridLogMessage << "Called DwDag"<<std::endl; |   std::cout<<GridLogMessage << "Called DwDag"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl; |   std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "norm dag ref    "<< norm2(ref)<<std::endl; |   std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||||
|   err = ref-result;  |   err = ref-result;  | ||||||
|   std::cout<<GridLogMessage << "norm dag diff   "<< norm2(err)<<std::endl; |   std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|   if((norm2(err)>1.0e-4)){ |   assert(norm2(err)<1.0e-4); | ||||||
| 	std::cout<< "DAG RESULT\n "  <<ref     << std::endl; |  | ||||||
| 	std::cout<< "DAG sRESULT\n " <<result  << std::endl; |  | ||||||
| 	std::cout<< "DAG ERR   \n "  << err    <<std::endl; |  | ||||||
|   } |  | ||||||
|   LatticeFermion src_e (FrbGrid); |   LatticeFermion src_e (FrbGrid); | ||||||
|   LatticeFermion src_o (FrbGrid); |   LatticeFermion src_o (FrbGrid); | ||||||
|   LatticeFermion r_e   (FrbGrid); |   LatticeFermion r_e   (FrbGrid); | ||||||
| @@ -438,24 +350,18 @@ int main (int argc, char ** argv) | |||||||
|   LatticeFermion r_eo  (FGrid); |   LatticeFermion r_eo  (FGrid); | ||||||
|  |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl; |   std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl; | ||||||
|   pickCheckerboard(Even,src_e,src); |   pickCheckerboard(Even,src_e,src); | ||||||
|   pickCheckerboard(Odd,src_o,src); |   pickCheckerboard(Odd,src_o,src); | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl; |   std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl; |   std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|   // S-direction is INNERMOST and takes no part in the parity. |  | ||||||
|   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; |   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -463,7 +369,6 @@ int main (int argc, char ** argv) | |||||||
|   { |   { | ||||||
|     Dw.ZeroCounters(); |     Dw.ZeroCounters(); | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
|     double t0=usecond(); |     double t0=usecond(); | ||||||
|     for(int i=0;i<ncall;i++){ |     for(int i=0;i<ncall;i++){ | ||||||
|       Dw.DhopEO(src_o,r_e,DaggerNo); |       Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
| @@ -476,7 +381,6 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); |   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
| @@ -492,20 +396,14 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|   err = r_eo-result;  |   err = r_eo-result;  | ||||||
|   std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |   std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|   if((norm2(err)>1.0e-4)){ |   assert(norm2(err)<1.0e-4); | ||||||
| 	std::cout<< "Deo RESULT\n " <<r_eo << std::endl; |  | ||||||
| 	std::cout<< "Deo REF\n " <<result  << std::endl; |  | ||||||
| 	std::cout<< "Deo ERR   \n " << err <<std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   pickCheckerboard(Even,src_e,err); |   pickCheckerboard(Even,src_e,err); | ||||||
|   pickCheckerboard(Odd,src_o,err); |   pickCheckerboard(Odd,src_o,err); | ||||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; |   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; |   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||||
|  |  | ||||||
|   assert(norm2(src_e)<1.0e-4); |   assert(norm2(src_e)<1.0e-4); | ||||||
|   assert(norm2(src_o)<1.0e-4); |   assert(norm2(src_o)<1.0e-4); | ||||||
|   Grid_finalize(); |  | ||||||
|   exit(0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -66,8 +66,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     Vec tsum; tsum = zero; |     Vec tsum; tsum = zero; | ||||||
|  |  | ||||||
|     GridParallelRNG          pRNG(&Grid);       |     GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|     pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101})); |  | ||||||
|  |  | ||||||
|     std::vector<double> stop(threads); |     std::vector<double> stop(threads); | ||||||
|     Vector<Vec> sum(threads); |     Vector<Vec> sum(threads); | ||||||
| @@ -78,7 +77,8 @@ int main (int argc, char ** argv) | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     double start=usecond(); |     double start=usecond(); | ||||||
|     parallel_for(int t=0;t<threads;t++){ | PARALLEL_FOR_LOOP | ||||||
|  |     for(int t=0;t<threads;t++){ | ||||||
|  |  | ||||||
|       sum[t] = x[t]._odata[0]; |       sum[t] = x[t]._odata[0]; | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|   | |||||||
| @@ -55,17 +55,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|   uint64_t lmax=96; |   uint64_t lmax=44; | ||||||
| #define NLOOP (10*lmax*lmax*lmax*lmax/vol) | #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
| @@ -83,7 +83,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |       double bytes=3*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -94,13 +94,13 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|    |    | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
| @@ -119,7 +119,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|       |       | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |       double bytes=3*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -129,16 +129,16 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
| @@ -154,7 +154,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=2.0*vol*Nvec*sizeof(Real); |       double bytes=2*vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*1;// mul |       double flops=vol*Nvec*1;// mul | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
| @@ -166,14 +166,14 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid); //random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
| @@ -187,7 +187,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=1.0*vol*Nvec*sizeof(Real); |       double bytes=vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,134 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_staggered.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   std::vector<int> latt_size   = GridDefaultLatt(); |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |  | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); |  | ||||||
|  |  | ||||||
|   int threads = GridThread::GetThreads(); |  | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl; |  | ||||||
|  |  | ||||||
|   std::vector<int> seeds({1,2,3,4}); |  | ||||||
|   GridParallelRNG          pRNG(&Grid); |  | ||||||
|   pRNG.SeedFixedIntegers(seeds); |  | ||||||
|   //  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |  | ||||||
|  |  | ||||||
|   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  |  | ||||||
|   typename ImprovedStaggeredFermionR::ImplParams params;  |  | ||||||
|  |  | ||||||
|   FermionField src   (&Grid); random(pRNG,src); |  | ||||||
|   FermionField result(&Grid); result=zero; |  | ||||||
|   FermionField    ref(&Grid);    ref=zero; |  | ||||||
|   FermionField    tmp(&Grid);    tmp=zero; |  | ||||||
|   FermionField    err(&Grid);    tmp=zero; |  | ||||||
|   LatticeGaugeField Umu(&Grid); random(pRNG,Umu); |  | ||||||
|   std::vector<LatticeColourMatrix> U(4,&Grid); |  | ||||||
|  |  | ||||||
|   double volume=1; |  | ||||||
|   for(int mu=0;mu<Nd;mu++){ |  | ||||||
|     volume=volume*latt_size[mu]; |  | ||||||
|   }   |  | ||||||
|  |  | ||||||
|   // Only one non-zero (y) |  | ||||||
| #if 0 |  | ||||||
|   Umu=zero; |  | ||||||
|   Complex cone(1.0,0.0); |  | ||||||
|   for(int nn=0;nn<Nd;nn++){ |  | ||||||
|     random(pRNG,U[nn]); |  | ||||||
|     if(1) { |  | ||||||
|       if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; } |  | ||||||
|       //      else       { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; } |  | ||||||
|       else       { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; } |  | ||||||
|     } |  | ||||||
|     PokeIndex<LorentzIndex>(Umu,U[nn],nn); |  | ||||||
|   } |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
|   for(int mu=0;mu<Nd;mu++){ |  | ||||||
|     U[mu] = PeekIndex<LorentzIndex>(Umu,mu); |  | ||||||
|   } |  | ||||||
|   ref = zero; |  | ||||||
|   /*   |  | ||||||
|   { // Naive wilson implementation |  | ||||||
|     ref = zero; |  | ||||||
|     for(int mu=0;mu<Nd;mu++){ |  | ||||||
|       //    ref =  src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x |  | ||||||
|       tmp = U[mu]*Cshift(src,mu,1); |  | ||||||
|       for(int i=0;i<ref._odata.size();i++){ |  | ||||||
| 	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       tmp =adj(U[mu])*src; |  | ||||||
|       tmp =Cshift(tmp,mu,-1); |  | ||||||
|       for(int i=0;i<ref._odata.size();i++){ |  | ||||||
| 	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ref = -0.5*ref; |  | ||||||
|   */ |  | ||||||
|  |  | ||||||
|   RealD mass=0.1; |  | ||||||
|   RealD c1=9.0/8.0; |  | ||||||
|   RealD c2=-1.0/24.0; |  | ||||||
|   RealD u0=1.0; |  | ||||||
|   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); |  | ||||||
|    |  | ||||||
|   std::cout<<GridLogMessage << "Calling Ds"<<std::endl; |  | ||||||
|   int ncall=1000; |  | ||||||
|   double t0=usecond(); |  | ||||||
|   for(int i=0;i<ncall;i++){ |  | ||||||
|     Ds.Dhop(src,result,0); |  | ||||||
|   } |  | ||||||
|   double t1=usecond(); |  | ||||||
|   double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 +  == 1146 |  | ||||||
|    |  | ||||||
|   std::cout<<GridLogMessage << "Called Ds"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |  | ||||||
|   err = ref-result;  |  | ||||||
|   std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
| @@ -35,14 +35,13 @@ using namespace Grid::QCD; | |||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| #define LMAX (64) |  | ||||||
|  |  | ||||||
|   int64_t Nloop=20; |   int Nloop=1000; | ||||||
|  |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|   int64_t threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
| @@ -51,19 +50,19 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid);// random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid);// random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid);// random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	x=x*y; | 	x=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -83,20 +82,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	z=x*y; | 	z=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -114,20 +113,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	mult(z,x,y); | 	mult(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -145,20 +144,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	mac(z,x,y); | 	mac(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|   | |||||||
| @@ -69,7 +69,7 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<int> seeds({1,2,3,4}); |   std::vector<int> seeds({1,2,3,4}); | ||||||
|   GridParallelRNG          pRNG(&Grid); |   GridParallelRNG          pRNG(&Grid); | ||||||
|   pRNG.SeedFixedIntegers(seeds); |   pRNG.SeedFixedIntegers(seeds); | ||||||
|   //  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |   //  pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|   LatticeFermion src   (&Grid); random(pRNG,src); |   LatticeFermion src   (&Grid); random(pRNG,src); | ||||||
|   LatticeFermion result(&Grid); result=zero; |   LatticeFermion result(&Grid); result=zero; | ||||||
|   | |||||||
| @@ -1,7 +1,11 @@ | |||||||
| include Make.inc | include Make.inc | ||||||
|  |  | ||||||
| bench-local: all | simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o | ||||||
| 	./Benchmark_su3 |  | ||||||
| 	./Benchmark_memory_bandwidth | EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a | ||||||
| 	./Benchmark_wilson |  | ||||||
| 	./Benchmark_dwf --dslash-unroll | libsimple_su3_test_a_SOURCES = simple_su3_test.cc | ||||||
|  |  | ||||||
|  | libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc | ||||||
|  |  | ||||||
|  | libsimple_simd_test_a_SOURCES = simple_simd_test.cc | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| #!/usr/bin/env bash | #!/usr/bin/env bash | ||||||
|  |  | ||||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | ||||||
|  |  | ||||||
| echo "-- deploying Eigen source..." | echo "-- deploying Eigen source..." | ||||||
| wget ${EIGEN_URL} --no-check-certificate | wget ${EIGEN_URL} --no-check-certificate | ||||||
|   | |||||||
							
								
								
									
										185
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										185
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,23 +1,16 @@ | |||||||
| AC_PREREQ([2.63]) | AC_PREREQ([2.63]) | ||||||
| AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) | AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) | ||||||
| AC_CANONICAL_BUILD | AC_CANONICAL_BUILD | ||||||
| AC_CANONICAL_HOST | AC_CANONICAL_HOST | ||||||
| AC_CANONICAL_TARGET | AC_CANONICAL_TARGET | ||||||
| AM_INIT_AUTOMAKE([subdir-objects 1.13]) | AM_INIT_AUTOMAKE(subdir-objects) | ||||||
| AM_EXTRA_RECURSIVE_TARGETS([tests bench]) |  | ||||||
| AC_CONFIG_MACRO_DIR([m4]) | AC_CONFIG_MACRO_DIR([m4]) | ||||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||||
| AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | ||||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||||
|  |  | ||||||
| ################ Get git info |  | ||||||
| #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) |  | ||||||
|  |  | ||||||
| ################ Set flags |  | ||||||
| # do not move! |  | ||||||
| CXXFLAGS="-O3 $CXXFLAGS" |  | ||||||
|  |  | ||||||
| ############### Checks for programs | ############### Checks for programs | ||||||
|  | CXXFLAGS="-O3 $CXXFLAGS" | ||||||
| AC_PROG_CXX | AC_PROG_CXX | ||||||
| AC_PROG_RANLIB | AC_PROG_RANLIB | ||||||
|  |  | ||||||
| @@ -31,8 +24,6 @@ AX_GXX_VERSION | |||||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||||
|       [version of g++ that will compile the code]) |       [version of g++ that will compile the code]) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ############### Checks for typedefs, structures, and compiler characteristics | ############### Checks for typedefs, structures, and compiler characteristics | ||||||
| AC_TYPE_SIZE_T | AC_TYPE_SIZE_T | ||||||
| AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||||
| @@ -54,14 +45,9 @@ AC_CHECK_HEADERS(malloc/malloc.h) | |||||||
| AC_CHECK_HEADERS(malloc.h) | AC_CHECK_HEADERS(malloc.h) | ||||||
| AC_CHECK_HEADERS(endian.h) | AC_CHECK_HEADERS(endian.h) | ||||||
| AC_CHECK_HEADERS(execinfo.h) | AC_CHECK_HEADERS(execinfo.h) | ||||||
| AC_CHECK_HEADERS(numaif.h) |  | ||||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||||
|  |  | ||||||
| ############## Standard libraries |  | ||||||
| AC_CHECK_LIB([m],[cos]) |  | ||||||
| AC_CHECK_LIB([stdc++],[abort]) |  | ||||||
|  |  | ||||||
| ############### GMP and MPFR | ############### GMP and MPFR | ||||||
| AC_ARG_WITH([gmp], | AC_ARG_WITH([gmp], | ||||||
|     [AS_HELP_STRING([--with-gmp=prefix], |     [AS_HELP_STRING([--with-gmp=prefix], | ||||||
| @@ -81,13 +67,6 @@ AC_ARG_WITH([fftw], | |||||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] |             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) |             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### LIME |  | ||||||
| AC_ARG_WITH([lime], |  | ||||||
|             [AS_HELP_STRING([--with-lime=prefix], |  | ||||||
|             [try this for a non-standard install prefix of the LIME library])], |  | ||||||
|             [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"] |  | ||||||
|             [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"]) |  | ||||||
|  |  | ||||||
| ############### lapack  | ############### lapack  | ||||||
| AC_ARG_ENABLE([lapack], | AC_ARG_ENABLE([lapack], | ||||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  |     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  | ||||||
| @@ -104,18 +83,6 @@ case ${ac_LAPACK} in | |||||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; |         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| ############### FP16 conversions |  | ||||||
| AC_ARG_ENABLE([sfw-fp16], |  | ||||||
|     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], |  | ||||||
|     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) |  | ||||||
| case ${ac_SFW_FP16} in |  | ||||||
|     yes) |  | ||||||
|       AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);; |  | ||||||
|     no);; |  | ||||||
|     *) |  | ||||||
|       AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);; |  | ||||||
| esac |  | ||||||
|  |  | ||||||
| ############### MKL | ############### MKL | ||||||
| AC_ARG_ENABLE([mkl], | AC_ARG_ENABLE([mkl], | ||||||
|     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], |     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], | ||||||
| @@ -185,23 +152,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], | |||||||
|                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] |                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] | ||||||
|                [have_fftw=true]) |                [have_fftw=true]) | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([limeCreateReader], [lime], |  | ||||||
|                [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])] |  | ||||||
|                [have_lime=true], |  | ||||||
| 	       [AC_MSG_WARN(C-LIME library was not found in your system. |  | ||||||
| In order to use ILGG file format please install or provide the correct path to your installation |  | ||||||
| Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([crc32], [z], |  | ||||||
|                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] |  | ||||||
|                [have_zlib=true] [LIBS="${LIBS} -lz"], |  | ||||||
| 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([move_pages], [numa], |  | ||||||
|                [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] |  | ||||||
|                [have_libnuma=true] [LIBS="${LIBS} -lnuma"], |  | ||||||
| 	       [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] |                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||||
|                [have_hdf5=true] |                [have_hdf5=true] | ||||||
| @@ -226,26 +176,19 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|     case ${ac_SIMD} in |     case ${ac_SIMD} in | ||||||
|       SSE4) |       SSE4) | ||||||
|         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) |         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) | ||||||
| 	case ${ac_SFW_FP16} in |  | ||||||
| 	  yes) |  | ||||||
|         SIMD_FLAGS='-msse4.2';; |         SIMD_FLAGS='-msse4.2';; | ||||||
| 	  no) |  | ||||||
| 	  SIMD_FLAGS='-msse4.2 -mf16c';; |  | ||||||
| 	  *) |  | ||||||
|           AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);; |  | ||||||
| 	esac;; |  | ||||||
|       AVX) |       AVX) | ||||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) |         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx -mf16c';; |         SIMD_FLAGS='-mavx';; | ||||||
|       AVXFMA4) |       AVXFMA4) | ||||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) |         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||||
|         SIMD_FLAGS='-mavx -mfma4 -mf16c';; |         SIMD_FLAGS='-mavx -mfma4';; | ||||||
|       AVXFMA) |       AVXFMA) | ||||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) |         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||||
|         SIMD_FLAGS='-mavx -mfma -mf16c';; |         SIMD_FLAGS='-mavx -mfma';; | ||||||
|       AVX2) |       AVX2) | ||||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) |         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx2 -mfma -mf16c';; |         SIMD_FLAGS='-mavx2 -mfma';; | ||||||
|       AVX512) |       AVX512) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; |         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; | ||||||
| @@ -254,7 +197,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) |  | ||||||
|         SIMD_FLAGS='-march=knl';; |         SIMD_FLAGS='-march=knl';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -262,9 +204,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|                            [generic SIMD vector width (in bytes)]) |                            [generic SIMD vector width (in bytes)]) | ||||||
|         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" |         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       NEONv8) |  | ||||||
|         AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) |  | ||||||
|         SIMD_FLAGS='-march=armv8-a';; |  | ||||||
|       QPX|BGQ) |       QPX|BGQ) | ||||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) |         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
| @@ -293,7 +232,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) |  | ||||||
|         SIMD_FLAGS='-xmic-avx512';; |         SIMD_FLAGS='-xmic-avx512';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -331,41 +269,8 @@ case ${ac_PRECISION} in | |||||||
|      double) |      double) | ||||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) |        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |  | ||||||
|      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]); |  | ||||||
|      ;; |  | ||||||
| esac | esac | ||||||
|  |  | ||||||
| ######################  Shared memory allocation technique under MPI3 |  | ||||||
| AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], |  | ||||||
|               [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) |  | ||||||
|  |  | ||||||
| case ${ac_SHM} in |  | ||||||
|  |  | ||||||
|      shmget) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      shmopen) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      hugetlbfs) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      *) |  | ||||||
|      AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]); |  | ||||||
|      ;; |  | ||||||
| esac |  | ||||||
|  |  | ||||||
| ######################  Shared base path for SHMMMAP |  | ||||||
| AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], |  | ||||||
|               [Select SHM mmap base path for hugetlbfs])], |  | ||||||
| 	      [ac_SHMPATH=${enable_shmpath}], |  | ||||||
| 	      [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/]) |  | ||||||
| AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) |  | ||||||
|  |  | ||||||
| ############### communication type selection | ############### communication type selection | ||||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | ||||||
|               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) |               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||||
| @@ -375,14 +280,14 @@ case ${ac_COMMS} in | |||||||
|         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) |         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||||
|         comms_type='none' |         comms_type='none' | ||||||
|      ;; |      ;; | ||||||
|  |      mpi3l*) | ||||||
|  |        AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) | ||||||
|  |        comms_type='mpi3l' | ||||||
|  |      ;; | ||||||
|      mpi3*) |      mpi3*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) |         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) | ||||||
|         comms_type='mpi3' |         comms_type='mpi3' | ||||||
|      ;; |      ;; | ||||||
|      mpit) |  | ||||||
|         AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) |  | ||||||
|         comms_type='mpit' |  | ||||||
|      ;; |  | ||||||
|      mpi*) |      mpi*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) |         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||||
|         comms_type='mpi' |         comms_type='mpi' | ||||||
| @@ -410,13 +315,13 @@ esac | |||||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPIT,  [ test "${comms_type}X" == "mpitX" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | ||||||
|  |  | ||||||
| ############### RNG selection | ############### RNG selection | ||||||
| AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ | AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ | ||||||
| 	            [Select Random Number Generator to be used])],\ | 	            [Select Random Number Generator to be used])],\ | ||||||
| 	            [ac_RNG=${enable_rng}],[ac_RNG=sitmo]) | 	            [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) | ||||||
|  |  | ||||||
| case ${ac_RNG} in | case ${ac_RNG} in | ||||||
|      ranlux48) |      ranlux48) | ||||||
| @@ -479,31 +384,32 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg]) | |||||||
|  |  | ||||||
| ############### Ouput | ############### Ouput | ||||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||||
| GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" |  | ||||||
| GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS" |  | ||||||
| GRID_LIBS=$LIBS |  | ||||||
| GRID_SHORT_SHA=`git rev-parse --short HEAD` |  | ||||||
| GRID_SHA=`git rev-parse HEAD` |  | ||||||
| GRID_BRANCH=`git rev-parse --abbrev-ref HEAD` |  | ||||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||||
| AC_SUBST([AM_CFLAGS]) | AC_SUBST([AM_CFLAGS]) | ||||||
| AC_SUBST([AM_CXXFLAGS]) | AC_SUBST([AM_CXXFLAGS]) | ||||||
| AC_SUBST([AM_LDFLAGS]) | AC_SUBST([AM_LDFLAGS]) | ||||||
| AC_SUBST([GRID_CXXFLAGS]) | AC_CONFIG_FILES(Makefile) | ||||||
| AC_SUBST([GRID_LDFLAGS]) | AC_CONFIG_FILES(lib/Makefile) | ||||||
| AC_SUBST([GRID_LIBS]) | AC_CONFIG_FILES(tests/Makefile) | ||||||
| AC_SUBST([GRID_SHA]) | AC_CONFIG_FILES(tests/IO/Makefile) | ||||||
| AC_SUBST([GRID_BRANCH]) | AC_CONFIG_FILES(tests/core/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/debug/Makefile) | ||||||
| git_commit=`cd $srcdir && ./scripts/configure.commit` | AC_CONFIG_FILES(tests/forces/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/hmc/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/solver/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||||
|  | AC_CONFIG_FILES(benchmarks/Makefile) | ||||||
|  | AC_CONFIG_FILES(extras/Makefile) | ||||||
|  | AC_CONFIG_FILES(extras/Hadrons/Makefile) | ||||||
|  | AC_OUTPUT | ||||||
|  |  | ||||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
| Summary of configuration for $PACKAGE v$VERSION | Summary of configuration for $PACKAGE v$VERSION | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
| ----- GIT VERSION ------------------------------------- |  | ||||||
| $git_commit |  | ||||||
| ----- PLATFORM ---------------------------------------- | ----- PLATFORM ---------------------------------------- | ||||||
| architecture (build)        : $build_cpu | architecture (build)        : $build_cpu | ||||||
| os (build)                  : $build_os | os (build)                  : $build_os | ||||||
| @@ -515,15 +421,11 @@ compiler version            : ${ax_cv_gxx_version} | |||||||
| SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | ||||||
| Threading                   : ${ac_openmp}  | Threading                   : ${ac_openmp}  | ||||||
| Communications type         : ${comms_type} | Communications type         : ${comms_type} | ||||||
| Shared memory allocator     : ${ac_SHM} |  | ||||||
| Shared memory mmap path     : ${ac_SHMPATH} |  | ||||||
| Default precision           : ${ac_PRECISION} | Default precision           : ${ac_PRECISION} | ||||||
| Software FP16 conversion    : ${ac_SFW_FP16} |  | ||||||
| RNG choice                  : ${ac_RNG}  | RNG choice                  : ${ac_RNG}  | ||||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||||
| LAPACK                      : ${ac_LAPACK} | LAPACK                      : ${ac_LAPACK} | ||||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||||
| LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||||
| ----- BUILD FLAGS ------------------------------------- | ----- BUILD FLAGS ------------------------------------- | ||||||
| @@ -533,32 +435,7 @@ LDFLAGS: | |||||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
| LIBS: | LIBS: | ||||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
| -------------------------------------------------------" > grid.configure.summary | -------------------------------------------------------" > config.summary | ||||||
|  |  | ||||||
| GRID_SUMMARY="`cat grid.configure.summary`" |  | ||||||
| AM_SUBST_NOTMAKE([GRID_SUMMARY]) |  | ||||||
| AC_SUBST([GRID_SUMMARY]) |  | ||||||
|  |  | ||||||
| AC_CONFIG_FILES([grid-config], [chmod +x grid-config]) |  | ||||||
| AC_CONFIG_FILES(Makefile) |  | ||||||
| AC_CONFIG_FILES(lib/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/IO/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/core/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/debug/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/forces/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/hmc/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/solver/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/smearing/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/testu01/Makefile) |  | ||||||
| AC_CONFIG_FILES(benchmarks/Makefile) |  | ||||||
| AC_CONFIG_FILES(extras/Makefile) |  | ||||||
| AC_CONFIG_FILES(extras/Hadrons/Makefile) |  | ||||||
| AC_OUTPUT |  | ||||||
|  |  | ||||||
| echo "" | echo "" | ||||||
| cat grid.configure.summary | cat config.summary | ||||||
| echo "" | echo "" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -162,8 +162,7 @@ void Application::saveParameterFile(const std::string parameterFileName) | |||||||
| sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | ||||||
|  |  | ||||||
| #define DEFINE_MEMPEAK \ | #define DEFINE_MEMPEAK \ | ||||||
| GeneticScheduler<unsigned int>::ObjFunc memPeak = \ | auto memPeak = [this](const std::vector<unsigned int> &program)\ | ||||||
| [this](const std::vector<unsigned int> &program)\ |  | ||||||
| {\ | {\ | ||||||
|     unsigned int memPeak;\ |     unsigned int memPeak;\ | ||||||
|     bool         msg;\ |     bool         msg;\ | ||||||
|   | |||||||
| @@ -41,10 +41,9 @@ using namespace Hadrons; | |||||||
| // constructor ///////////////////////////////////////////////////////////////// | // constructor ///////////////////////////////////////////////////////////////// | ||||||
| Environment::Environment(void) | Environment::Environment(void) | ||||||
| { | { | ||||||
|     dim_ = GridDefaultLatt(); |     nd_ = GridDefaultLatt().size(); | ||||||
|     nd_  = dim_.size(); |  | ||||||
|     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( |     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( | ||||||
|         dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), |         GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||||
|         GridDefaultMpi())); |         GridDefaultMpi())); | ||||||
|     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); |     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); | ||||||
|     auto loc = getGrid()->LocalDimensions(); |     auto loc = getGrid()->LocalDimensions(); | ||||||
| @@ -133,16 +132,6 @@ unsigned int Environment::getNd(void) const | |||||||
|     return nd_; |     return nd_; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::vector<int> Environment::getDim(void) const |  | ||||||
| { |  | ||||||
|     return dim_; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int Environment::getDim(const unsigned int mu) const |  | ||||||
| { |  | ||||||
|     return dim_[mu]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // random number generator ///////////////////////////////////////////////////// | // random number generator ///////////////////////////////////////////////////// | ||||||
| void Environment::setSeed(const std::vector<int> &seed) | void Environment::setSeed(const std::vector<int> &seed) | ||||||
| { | { | ||||||
| @@ -282,21 +271,6 @@ std::string Environment::getModuleType(const std::string name) const | |||||||
|     return getModuleType(getModuleAddress(name)); |     return getModuleType(getModuleAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string Environment::getModuleNamespace(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     std::string type = getModuleType(address), ns; |  | ||||||
|      |  | ||||||
|     auto pos2 = type.rfind("::"); |  | ||||||
|     auto pos1 = type.rfind("::", pos2 - 2); |  | ||||||
|      |  | ||||||
|     return type.substr(pos1 + 2, pos2 - pos1 - 2); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::string Environment::getModuleNamespace(const std::string name) const |  | ||||||
| { |  | ||||||
|     return getModuleNamespace(getModuleAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Environment::hasModule(const unsigned int address) const | bool Environment::hasModule(const unsigned int address) const | ||||||
| { | { | ||||||
|     return (address < module_.size()); |     return (address < module_.size()); | ||||||
| @@ -517,16 +491,9 @@ std::string Environment::getObjectName(const unsigned int address) const | |||||||
| std::string Environment::getObjectType(const unsigned int address) const | std::string Environment::getObjectType(const unsigned int address) const | ||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|     { |  | ||||||
|         if (object_[address].type) |  | ||||||
|     { |     { | ||||||
|         return typeName(object_[address].type); |         return typeName(object_[address].type); | ||||||
|     } |     } | ||||||
|         else |  | ||||||
|         { |  | ||||||
|             return "<no type>"; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else if (hasObject(address)) |     else if (hasObject(address)) | ||||||
|     { |     { | ||||||
|         HADRON_ERROR("object with address " + std::to_string(address) |         HADRON_ERROR("object with address " + std::to_string(address) | ||||||
| @@ -565,23 +532,6 @@ Environment::Size Environment::getObjectSize(const std::string name) const | |||||||
|     return getObjectSize(getObjectAddress(name)); |     return getObjectSize(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectModule(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     if (hasObject(address)) |  | ||||||
|     { |  | ||||||
|         return object_[address].module; |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectModule(const std::string name) const |  | ||||||
| { |  | ||||||
|     return getObjectModule(getObjectAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectLs(const unsigned int address) const | unsigned int Environment::getObjectLs(const unsigned int address) const | ||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|   | |||||||
| @@ -106,8 +106,6 @@ public: | |||||||
|     void                    createGrid(const unsigned int Ls); |     void                    createGrid(const unsigned int Ls); | ||||||
|     GridCartesian *         getGrid(const unsigned int Ls = 1) const; |     GridCartesian *         getGrid(const unsigned int Ls = 1) const; | ||||||
|     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; |     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; | ||||||
|     std::vector<int>        getDim(void) const; |  | ||||||
|     int                     getDim(const unsigned int mu) const; |  | ||||||
|     unsigned int            getNd(void) const; |     unsigned int            getNd(void) const; | ||||||
|     // random number generator |     // random number generator | ||||||
|     void                    setSeed(const std::vector<int> &seed); |     void                    setSeed(const std::vector<int> &seed); | ||||||
| @@ -133,8 +131,6 @@ public: | |||||||
|     std::string             getModuleName(const unsigned int address) const; |     std::string             getModuleName(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const unsigned int address) const; |     std::string             getModuleType(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const std::string name) const; |     std::string             getModuleType(const std::string name) const; | ||||||
|     std::string             getModuleNamespace(const unsigned int address) const; |  | ||||||
|     std::string             getModuleNamespace(const std::string name) const; |  | ||||||
|     bool                    hasModule(const unsigned int address) const; |     bool                    hasModule(const unsigned int address) const; | ||||||
|     bool                    hasModule(const std::string name) const; |     bool                    hasModule(const std::string name) const; | ||||||
|     Graph<unsigned int>     makeModuleGraph(void) const; |     Graph<unsigned int>     makeModuleGraph(void) const; | ||||||
| @@ -175,8 +171,6 @@ public: | |||||||
|     std::string             getObjectType(const std::string name) const; |     std::string             getObjectType(const std::string name) const; | ||||||
|     Size                    getObjectSize(const unsigned int address) const; |     Size                    getObjectSize(const unsigned int address) const; | ||||||
|     Size                    getObjectSize(const std::string name) const; |     Size                    getObjectSize(const std::string name) const; | ||||||
|     unsigned int            getObjectModule(const unsigned int address) const; |  | ||||||
|     unsigned int            getObjectModule(const std::string name) const; |  | ||||||
|     unsigned int            getObjectLs(const unsigned int address) const; |     unsigned int            getObjectLs(const unsigned int address) const; | ||||||
|     unsigned int            getObjectLs(const std::string name) const; |     unsigned int            getObjectLs(const std::string name) const; | ||||||
|     bool                    hasObject(const unsigned int address) const; |     bool                    hasObject(const unsigned int address) const; | ||||||
| @@ -187,10 +181,6 @@ public: | |||||||
|     bool                    hasCreatedObject(const std::string name) const; |     bool                    hasCreatedObject(const std::string name) const; | ||||||
|     bool                    isObject5d(const unsigned int address) const; |     bool                    isObject5d(const unsigned int address) const; | ||||||
|     bool                    isObject5d(const std::string name) const; |     bool                    isObject5d(const std::string name) const; | ||||||
|     template <typename T> |  | ||||||
|     bool                    isObjectOfType(const unsigned int address) const; |  | ||||||
|     template <typename T> |  | ||||||
|     bool                    isObjectOfType(const std::string name) const; |  | ||||||
|     Environment::Size       getTotalSize(void) const; |     Environment::Size       getTotalSize(void) const; | ||||||
|     void                    addOwnership(const unsigned int owner, |     void                    addOwnership(const unsigned int owner, | ||||||
|                                          const unsigned int property); |                                          const unsigned int property); | ||||||
| @@ -207,7 +197,6 @@ private: | |||||||
|     bool                                   dryRun_{false}; |     bool                                   dryRun_{false}; | ||||||
|     unsigned int                           traj_, locVol_; |     unsigned int                           traj_, locVol_; | ||||||
|     // grids |     // grids | ||||||
|     std::vector<int>                       dim_; |  | ||||||
|     GridPt                                 grid4d_; |     GridPt                                 grid4d_; | ||||||
|     std::map<unsigned int, GridPt>         grid5d_; |     std::map<unsigned int, GridPt>         grid5d_; | ||||||
|     GridRbPt                               gridRb4d_; |     GridRbPt                               gridRb4d_; | ||||||
| @@ -354,7 +343,7 @@ T * Environment::getObject(const unsigned int address) const | |||||||
|         else |         else | ||||||
|         { |         { | ||||||
|             HADRON_ERROR("object with address " + std::to_string(address) + |             HADRON_ERROR("object with address " + std::to_string(address) + | ||||||
|                          " does not have type '" + typeName(&typeid(T)) + |                          " does not have type '" + typeid(T).name() + | ||||||
|                          "' (has type '" + getObjectType(address) + "')"); |                          "' (has type '" + getObjectType(address) + "')"); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -391,37 +380,6 @@ T * Environment::createLattice(const std::string name) | |||||||
|     return createLattice<T>(getObjectAddress(name)); |     return createLattice<T>(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> |  | ||||||
| bool Environment::isObjectOfType(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     if (hasRegisteredObject(address)) |  | ||||||
|     { |  | ||||||
|         if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get())) |  | ||||||
|         { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else if (hasObject(address)) |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("object with address " + std::to_string(address) + |  | ||||||
|                      " exists but is not registered"); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename T> |  | ||||||
| bool Environment::isObjectOfType(const std::string name) const |  | ||||||
| { |  | ||||||
|     return isObjectOfType<T>(getObjectAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Environment_hpp_ | #endif // Hadrons_Environment_hpp_ | ||||||
|   | |||||||
| @@ -51,43 +51,23 @@ using Grid::operator<<; | |||||||
|  * error with GCC 5 (clang & GCC 6 compile fine without it). |  * error with GCC 5 (clang & GCC 6 compile fine without it). | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | // FIXME: find a way to do that in a more general fashion | ||||||
| #ifndef FIMPL | #ifndef FIMPL | ||||||
| #define FIMPL WilsonImplR | #define FIMPL WilsonImplR | ||||||
| #endif | #endif | ||||||
| #ifndef SIMPL |  | ||||||
| #define SIMPL ScalarImplCR |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| // type aliases | // type aliases | ||||||
| #define FERM_TYPE_ALIASES(FImpl, suffix)\ | #define TYPE_ALIASES(FImpl, suffix)\ | ||||||
| typedef FermionOperator<FImpl>                       FMat##suffix;             \ | typedef FermionOperator<FImpl>                       FMat##suffix;             \ | ||||||
| typedef typename FImpl::FermionField                 FermionField##suffix;     \ | typedef typename FImpl::FermionField                 FermionField##suffix;     \ | ||||||
| typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | ||||||
| typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | ||||||
| typedef std::vector<typename FImpl::SitePropagator::scalar_object>             \ | typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\ | ||||||
|                                                      SlicedPropagator##suffix; |  | ||||||
|  |  | ||||||
| #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; |  | ||||||
|  |  | ||||||
| #define SCALAR_TYPE_ALIASES(SImpl, suffix)\ |  | ||||||
| typedef typename SImpl::Field ScalarField##suffix;\ |  | ||||||
| typedef typename SImpl::Field PropagatorField##suffix; |  | ||||||
|  |  | ||||||
| #define SOLVER_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| typedef std::function<void(FermionField##suffix &,                             \ | typedef std::function<void(FermionField##suffix &,                             \ | ||||||
|                       const FermionField##suffix &)> SolverFn##suffix; |                       const FermionField##suffix &)> SolverFn##suffix; | ||||||
|  |  | ||||||
| #define SINK_TYPE_ALIASES(suffix)\ |  | ||||||
| typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix; |  | ||||||
|  |  | ||||||
| #define FGS_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| FERM_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| GAUGE_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| SOLVER_TYPE_ALIASES(FImpl, suffix) |  | ||||||
|  |  | ||||||
| // logger | // logger | ||||||
| class HadronsLogger: public Logger | class HadronsLogger: public Logger | ||||||
| { | { | ||||||
| @@ -165,15 +145,6 @@ std::string typeName(void) | |||||||
|     return typeName(typeIdPt<T>()); |     return typeName(typeIdPt<T>()); | ||||||
| } | } | ||||||
|  |  | ||||||
| // default writers/readers |  | ||||||
| #ifdef HAVE_HDF5 |  | ||||||
| typedef Hdf5Reader CorrReader; |  | ||||||
| typedef Hdf5Writer CorrWriter; |  | ||||||
| #else |  | ||||||
| typedef XmlReader CorrReader; |  | ||||||
| typedef XmlWriter CorrWriter; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Global_hpp_ | #endif // Hadrons_Global_hpp_ | ||||||
|   | |||||||
| @@ -1,25 +1,40 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSink/Point.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Wall.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/Quark.hpp> | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MAction_DWF_hpp_ | #ifndef Hadrons_DWF_hpp_ | ||||||
| #define Hadrons_MAction_DWF_hpp_ | #define Hadrons_DWF_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -48,15 +48,14 @@ public: | |||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     unsigned int, Ls, |                                     unsigned int, Ls, | ||||||
|                                     double      , mass, |                                     double      , mass, | ||||||
|                                     double      , M5, |                                     double      , M5); | ||||||
|                                     std::string , boundary); |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TDWF: public Module<DWFPar> | class TDWF: public Module<DWFPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TDWF(const std::string name); |     TDWF(const std::string name); | ||||||
| @@ -117,19 +116,14 @@ void TDWF<FImpl>::execute(void) | |||||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " |                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" |                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  |  | ||||||
|                  << std::endl; |  | ||||||
|     env().createGrid(par().Ls); |     env().createGrid(par().Ls); | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &g4     = *env().getGrid(); |     auto &g4     = *env().getGrid(); | ||||||
|     auto &grb4   = *env().getRbGrid(); |     auto &grb4   = *env().getRbGrid(); | ||||||
|     auto &g5     = *env().getGrid(par().Ls); |     auto &g5     = *env().getGrid(par().Ls); | ||||||
|     auto &grb5   = *env().getRbGrid(par().Ls); |     auto &grb5   = *env().getRbGrid(par().Ls); | ||||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); |  | ||||||
|     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); |  | ||||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, |     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||||
|                                                 par().mass, par().M5, |                                                 par().mass, par().M5); | ||||||
|                                                 implParams); |  | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -137,4 +131,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MAction_DWF_hpp_ | #endif // Hadrons_DWF_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MAction_Wilson_hpp_ | #ifndef Hadrons_Wilson_hpp_ | ||||||
| #define Hadrons_MAction_Wilson_hpp_ | #define Hadrons_Wilson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -46,15 +46,14 @@ class WilsonPar: Serializable | |||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     double     , mass, |                                     double     , mass); | ||||||
|                                     std::string, boundary); |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TWilson: public Module<WilsonPar> | class TWilson: public Module<WilsonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TWilson(const std::string name); |     TWilson(const std::string name); | ||||||
| @@ -113,15 +112,10 @@ void TWilson<FImpl>::execute() | |||||||
| { | { | ||||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass |     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; |                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  |  | ||||||
|                  << std::endl; |  | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &grid   = *env().getGrid(); |     auto &grid   = *env().getGrid(); | ||||||
|     auto &gridRb = *env().getRbGrid(); |     auto &gridRb = *env().getRbGrid(); | ||||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); |     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); | ||||||
|     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); |  | ||||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, |  | ||||||
|                                             implParams); |  | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Baryon_hpp_ | #ifndef Hadrons_Baryon_hpp_ | ||||||
| #define Hadrons_MContraction_Baryon_hpp_ | #define Hadrons_Baryon_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3> | |||||||
| class TBaryon: public Module<BaryonPar> | class TBaryon: public Module<BaryonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |     TYPE_ALIASES(FImpl1, 1); | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |     TYPE_ALIASES(FImpl2, 2); | ||||||
|     FERM_TYPE_ALIASES(FImpl3, 3); |     TYPE_ALIASES(FImpl3, 3); | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
| @@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" |                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" | ||||||
|                  << par().q3 << "'" << std::endl; |                  << par().q3 << "'" << std::endl; | ||||||
|      |      | ||||||
|     CorrWriter             writer(par().output); |     XmlWriter             writer(par().output); | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); |     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); | ||||||
| @@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|      |      | ||||||
|     // FIXME: do contractions |     // FIXME: do contractions | ||||||
|      |      | ||||||
|     // write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Baryon_hpp_ | #endif // Hadrons_Baryon_hpp_ | ||||||
|   | |||||||
| @@ -1,144 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
| #define Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                                DiscLoop                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| class DiscLoopPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar, |  | ||||||
|                                     std::string,    q_loop, |  | ||||||
|                                     Gamma::Algebra, gamma, |  | ||||||
|                                     std::string,    output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TDiscLoop: public Module<DiscLoopPar> |  | ||||||
| { |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|     class Result: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |  | ||||||
|                                         Gamma::Algebra, gamma, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TDiscLoop(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TDiscLoop(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       TDiscLoop implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TDiscLoop<FImpl>::TDiscLoop(const std::string name) |  | ||||||
| : Module<DiscLoopPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TDiscLoop<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q_loop}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TDiscLoop<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TDiscLoop<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|      |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TDiscLoop<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing disconnected loop contraction '" << getName()  |  | ||||||
|                  << "' using '" << par().q_loop << "' with " << par().gamma  |  | ||||||
|                  << " insertion." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter            writer(par().output); |  | ||||||
|     PropagatorField       &q_loop = *env().template getObject<PropagatorField>(par().q_loop); |  | ||||||
|     LatticeComplex        c(env().getGrid()); |  | ||||||
|     Gamma                 gamma(par().gamma); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|  |  | ||||||
|     c = trace(gamma*q_loop); |  | ||||||
|     sliceSum(c, buf, Tp); |  | ||||||
|  |  | ||||||
|     result.gamma = par().gamma; |  | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|     { |  | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     write(writer, "disc", result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
| @@ -1,170 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
| #define Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * 3pt contraction with gamma matrix insertion. |  | ||||||
|  * |  | ||||||
|  * Schematic: |  | ||||||
|  * |  | ||||||
|  *             q2           q3 |  | ||||||
|  *        /----<------*------<----¬ |  | ||||||
|  *       /          gamma          \ |  | ||||||
|  *      /                           \ |  | ||||||
|  *   i *                            * f |  | ||||||
|  *      \                          / |  | ||||||
|  *       \                        / |  | ||||||
|  *        \----------->----------/ |  | ||||||
|  *                   q1 |  | ||||||
|  * |  | ||||||
|  *      trace(g5*q1*adj(q2)*g5*gamma*q3) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                               Gamma3pt                                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| class Gamma3ptPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar, |  | ||||||
|                                     std::string,    q1, |  | ||||||
|                                     std::string,    q2, |  | ||||||
|                                     std::string,    q3, |  | ||||||
|                                     Gamma::Algebra, gamma, |  | ||||||
|                                     std::string,    output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| class TGamma3pt: public Module<Gamma3ptPar> |  | ||||||
| { |  | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |  | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |  | ||||||
|     FERM_TYPE_ALIASES(FImpl3, 3); |  | ||||||
|     class Result: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |  | ||||||
|                                         Gamma::Algebra, gamma, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TGamma3pt(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TGamma3pt(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       TGamma3pt implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name) |  | ||||||
| : Module<Gamma3ptPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void) |  | ||||||
| { |  | ||||||
|      |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing 3pt contractions '" << getName() << "' using" |  | ||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "' and '" |  | ||||||
|                  << par().q3 << "', with " << par().gamma << " insertion."  |  | ||||||
|                  << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter            writer(par().output); |  | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |  | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |  | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q3); |  | ||||||
|     LatticeComplex        c(env().getGrid()); |  | ||||||
|     Gamma                 g5(Gamma::Algebra::Gamma5); |  | ||||||
|     Gamma                 gamma(par().gamma); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|  |  | ||||||
|     c = trace(g5*q1*adj(q2)*(g5*gamma)*q3); |  | ||||||
|     sliceSum(c, buf, Tp); |  | ||||||
|  |  | ||||||
|     result.gamma = par().gamma; |  | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|     { |  | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     write(writer, "gamma3pt", result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
| @@ -6,10 +6,8 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|         Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -29,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Meson_hpp_ | #ifndef Hadrons_Meson_hpp_ | ||||||
| #define Hadrons_MContraction_Meson_hpp_ | #define Hadrons_Meson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -38,56 +36,32 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Meson contractions |  | ||||||
|  ----------------------------- |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - q1: input propagator 1 (string) |  | ||||||
|  - q2: input propagator 2 (string) |  | ||||||
|  - gammas: gamma products to insert at sink & source, pairs of gamma matrices  |  | ||||||
|            (space-separated strings) in angled brackets (i.e. <g_sink g_src>), |  | ||||||
|            in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>"). |  | ||||||
|  |  | ||||||
|            Special values: "all" - perform all possible contractions. |  | ||||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."), |  | ||||||
|         given as multiples of (2*pi) / L. |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                                TMeson                                       * |  *                                TMeson                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
| typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair; |  | ||||||
|  |  | ||||||
| class MesonPar: Serializable | class MesonPar: Serializable | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, | ||||||
|                                     std::string,    q1, |                                     std::string,    q1, | ||||||
|                                     std::string,    q2, |                                     std::string,    q2, | ||||||
|                                     std::string, gammas, |                                     std::string,    output, | ||||||
|                                     std::string, sink, |                                     Gamma::Algebra, gammaSource, | ||||||
|                                     std::string, output); |                                     Gamma::Algebra, gammaSink); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| class TMeson: public Module<MesonPar> | class TMeson: public Module<MesonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |     TYPE_ALIASES(FImpl1, 1); | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |     TYPE_ALIASES(FImpl2, 2); | ||||||
|     FERM_TYPE_ALIASES(ScalarImplCR, Scalar); |  | ||||||
|     SINK_TYPE_ALIASES(Scalar); |  | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); | ||||||
|                                         Gamma::Algebra, gamma_snk, |  | ||||||
|                                         Gamma::Algebra, gamma_src, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |     }; | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
| @@ -97,7 +71,6 @@ public: | |||||||
|     // dependencies/products |     // dependencies/products | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     virtual void parseGammaString(std::vector<GammaPair> &gammaList); |  | ||||||
|     // execution |     // execution | ||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
| @@ -117,7 +90,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name) | |||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> input = {par().q1, par().q2, par().sink}; |     std::vector<std::string> input = {par().q1, par().q2}; | ||||||
|      |      | ||||||
|     return input; |     return input; | ||||||
| } | } | ||||||
| @@ -130,35 +103,7 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | |||||||
|     return output; |     return output; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> |  | ||||||
| void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) |  | ||||||
| { |  | ||||||
|     gammaList.clear(); |  | ||||||
|     // Determine gamma matrices to insert at source/sink. |  | ||||||
|     if (par().gammas.compare("all") == 0) |  | ||||||
|     { |  | ||||||
|         // Do all contractions. |  | ||||||
|         for (unsigned int i = 1; i < Gamma::nGamma; i += 2) |  | ||||||
|         { |  | ||||||
|             for (unsigned int j = 1; j < Gamma::nGamma; j += 2) |  | ||||||
|             { |  | ||||||
|                 gammaList.push_back(std::make_pair((Gamma::Algebra)i,  |  | ||||||
|                                                    (Gamma::Algebra)j)); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         // Parse individual contractions from input string. |  | ||||||
|         gammaList = strToVec<GammaPair>(par().gammas); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| #define mesonConnected(q1, q2, gSnk, gSrc) \ |  | ||||||
| (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| void TMeson<FImpl1, FImpl2>::execute(void) | void TMeson<FImpl1, FImpl2>::execute(void) | ||||||
| { | { | ||||||
| @@ -166,73 +111,21 @@ void TMeson<FImpl1, FImpl2>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" |                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     CorrWriter             writer(par().output); |     XmlWriter             writer(par().output); | ||||||
|     std::vector<TComplex>  buf; |  | ||||||
|     std::vector<Result>    result; |  | ||||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); |  | ||||||
|     std::vector<GammaPair> gammaList; |  | ||||||
|     int                    nt = env().getDim(Tp); |  | ||||||
|      |  | ||||||
|     parseGammaString(gammaList); |  | ||||||
|     result.resize(gammaList.size()); |  | ||||||
|     for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|     { |  | ||||||
|         result[i].gamma_snk = gammaList[i].first; |  | ||||||
|         result[i].gamma_src = gammaList[i].second; |  | ||||||
|         result[i].corr.resize(nt); |  | ||||||
|     } |  | ||||||
|     if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and |  | ||||||
|         env().template isObjectOfType<SlicedPropagator2>(par().q2)) |  | ||||||
|     { |  | ||||||
|         SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1); |  | ||||||
|         SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "(propagator already sinked)" << std::endl; |  | ||||||
|         for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|         { |  | ||||||
|             Gamma gSnk(gammaList[i].first); |  | ||||||
|             Gamma gSrc(gammaList[i].second); |  | ||||||
|              |  | ||||||
|             for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|             { |  | ||||||
|                 result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     LatticeComplex        c(env().getGrid()); |     LatticeComplex        c(env().getGrid()); | ||||||
|  |     Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink); | ||||||
|  |     Gamma                 g5(Gamma::Algebra::Gamma5); | ||||||
|  |     std::vector<TComplex> buf; | ||||||
|  |     Result                result; | ||||||
|      |      | ||||||
|         LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; |     c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5); | ||||||
|         for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|         { |  | ||||||
|             Gamma       gSnk(gammaList[i].first); |  | ||||||
|             Gamma       gSrc(gammaList[i].second); |  | ||||||
|             std::string ns; |  | ||||||
|                  |  | ||||||
|             ns = env().getModuleNamespace(env().getObjectModule(par().sink)); |  | ||||||
|             if (ns == "MSource") |  | ||||||
|             { |  | ||||||
|                 PropagatorField1 &sink = |  | ||||||
|                     *env().template getObject<PropagatorField1>(par().sink); |  | ||||||
|                  |  | ||||||
|                 c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); |  | ||||||
|     sliceSum(c, buf, Tp); |     sliceSum(c, buf, Tp); | ||||||
|             } |     result.corr.resize(buf.size()); | ||||||
|             else if (ns == "MSink") |  | ||||||
|             { |  | ||||||
|                 SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink); |  | ||||||
|                  |  | ||||||
|                 c   = trace(mesonConnected(q1, q2, gSnk, gSrc)); |  | ||||||
|                 buf = sink(c); |  | ||||||
|             } |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |     for (unsigned int t = 0; t < buf.size(); ++t) | ||||||
|     { |     { | ||||||
|                 result[i].corr[t] = TensorRemove(buf[t]); |         result.corr[t] = TensorRemove(buf[t]); | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|     write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
| @@ -241,4 +134,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Meson_hpp_ | #endif // Hadrons_Meson_hpp_ | ||||||
|   | |||||||
| @@ -1,114 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonian                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| /******************************************************************************* |  | ||||||
|  * Utilities for contractions involving the Weak Hamiltonian. |  | ||||||
|  ******************************************************************************/ |  | ||||||
| //// Sum and store correlator. |  | ||||||
| #define MAKE_DIAG(exp, buf, res, n)\ |  | ||||||
| sliceSum(exp, buf, Tp);\ |  | ||||||
| res.name = (n);\ |  | ||||||
| res.corr.resize(buf.size());\ |  | ||||||
| for (unsigned int t = 0; t < buf.size(); ++t)\ |  | ||||||
| {\ |  | ||||||
|     res.corr[t] = TensorRemove(buf[t]);\ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //// Contraction of mu index: use 'mu' variable in exp. |  | ||||||
| #define SUM_MU(buf,exp)\ |  | ||||||
| buf = zero;\ |  | ||||||
| for (unsigned int mu = 0; mu < ndim; ++mu)\ |  | ||||||
| {\ |  | ||||||
|     buf += exp;\ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| enum  |  | ||||||
| { |  | ||||||
|   i_V = 0, |  | ||||||
|   i_A = 1, |  | ||||||
|   n_i = 2 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class WeakHamiltonianPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar, |  | ||||||
|                                     std::string, q1, |  | ||||||
|                                     std::string, q2, |  | ||||||
|                                     std::string, q3, |  | ||||||
|                                     std::string, q4, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #define MAKE_WEAK_MODULE(modname)\ |  | ||||||
| class T##modname: public Module<WeakHamiltonianPar>\ |  | ||||||
| {\ |  | ||||||
| public:\ |  | ||||||
|     FERM_TYPE_ALIASES(FIMPL,)\ |  | ||||||
|     class Result: Serializable\ |  | ||||||
|     {\ |  | ||||||
|     public:\ |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\ |  | ||||||
|                                         std::string, name,\ |  | ||||||
|                                         std::vector<Complex>, corr);\ |  | ||||||
|     };\ |  | ||||||
| public:\ |  | ||||||
|     /* constructor */ \ |  | ||||||
|     T##modname(const std::string name);\ |  | ||||||
|     /* destructor */ \ |  | ||||||
|     virtual ~T##modname(void) = default;\ |  | ||||||
|     /* dependency relation */ \ |  | ||||||
|     virtual std::vector<std::string> getInput(void);\ |  | ||||||
|     virtual std::vector<std::string> getOutput(void);\ |  | ||||||
|     /* setup */ \ |  | ||||||
|     virtual void setup(void);\ |  | ||||||
|     /* execution */ \ |  | ||||||
|     virtual void execute(void);\ |  | ||||||
|     std::vector<std::string> VA_label = {"V", "A"};\ |  | ||||||
| };\ |  | ||||||
| MODULE_REGISTER_NS(modname, T##modname, MContraction); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
| @@ -1,137 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian current-current contractions, Eye-type. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical |  | ||||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematics:        q4                 |                   |  | ||||||
|  *                  /-<-¬                |                              |  | ||||||
|  *                 /     \               |             q2           q3 |  | ||||||
|  *                 \     /               |        /----<------*------<----¬                         |  | ||||||
|  *            q2    \   /    q3          |       /          /-*-¬          \ |  | ||||||
|  *       /-----<-----* *-----<----¬      |      /          /     \          \ |  | ||||||
|  *    i *            H_W           * f   |   i *           \     /  q4      * f |  | ||||||
|  *       \                        /      |      \           \->-/          /    |  | ||||||
|  *        \                      /       |       \                        /        |  | ||||||
|  *         \---------->---------/        |        \----------->----------/         |  | ||||||
|  *                   q1                  |                   q1                   |  | ||||||
|  *                                       | |  | ||||||
|  *                Saucer (S)             |                  Eye (E) |  | ||||||
|  *  |  | ||||||
|  * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) |  | ||||||
|  * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                  TWeakHamiltonianEye implementation                        * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name) |  | ||||||
| : Module<WeakHamiltonianPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TWeakHamiltonianEye::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TWeakHamiltonianEye::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianEye::setup(void) |  | ||||||
| { |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianEye::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_eye_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     PropagatorField              tmp1(env().getGrid()); |  | ||||||
|     LatticeComplex               tmp2(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> S_body(ndim, tmp1); |  | ||||||
|     std::vector<PropagatorField> S_loop(ndim, tmp1); |  | ||||||
|     std::vector<LatticeComplex>  E_body(ndim, tmp2); |  | ||||||
|     std::vector<LatticeComplex>  E_loop(ndim, tmp2); |  | ||||||
|  |  | ||||||
|     // Setup for S-type contractions. |  | ||||||
|     for (int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); |  | ||||||
|         S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform S-type contractions.     |  | ||||||
|     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") |  | ||||||
|  |  | ||||||
|     // Recycle sub-expressions for E-type contractions. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         E_body[mu] = trace(S_body[mu]); |  | ||||||
|         E_loop[mu] = trace(S_loop[mu]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform E-type contractions. |  | ||||||
|     SUM_MU(expbuf, E_body[mu]*E_loop[mu]) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_Eye", result); |  | ||||||
| } |  | ||||||
| @@ -1,58 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonianEye                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     S_diag = 0, |  | ||||||
|     E_diag = 1, |  | ||||||
|     n_eye_diag = 2 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Saucer and Eye subdiagram contractions. |  | ||||||
| #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma) |  | ||||||
| #define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakHamiltonianEye) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
| @@ -1,139 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian current-current contractions, Non-Eye-type. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical |  | ||||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematic:      |  | ||||||
|  *            q2             q3          |           q2              q3 |  | ||||||
|  *          /--<--¬       /--<--¬        |        /--<--¬         /--<--¬        |  | ||||||
|  *         /       \     /       \       |       /       \       /       \       |  | ||||||
|  *        /         \   /         \      |      /         \     /         \      |  | ||||||
|  *       /           \ /           \     |     /           \   /           \     |  | ||||||
|  *    i *             * H_W         *  f |  i *             * * H_W         * f  |  | ||||||
|  *      \             *             |    |     \           /   \           / |  | ||||||
|  *       \           / \           /     |      \         /     \         /     |  | ||||||
|  *        \         /   \         /      |       \       /       \       /   |  | ||||||
|  *         \       /     \       /       |        \-->--/         \-->--/       |  | ||||||
|  *          \-->--/       \-->--/        |          q1               q4  |  | ||||||
|  *            q1             q4          | |  | ||||||
|  *                Connected (C)          |                 Wing (W) |  | ||||||
|  * |  | ||||||
|  * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu]) |  | ||||||
|  * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu]) |  | ||||||
|  *  |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                  TWeakHamiltonianNonEye implementation                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name) |  | ||||||
| : Module<WeakHamiltonianPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TWeakHamiltonianNonEye::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianNonEye::setup(void) |  | ||||||
| { |  | ||||||
|     // No setup work is performed for this module. |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| // Builds the Connected (C) and Wing (W) contractions from the propagators |  | ||||||
| // named by par().q1..q4 and writes both results, grouped as "HW_NonEye", |  | ||||||
| // through a CorrWriter constructed from par().output. |  | ||||||
| //   C: trace(q1*adj(q2)*g5*gL[mu] * q3*adj(q4)*g5*gL[mu]) |  | ||||||
| //   W: trace(q1*adj(q2)*g5*gL[mu]) * trace(q3*adj(q4)*g5*gL[mu]) |  | ||||||
| // NOTE(review): SUM_MU and MAKE_DIAG are macros defined outside this file; |  | ||||||
| // they presumably accumulate over mu and refer to locals such as `ndim` by |  | ||||||
| // name, so the local names below are left untouched - confirm. |  | ||||||
| void TWeakHamiltonianNonEye::execute(void) |  | ||||||
| { |  | ||||||
|     // Every quark name is wrapped in a matching pair of single quotes. |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '" |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '" |  | ||||||
|                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4 |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     // g5 is referenced by name inside MAKE_CW_SUBDIAG. |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_noneye_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     // tmp1/tmp2 only serve as prototypes to fill the per-mu vectors. |  | ||||||
|     PropagatorField              tmp1(env().getGrid()); |  | ||||||
|     LatticeComplex               tmp2(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> C_i_side_loop(ndim, tmp1); |  | ||||||
|     std::vector<PropagatorField> C_f_side_loop(ndim, tmp1); |  | ||||||
|     std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2); |  | ||||||
|     std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2); |  | ||||||
|  |  | ||||||
|     // Setup for C-type contractions. The index is unsigned to match ndim. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu])); |  | ||||||
|         C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform C-type contractions. |  | ||||||
|     SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu])) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C") |  | ||||||
|  |  | ||||||
|     // Recycle sub-expressions for W-type contractions. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         W_i_side_loop[mu] = trace(C_i_side_loop[mu]); |  | ||||||
|         W_f_side_loop[mu] = trace(C_f_side_loop[mu]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform W-type contractions. |  | ||||||
|     SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu]) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_NonEye", result); |  | ||||||
| } |  | ||||||
| @@ -1,57 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonianNonEye                              * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| // Indices of the two non-eye diagrams; n_noneye_diag is their count. |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     W_diag,        // 0: Wing diagram |  | ||||||
|     C_diag,        // 1: Connected diagram |  | ||||||
|     n_noneye_diag  // 2: number of diagrams |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Wing and Connected subdiagram contractions. |  | ||||||
| // Expands to Q_1*adj(Q_2)*g5*gamma. `g5` is not a macro parameter and must be |  | ||||||
| // in scope where the macro is used. The arguments are parenthesised so that |  | ||||||
| // compound expressions passed in keep their own grouping. |  | ||||||
| #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) |  | ||||||
|  |  | ||||||
| // NOTE(review): MAKE_WEAK_MODULE is defined outside this file; it presumably |  | ||||||
| // declares the TWeakHamiltonianNonEye module class - confirm. |  | ||||||
| MAKE_WEAK_MODULE(WeakHamiltonianNonEye) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
| @@ -1,135 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian + current contractions, disconnected topology for neutral  |  | ||||||
|  * mesons. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak |  | ||||||
|  * Hamiltonian in the physical basis and an additional current J (see e.g.  |  | ||||||
|  * Fig 11 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematic: |  | ||||||
|  *                         |  | ||||||
|  *           q2          q4             q3 |  | ||||||
|  *       /--<--¬     /---<--¬       /---<--¬ |  | ||||||
|  *     /         \ /         \     /        \ |  | ||||||
|  *  i *           * H_W      |  J *          * f |  | ||||||
|  *     \         / \         /     \        / |  | ||||||
|  *      \--->---/   \-------/       \------/ |  | ||||||
|  *          q1  |  | ||||||
|  *  |  | ||||||
|  * options |  | ||||||
|  * - q1: input propagator 1 (string) |  | ||||||
|  * - q2: input propagator 2 (string) |  | ||||||
|  * - q3: input propagator 3 (string), assumed to be sequential propagator  |  | ||||||
|  * - q4: input propagator 4 (string), assumed to be a loop |  | ||||||
|  *  |  | ||||||
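|  * type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5) |  | ||||||
|  * type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5) |  | ||||||
|  * |  | ||||||
|  * NOTE: execute() stores the single-trace form trace(meson*loop) under |  | ||||||
|  * "HW_disc0_1" and the product-of-traces form under "HW_disc0_2", i.e. the |  | ||||||
|  * opposite of the numbering above; one of the two needs to be corrected. |  | ||||||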
|  */ |  | ||||||
|  |  | ||||||
| /******************************************************************************* |  | ||||||
|  *                  TWeakNeutral4ptDisc implementation                         * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| // Forwards the module name to the Module<WeakHamiltonianPar> base class; no |  | ||||||
| // other state is initialised here. |  | ||||||
| TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name) |  | ||||||
|     : Module<WeakHamiltonianPar>(name) |  | ||||||
| { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| // Returns the four propagator names held in the module parameters, in the |  | ||||||
| // order q1, q2, q3, q4. |  | ||||||
| std::vector<std::string> TWeakNeutral4ptDisc::getInput(void) |  | ||||||
| { |  | ||||||
|     return {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Returns a one-element list holding the module name. |  | ||||||
| std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void) |  | ||||||
| { |  | ||||||
|     return std::vector<std::string>(1, getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakNeutral4ptDisc::setup(void) |  | ||||||
| { |  | ||||||
|     // No setup work is performed for this module. |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| // Builds the two neutral disconnected contractions from the propagators named |  | ||||||
| // by par().q1..q4 and writes both results, grouped as "HW_disc0", through a |  | ||||||
| // CorrWriter constructed from par().output. |  | ||||||
| //   "HW_disc0_1": trace(meson[mu]*loop[mu]) * curr |  | ||||||
| //   "HW_disc0_2": trace(meson[mu])*trace(loop[mu]) * curr |  | ||||||
| // with meson[mu] = q1*adj(q2)*g5*gL[mu], loop[mu] = q4*gL[mu] and |  | ||||||
| // curr = trace(q3*GammaL(Gamma5)). |  | ||||||
| // NOTE(review): SUM_MU and MAKE_DIAG are macros defined outside this file; |  | ||||||
| // they presumably accumulate over mu and refer to locals such as `ndim` by |  | ||||||
| // name, so the local names below are left untouched - confirm. |  | ||||||
| void TWeakNeutral4ptDisc::execute(void) |  | ||||||
| { |  | ||||||
|     // Every quark name is wrapped in a matching pair of single quotes. |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '" |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '" |  | ||||||
|                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4 |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     // g5 is referenced by name inside MAKE_DISC_MESON. |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_neut_disc_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     // tmp only serves as a prototype to fill the per-mu vectors. |  | ||||||
|     PropagatorField              tmp(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> meson(ndim, tmp); |  | ||||||
|     std::vector<PropagatorField> loop(ndim, tmp); |  | ||||||
|     LatticeComplex               curr(env().getGrid()); |  | ||||||
|  |  | ||||||
|     // Sub-expressions shared by both contraction types. The index is |  | ||||||
|     // unsigned to match ndim. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu])); |  | ||||||
|         loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|     // NOTE(review): the file header documents this factor as trace(q3*g5), |  | ||||||
|     // but GammaL(Gamma5) is what is passed here - confirm which is intended. |  | ||||||
|     curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5)); |  | ||||||
|  |  | ||||||
|     // Perform type 1 contractions. |  | ||||||
|     // NOTE(review): the file header lists the product-of-traces form as |  | ||||||
|     // type 1 and the single-trace form as type 2, the reverse of what is |  | ||||||
|     // stored below - confirm which numbering is intended. |  | ||||||
|     SUM_MU(expbuf, trace(meson[mu]*loop[mu])) |  | ||||||
|     expbuf *= curr; |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1") |  | ||||||
|  |  | ||||||
|     // Perform type 2 contractions. |  | ||||||
|     SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu])) |  | ||||||
|     expbuf *= curr; |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_disc0", result); |  | ||||||
| } |  | ||||||
| @@ -1,59 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakNeutral4ptDisc                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| // Indices of the two neutral disconnected diagrams; n_neut_disc_diag is |  | ||||||
| // their count. |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     neut_disc_1_diag,  // 0 |  | ||||||
|     neut_disc_2_diag,  // 1 |  | ||||||
|     n_neut_disc_diag   // 2: number of diagrams |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Neutral 4pt disconnected subdiagram contractions. |  | ||||||
| // MAKE_DISC_MESON uses `g5`, which is not a macro parameter and must be in |  | ||||||
| // scope where the macro is used. All arguments are parenthesised so that |  | ||||||
| // compound expressions passed in keep their own grouping. |  | ||||||
| #define MAKE_DISC_MESON(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) |  | ||||||
| #define MAKE_DISC_LOOP(Q_LOOP, gamma) ((Q_LOOP)*(gamma)) |  | ||||||
| #define MAKE_DISC_CURR(Q_c, gamma) (trace((Q_c)*(gamma))) |  | ||||||
|  |  | ||||||
| // NOTE(review): MAKE_WEAK_MODULE is defined outside this file; it presumably |  | ||||||
| // declares the TWeakNeutral4ptDisc module class - confirm. |  | ||||||
| MAKE_WEAK_MODULE(WeakNeutral4ptDisc) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | |||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| void TLoad::execute(void) | void TLoad::execute(void) | ||||||
| { | { | ||||||
|     FieldMetaData  header; |     NerscField  header; | ||||||
|     std::string fileName = par().file + "." |     std::string fileName = par().file + "." | ||||||
|                            + std::to_string(env().getTrajectory()); |                            + std::to_string(env().getTrajectory()); | ||||||
|      |      | ||||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | |||||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); |     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||||
|     NerscIO::readConfiguration(U, header, fileName); |     NerscIO::readConfiguration(U, header, fileName); | ||||||
|     LOG(Message) << "NERSC header:" << std::endl; |     LOG(Message) << "NERSC header:" << std::endl; | ||||||
|     dump_meta_data(header, LOG(Message)); |     dump_nersc_header(header, LOG(Message)); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Load_hpp_ | #ifndef Hadrons_Load_hpp_ | ||||||
| #define Hadrons_MGauge_Load_hpp_ | #define Hadrons_Load_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Load_hpp_ | #endif // Hadrons_Load_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Random_hpp_ | #ifndef Hadrons_Random_hpp_ | ||||||
| #define Hadrons_MGauge_Random_hpp_ | #define Hadrons_Random_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Random_hpp_ | #endif // Hadrons_Random_hpp_ | ||||||
|   | |||||||
| @@ -1,88 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MGauge; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                  TStochEm implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| // Forwards the module name to the Module<StochEmPar> base class; no other |  | ||||||
| // state is initialised here. |  | ||||||
| TStochEm::TStochEm(const std::string name) |  | ||||||
|     : Module<StochEmPar>(name) |  | ||||||
| { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| // Returns an empty list: this module declares no inputs. |  | ||||||
| std::vector<std::string> TStochEm::getInput(void) |  | ||||||
| { |  | ||||||
|     return std::vector<std::string>(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Returns a one-element list holding the module name. |  | ||||||
| std::vector<std::string> TStochEm::getOutput(void) |  | ||||||
| { |  | ||||||
|     return std::vector<std::string>(1, getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| // Registers the EM field under the module name and, unless an object of that |  | ||||||
| // name is already registered, the weight lattice "_<name>_weight". |  | ||||||
| void TStochEm::setup(void) |  | ||||||
| { |  | ||||||
|     const std::string weightName = "_" + getName() + "_weight"; |  | ||||||
|  |  | ||||||
|     if (!env().hasRegisteredObject(weightName)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<EmComp>(weightName); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<EmField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| // Creates the EM field lattice under the module name and fills it through |  | ||||||
| // PhotonR::StochasticField, using the weight lattice "_<name>_weight". The |  | ||||||
| // weight is computed with PhotonR::StochasticWeight only when the environment |  | ||||||
| // reports that the object has not been created yet; otherwise the existing |  | ||||||
| // object is fetched and reused. |  | ||||||
| void TStochEm::execute(void) |  | ||||||
| { |  | ||||||
|     // Name of the weight object, built once instead of at every use. |  | ||||||
|     const std::string weightName = "_" + getName() + "_weight"; |  | ||||||
|     PhotonR photon(par().gauge, par().zmScheme); |  | ||||||
|     EmField &a = *env().createLattice<EmField>(getName()); |  | ||||||
|     EmComp  *w = nullptr; |  | ||||||
|  |  | ||||||
|     if (!env().hasCreatedObject(weightName)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching stochastic EM potential weight (gauge: " |  | ||||||
|                      << par().gauge << ", zero-mode scheme: " |  | ||||||
|                      << par().zmScheme << ")..." << std::endl; |  | ||||||
|         w = env().createLattice<EmComp>(weightName); |  | ||||||
|         photon.StochasticWeight(*w); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         w = env().getObject<EmComp>(weightName); |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Generating stochastic EM potential..." << std::endl; |  | ||||||
|     photon.StochasticField(a, *env().get4dRng(), *w); |  | ||||||
| } |  | ||||||
| @@ -1,75 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef Hadrons_MGauge_StochEm_hpp_ |  | ||||||
| #define Hadrons_MGauge_StochEm_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         StochEm                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MGauge) |  | ||||||
|  |  | ||||||
| // Parameters of the StochEm module: a photon gauge and a zero-mode scheme. |  | ||||||
| class StochEmPar : Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS( |  | ||||||
|         StochEmPar, |  | ||||||
|         PhotonR::Gauge,    gauge, |  | ||||||
|         PhotonR::ZmScheme, zmScheme); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Module declaration for StochEm, parameterised by StochEmPar. |  | ||||||
| class TStochEm : public Module<StochEmPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     // Field types taken from PhotonR. |  | ||||||
|     using EmField = PhotonR::GaugeField; |  | ||||||
|     using EmComp  = PhotonR::GaugeLinkField; |  | ||||||
|  |  | ||||||
|     // construction / destruction |  | ||||||
|     TStochEm(const std::string name); |  | ||||||
|     virtual ~TStochEm(void) = default; |  | ||||||
|  |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|  |  | ||||||
|     // setup, then execution |  | ||||||
|     virtual void setup(void); |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_StochEm_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Unit_hpp_ | #ifndef Hadrons_Unit_hpp_ | ||||||
| #define Hadrons_MGauge_Unit_hpp_ | #define Hadrons_Unit_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Unit_hpp_ | #endif // Hadrons_Unit_hpp_ | ||||||
|   | |||||||
| @@ -1,132 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
| #define Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Noise loop propagator |  | ||||||
|  ----------------------------- |  | ||||||
|  * loop_x = q_x * adj(eta_x) |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - q = Result of inversion on noise source. |  | ||||||
|  - eta = noise source. |  | ||||||
|  |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         NoiseLoop                                          * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MLoop) |  | ||||||
|  |  | ||||||
| // Parameters of the NoiseLoop module: the names of q and eta. |  | ||||||
| class NoiseLoopPar : Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS( |  | ||||||
|         NoiseLoopPar, |  | ||||||
|         std::string, q, |  | ||||||
|         std::string, eta); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Module declaration for NoiseLoop, templated on the fermion implementation |  | ||||||
| // and parameterised by NoiseLoopPar. |  | ||||||
| template <typename FImpl> |  | ||||||
| class TNoiseLoop : public Module<NoiseLoopPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|  |  | ||||||
|     // construction / destruction |  | ||||||
|     TNoiseLoop(const std::string name); |  | ||||||
|     virtual ~TNoiseLoop(void) = default; |  | ||||||
|  |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|  |  | ||||||
|     // setup, then execution |  | ||||||
|     virtual void setup(void); |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                 TNoiseLoop implementation                                  * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| // Forwards the module name to the Module<NoiseLoopPar> base class; no other |  | ||||||
| // state is initialised here. |  | ||||||
| template <typename FImpl> |  | ||||||
| TNoiseLoop<FImpl>::TNoiseLoop(const std::string name) |  | ||||||
|     : Module<NoiseLoopPar>(name) |  | ||||||
| { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| // Returns the two object names held in the module parameters: q, then eta. |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TNoiseLoop<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     return {par().q, par().eta}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TNoiseLoop<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TNoiseLoop<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     PropagatorField &loop = *env().template createLattice<PropagatorField>(getName()); |  | ||||||
|     PropagatorField &q    = *env().template getObject<PropagatorField>(par().q); |  | ||||||
|     PropagatorField &eta  = *env().template getObject<PropagatorField>(par().eta); |  | ||||||
|     loop = q*adj(eta); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
| @@ -1,226 +0,0 @@ | |||||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MScalar; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                     TChargedProp implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TChargedProp::TChargedProp(const std::string name) |  | ||||||
| : Module<ChargedPropPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TChargedProp::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().source, par().emField}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TChargedProp::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TChargedProp::setup(void) |  | ||||||
| { |  | ||||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); |  | ||||||
|     phaseName_.clear(); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         phaseName_.push_back("_shiftphase_" + std::to_string(mu)); |  | ||||||
|     } |  | ||||||
|     GFSrcName_ = "_" + getName() + "_DinvSrc"; |  | ||||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     if (!env().hasRegisteredObject(phaseName_[0])) |  | ||||||
|     { |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             env().registerLattice<ScalarField>(phaseName_[mu]); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (!env().hasRegisteredObject(GFSrcName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(GFSrcName_); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<ScalarField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TChargedProp::execute(void) |  | ||||||
| { |  | ||||||
|     // CACHING ANALYTIC EXPRESSIONS |  | ||||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); |  | ||||||
|     Complex     ci(0.0,1.0); |  | ||||||
|     FFT         fft(env().getGrid()); |  | ||||||
|      |  | ||||||
|     // cache free scalar propagator |  | ||||||
|     if (!env().hasCreatedObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching momentum space free scalar propagator" |  | ||||||
|                      << " (mass= " << par().mass << ")..." << std::endl; |  | ||||||
|         freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); |  | ||||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     // cache G*F*src |  | ||||||
|     if (!env().hasCreatedObject(GFSrcName_)) |  | ||||||
|          |  | ||||||
|     { |  | ||||||
|         GFSrc_ = env().createLattice<ScalarField>(GFSrcName_); |  | ||||||
|         fft.FFT_all_dim(*GFSrc_, source, FFT::forward); |  | ||||||
|         *GFSrc_ = (*freeMomProp_)*(*GFSrc_); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         GFSrc_ = env().getObject<ScalarField>(GFSrcName_); |  | ||||||
|     } |  | ||||||
|     // cache phases |  | ||||||
|     if (!env().hasCreatedObject(phaseName_[0])) |  | ||||||
|     { |  | ||||||
|         std::vector<int> &l = env().getGrid()->_fdimensions; |  | ||||||
|          |  | ||||||
|         LOG(Message) << "Caching shift phases..." << std::endl; |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             Real    twoPiL = M_PI*2./l[mu]; |  | ||||||
|              |  | ||||||
|             phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu])); |  | ||||||
|             LatticeCoordinate(*(phase_[mu]), mu); |  | ||||||
|             *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             phase_.push_back(env().getObject<ScalarField>(phaseName_[mu])); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // PROPAGATOR CALCULATION |  | ||||||
|     LOG(Message) << "Computing charged scalar propagator" |  | ||||||
|                  << " (mass= " << par().mass |  | ||||||
|                  << ", charge= " << par().charge << ")..." << std::endl; |  | ||||||
|      |  | ||||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); |  | ||||||
|     ScalarField buf(env().getGrid()); |  | ||||||
|     ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; |  | ||||||
|     double      q = par().charge; |  | ||||||
|      |  | ||||||
|     // G*F*Src |  | ||||||
|     prop = GFSrc; |  | ||||||
|  |  | ||||||
|     // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) |  | ||||||
|     buf = GFSrc; |  | ||||||
|     momD1(buf, fft); |  | ||||||
|     buf = G*buf; |  | ||||||
|     prop = prop - q*buf; |  | ||||||
|  |  | ||||||
|     // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) |  | ||||||
|     momD1(buf, fft); |  | ||||||
|     prop = prop + q*q*G*buf; |  | ||||||
|  |  | ||||||
|     // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) |  | ||||||
|     buf = GFSrc; |  | ||||||
|     momD2(buf, fft); |  | ||||||
|     prop = prop - q*q*G*buf; |  | ||||||
|  |  | ||||||
|     // final FT |  | ||||||
|     fft.FFT_all_dim(prop, prop, FFT::backward); |  | ||||||
|      |  | ||||||
|     // OUTPUT IF NECESSARY |  | ||||||
|     if (!par().output.empty()) |  | ||||||
|     { |  | ||||||
|         std::string           filename = par().output + "." + |  | ||||||
|                                          std::to_string(env().getTrajectory()); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "Saving zero-momentum projection to '" |  | ||||||
|                      << filename << "'..." << std::endl; |  | ||||||
|          |  | ||||||
|         CorrWriter            writer(filename); |  | ||||||
|         std::vector<TComplex> vecBuf; |  | ||||||
|         std::vector<Complex>  result; |  | ||||||
|          |  | ||||||
|         sliceSum(prop, vecBuf, Tp); |  | ||||||
|         result.resize(vecBuf.size()); |  | ||||||
|         for (unsigned int t = 0; t < vecBuf.size(); ++t) |  | ||||||
|         { |  | ||||||
|             result[t] = TensorRemove(vecBuf[t]); |  | ||||||
|         } |  | ||||||
|         write(writer, "charge", q); |  | ||||||
|         write(writer, "prop", result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void TChargedProp::momD1(ScalarField &s, FFT &fft) |  | ||||||
| { |  | ||||||
|     EmField     &A = *env().getObject<EmField>(par().emField); |  | ||||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), |  | ||||||
|                 Amu(env().getGrid()); |  | ||||||
|     Complex     ci(0.0,1.0); |  | ||||||
|  |  | ||||||
|     result = zero; |  | ||||||
|  |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = (*phase_[mu])*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); |  | ||||||
|         buf = Amu*buf; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result - ci*buf; |  | ||||||
|     } |  | ||||||
|     fft.FFT_all_dim(s, s, FFT::backward); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = Amu*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + ci*adj(*phase_[mu])*buf; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     s = result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void TChargedProp::momD2(ScalarField &s, FFT &fft) |  | ||||||
| { |  | ||||||
|     EmField     &A = *env().getObject<EmField>(par().emField); |  | ||||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), |  | ||||||
|                 Amu(env().getGrid()); |  | ||||||
|  |  | ||||||
|     result = zero; |  | ||||||
|      |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = (*phase_[mu])*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); |  | ||||||
|         buf = Amu*Amu*buf; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + .5*buf; |  | ||||||
|     } |  | ||||||
|     fft.FFT_all_dim(s, s, FFT::backward); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu);         |  | ||||||
|         buf = Amu*Amu*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + .5*adj(*phase_[mu])*buf; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     s = result; |  | ||||||
| } |  | ||||||
| @@ -1,61 +0,0 @@ | |||||||
| #ifndef Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
| #define Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       Charged scalar propagator                            * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MScalar) |  | ||||||
|  |  | ||||||
| class ChargedPropPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, |  | ||||||
|                                     std::string, emField, |  | ||||||
|                                     std::string, source, |  | ||||||
|                                     double,      mass, |  | ||||||
|                                     double,      charge, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TChargedProp: public Module<ChargedPropPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     SCALAR_TYPE_ALIASES(SIMPL,); |  | ||||||
|     typedef PhotonR::GaugeField     EmField; |  | ||||||
|     typedef PhotonR::GaugeLinkField EmComp; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TChargedProp(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TChargedProp(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| private: |  | ||||||
|     void momD1(ScalarField &s, FFT &fft); |  | ||||||
|     void momD2(ScalarField &s, FFT &fft); |  | ||||||
| private: |  | ||||||
|     std::string                freeMomPropName_, GFSrcName_; |  | ||||||
|     std::vector<std::string>   phaseName_; |  | ||||||
|     ScalarField                *freeMomProp_, *GFSrc_; |  | ||||||
|     std::vector<ScalarField *> phase_; |  | ||||||
|     EmField                    *A; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
| @@ -1,79 +0,0 @@ | |||||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MScalar; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                        TFreeProp implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TFreeProp::TFreeProp(const std::string name) |  | ||||||
| : Module<FreePropPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TFreeProp::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().source}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TFreeProp::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TFreeProp::setup(void) |  | ||||||
| { |  | ||||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); |  | ||||||
|      |  | ||||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<ScalarField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TFreeProp::execute(void) |  | ||||||
| { |  | ||||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); |  | ||||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); |  | ||||||
|     ScalarField *freeMomProp; |  | ||||||
|  |  | ||||||
|     if (!env().hasCreatedObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching momentum space free scalar propagator" |  | ||||||
|                      << " (mass= " << par().mass << ")..." << std::endl; |  | ||||||
|         freeMomProp = env().createLattice<ScalarField>(freeMomPropName_); |  | ||||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         freeMomProp = env().getObject<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Computing free scalar propagator..." << std::endl; |  | ||||||
|     SIMPL::FreePropagator(source, prop, *freeMomProp); |  | ||||||
|      |  | ||||||
|     if (!par().output.empty()) |  | ||||||
|     { |  | ||||||
|         TextWriter            writer(par().output + "." + |  | ||||||
|                                      std::to_string(env().getTrajectory())); |  | ||||||
|         std::vector<TComplex> buf; |  | ||||||
|         std::vector<Complex>  result; |  | ||||||
|          |  | ||||||
|         sliceSum(prop, buf, Tp); |  | ||||||
|         result.resize(buf.size()); |  | ||||||
|         for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|         { |  | ||||||
|             result[t] = TensorRemove(buf[t]); |  | ||||||
|         } |  | ||||||
|         write(writer, "prop", result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,50 +0,0 @@ | |||||||
| #ifndef Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
| #define Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                               FreeProp                                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MScalar) |  | ||||||
|  |  | ||||||
| class FreePropPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, |  | ||||||
|                                     std::string, source, |  | ||||||
|                                     double,      mass, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TFreeProp: public Module<FreePropPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     SCALAR_TYPE_ALIASES(SIMPL,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TFreeProp(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TFreeProp(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| private: |  | ||||||
|     std::string freeMomPropName_; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
| @@ -1,6 +0,0 @@ | |||||||
| #ifndef Hadrons_Scalar_hpp_ |  | ||||||
| #define Hadrons_Scalar_hpp_ |  | ||||||
|  |  | ||||||
| #define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m) |  | ||||||
|  |  | ||||||
| #endif // Hadrons_Scalar_hpp_ |  | ||||||
| @@ -1,114 +0,0 @@ | |||||||
| #ifndef Hadrons_MSink_Point_hpp_ |  | ||||||
| #define Hadrons_MSink_Point_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                                   Point                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MSink) |  | ||||||
|  |  | ||||||
| class PointPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, |  | ||||||
|                                     std::string, mom); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TPoint: public Module<PointPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|     SINK_TYPE_ALIASES(); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TPoint(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TPoint(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink); |  | ||||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                          TPoint implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TPoint<FImpl>::TPoint(const std::string name) |  | ||||||
| : Module<PointPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TPoint<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TPoint<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TPoint<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     unsigned int size; |  | ||||||
|      |  | ||||||
|     size = env().template lattice4dSize<LatticeComplex>(); |  | ||||||
|     env().registerObject(getName(), size); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TPoint<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     std::vector<Real> p = strToVec<Real>(par().mom); |  | ||||||
|     LatticeComplex    ph(env().getGrid()), coor(env().getGrid()); |  | ||||||
|     Complex           i(0.0,1.0); |  | ||||||
|      |  | ||||||
|     LOG(Message) << "Setting up point sink function for momentum [" |  | ||||||
|                  << par().mom << "]" << std::endl; |  | ||||||
|     ph = zero; |  | ||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |  | ||||||
|     { |  | ||||||
|         LatticeCoordinate(coor, mu); |  | ||||||
|         ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; |  | ||||||
|     } |  | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |  | ||||||
|     auto sink = [ph](const PropagatorField &field) |  | ||||||
|     { |  | ||||||
|         SlicedPropagator res; |  | ||||||
|         PropagatorField  tmp = ph*field; |  | ||||||
|          |  | ||||||
|         sliceSum(tmp, res, Tp); |  | ||||||
|          |  | ||||||
|         return res; |  | ||||||
|     }; |  | ||||||
|     env().setObject(getName(), new SinkFn(sink)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MSink_Point_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSolver_RBPrecCG_hpp_ | #ifndef Hadrons_RBPrecCG_hpp_ | ||||||
| #define Hadrons_MSolver_RBPrecCG_hpp_ | #define Hadrons_RBPrecCG_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -53,7 +53,7 @@ template <typename FImpl> | |||||||
| class TRBPrecCG: public Module<RBPrecCGPar> | class TRBPrecCG: public Module<RBPrecCGPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TRBPrecCG(const std::string name); |     TRBPrecCG(const std::string name); | ||||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSolver_RBPrecCG_hpp_ | #endif // Hadrons_RBPrecCG_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_Point_hpp_ | #ifndef Hadrons_Point_hpp_ | ||||||
| #define Hadrons_MSource_Point_hpp_ | #define Hadrons_Point_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,7 +63,7 @@ template <typename FImpl> | |||||||
| class TPoint: public Module<PointPar> | class TPoint: public Module<PointPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TPoint(const std::string name); |     TPoint(const std::string name); | ||||||
| @@ -79,7 +79,6 @@ public: | |||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); | MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); | ||||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TPoint template implementation                       * |  *                       TPoint template implementation                       * | ||||||
| @@ -133,4 +132,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_Point_hpp_ | #endif // Hadrons_Point_hpp_ | ||||||
|   | |||||||
| @@ -6,7 +6,6 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
| @@ -28,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_SeqGamma_hpp_ | #ifndef Hadrons_SeqGamma_hpp_ | ||||||
| #define Hadrons_MSource_SeqGamma_hpp_ | #define Hadrons_SeqGamma_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -72,7 +71,7 @@ template <typename FImpl> | |||||||
| class TSeqGamma: public Module<SeqGammaPar> | class TSeqGamma: public Module<SeqGammaPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TSeqGamma(const std::string name); |     TSeqGamma(const std::string name); | ||||||
| @@ -150,9 +149,9 @@ void TSeqGamma<FImpl>::execute(void) | |||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||||
|     { |     { | ||||||
|         LatticeCoordinate(coor, mu); |         LatticeCoordinate(coor, mu); | ||||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); |         ph = ph + p[mu]*coor; | ||||||
|     } |     } | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |     ph = exp(i*ph); | ||||||
|     LatticeCoordinate(t, Tp); |     LatticeCoordinate(t, Tp); | ||||||
|     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); |     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); | ||||||
| } | } | ||||||
| @@ -161,4 +160,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_SeqGamma_hpp_ | #endif // Hadrons_SeqGamma_hpp_ | ||||||
|   | |||||||
| @@ -1,147 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MSource/Wall.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_WallSource_hpp_ |  | ||||||
| #define Hadrons_MSource_WallSource_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Wall source |  | ||||||
|  ----------------------------- |  | ||||||
|  * src_x = delta(x_3 - tW) * exp(i x.mom) |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - tW: source timeslice (integer) |  | ||||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.") |  | ||||||
|   |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         Wall                                               * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MSource) |  | ||||||
|  |  | ||||||
| class WallPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar, |  | ||||||
|                                     unsigned int, tW, |  | ||||||
|                                     std::string, mom); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TWall: public Module<WallPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TWall(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TWall(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                 TWall implementation                                       * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TWall<FImpl>::TWall(const std::string name) |  | ||||||
| : Module<WallPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TWall<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TWall<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TWall<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TWall<FImpl>::execute(void) |  | ||||||
| {     |  | ||||||
|     LOG(Message) << "Generating wall source at t = " << par().tW  |  | ||||||
|                  << " with momentum " << par().mom << std::endl; |  | ||||||
|      |  | ||||||
|     PropagatorField &src = *env().template createLattice<PropagatorField>(getName()); |  | ||||||
|     Lattice<iScalar<vInteger>> t(env().getGrid()); |  | ||||||
|     LatticeComplex             ph(env().getGrid()), coor(env().getGrid()); |  | ||||||
|     std::vector<Real>          p; |  | ||||||
|     Complex                    i(0.0,1.0); |  | ||||||
|      |  | ||||||
|     p  = strToVec<Real>(par().mom); |  | ||||||
|     ph = zero; |  | ||||||
|     for(unsigned int mu = 0; mu < Nd; mu++) |  | ||||||
|     { |  | ||||||
|         LatticeCoordinate(coor, mu); |  | ||||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); |  | ||||||
|     } |  | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |  | ||||||
|     LatticeCoordinate(t, Tp); |  | ||||||
|     src = 1.; |  | ||||||
|     src = where((t == par().tW), src*ph, 0.*src); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_WallSource_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_Z2_hpp_ | #ifndef Hadrons_Z2_hpp_ | ||||||
| #define Hadrons_MSource_Z2_hpp_ | #define Hadrons_Z2_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -67,7 +67,7 @@ template <typename FImpl> | |||||||
| class TZ2: public Module<Z2Par> | class TZ2: public Module<Z2Par> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TZ2(const std::string name); |     TZ2(const std::string name); | ||||||
| @@ -83,7 +83,6 @@ public: | |||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); | MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); | ||||||
| MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TZ2 template implementation                          * |  *                       TZ2 template implementation                          * | ||||||
| @@ -149,4 +148,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_Z2_hpp_ | #endif // Hadrons_Z2_hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,34 @@ | |||||||
| #ifndef Hadrons_MFermion_GaugeProp_hpp_ | /*************************************************************************************
 | ||||||
| #define Hadrons_MFermion_GaugeProp_hpp_ | 
 | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  | 
 | ||||||
|  | Source file: extras/Hadrons/Modules/Quark.hpp | ||||||
|  | 
 | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  | 
 | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  | 
 | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  | 
 | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | 
 | ||||||
|  | #ifndef Hadrons_Quark_hpp_ | ||||||
|  | #define Hadrons_Quark_hpp_ | ||||||
| 
 | 
 | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -8,29 +37,27 @@ | |||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                                GaugeProp                                   * |  *                               TQuark                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MFermion) | class QuarkPar: Serializable | ||||||
| 
 |  | ||||||
| class GaugePropPar: Serializable |  | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, | ||||||
|                                     std::string, source, |                                     std::string, source, | ||||||
|                                     std::string, solver); |                                     std::string, solver); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TGaugeProp: public Module<GaugePropPar> | class TQuark: public Module<QuarkPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor
 |     // constructor
 | ||||||
|     TGaugeProp(const std::string name); |     TQuark(const std::string name); | ||||||
|     // destructor
 |     // destructor
 | ||||||
|     virtual ~TGaugeProp(void) = default; |     virtual ~TQuark(void) = default; | ||||||
|     // dependency relation
 |     // dependencies/products
 | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     // setup
 |     // setup
 | ||||||
| @@ -42,20 +69,20 @@ private: | |||||||
|     SolverFn     *solver_{nullptr}; |     SolverFn     *solver_{nullptr}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion); | MODULE_REGISTER(Quark, TQuark<FIMPL>); | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                      TGaugeProp implementation                             * |  *                          TQuark implementation                             * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| // constructor /////////////////////////////////////////////////////////////////
 | // constructor /////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| TGaugeProp<FImpl>::TGaugeProp(const std::string name) | TQuark<FImpl>::TQuark(const std::string name) | ||||||
| : Module<GaugePropPar>(name) | : Module(name) | ||||||
| {} | {} | ||||||
| 
 | 
 | ||||||
| // dependencies/products ///////////////////////////////////////////////////////
 | // dependencies/products ///////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> in = {par().source, par().solver}; |     std::vector<std::string> in = {par().source, par().solver}; | ||||||
|      |      | ||||||
| @@ -63,7 +90,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> out = {getName(), getName() + "_5d"}; |     std::vector<std::string> out = {getName(), getName() + "_5d"}; | ||||||
|      |      | ||||||
| @@ -72,7 +99,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | |||||||
| 
 | 
 | ||||||
| // setup ///////////////////////////////////////////////////////////////////////
 | // setup ///////////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TGaugeProp<FImpl>::setup(void) | void TQuark<FImpl>::setup(void) | ||||||
| { | { | ||||||
|     Ls_ = env().getObjectLs(par().solver); |     Ls_ = env().getObjectLs(par().solver); | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |     env().template registerLattice<PropagatorField>(getName()); | ||||||
| @@ -84,7 +111,7 @@ void TGaugeProp<FImpl>::setup(void) | |||||||
| 
 | 
 | ||||||
| // execution ///////////////////////////////////////////////////////////////////
 | // execution ///////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TGaugeProp<FImpl>::execute(void) | void TQuark<FImpl>::execute(void) | ||||||
| { | { | ||||||
|     LOG(Message) << "Computing quark propagator '" << getName() << "'" |     LOG(Message) << "Computing quark propagator '" << getName() << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
| @@ -146,15 +173,13 @@ void TGaugeProp<FImpl>::execute(void) | |||||||
|                 *env().template getObject<PropagatorField>(getName()); |                 *env().template getObject<PropagatorField>(getName()); | ||||||
|              |              | ||||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); |             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||||
|             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); |             axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); | ||||||
|             ExtractSlice(tmp, sol, 0, 0); |             ExtractSlice(tmp, sol, 0, 0); | ||||||
|             FermToProp(p4d, tmp, s, c); |             FermToProp(p4d, tmp, s, c); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| END_MODULE_NAMESPACE |  | ||||||
| 
 |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| #endif // Hadrons_MFermion_GaugeProp_hpp_
 | #endif // Hadrons_Quark_hpp_
 | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #ifndef Hadrons____FILEBASENAME____hpp_ | ||||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #define Hadrons____FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #endif // Hadrons____FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #ifndef Hadrons____FILEBASENAME____hpp_ | ||||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #define Hadrons____FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #endif // Hadrons____FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,38 +1,19 @@ | |||||||
| modules_cc =\ | modules_cc =\ | ||||||
|   Modules/MContraction/WeakHamiltonianEye.cc \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianNonEye.cc \ |  | ||||||
|   Modules/MContraction/WeakNeutral4ptDisc.cc \ |  | ||||||
|   Modules/MGauge/Load.cc \ |   Modules/MGauge/Load.cc \ | ||||||
|   Modules/MGauge/Random.cc \ |   Modules/MGauge/Random.cc \ | ||||||
|   Modules/MGauge/StochEm.cc \ |   Modules/MGauge/Unit.cc | ||||||
|   Modules/MGauge/Unit.cc \ |  | ||||||
|   Modules/MScalar/ChargedProp.cc \ |  | ||||||
|   Modules/MScalar/FreeProp.cc |  | ||||||
|  |  | ||||||
| modules_hpp =\ | modules_hpp =\ | ||||||
|   Modules/MAction/DWF.hpp \ |   Modules/MAction/DWF.hpp \ | ||||||
|   Modules/MAction/Wilson.hpp \ |   Modules/MAction/Wilson.hpp \ | ||||||
|   Modules/MContraction/Baryon.hpp \ |   Modules/MContraction/Baryon.hpp \ | ||||||
|   Modules/MContraction/DiscLoop.hpp \ |  | ||||||
|   Modules/MContraction/Gamma3pt.hpp \ |  | ||||||
|   Modules/MContraction/Meson.hpp \ |   Modules/MContraction/Meson.hpp \ | ||||||
|   Modules/MContraction/WeakHamiltonian.hpp \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianEye.hpp \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianNonEye.hpp \ |  | ||||||
|   Modules/MContraction/WeakNeutral4ptDisc.hpp \ |  | ||||||
|   Modules/MFermion/GaugeProp.hpp \ |  | ||||||
|   Modules/MGauge/Load.hpp \ |   Modules/MGauge/Load.hpp \ | ||||||
|   Modules/MGauge/Random.hpp \ |   Modules/MGauge/Random.hpp \ | ||||||
|   Modules/MGauge/StochEm.hpp \ |  | ||||||
|   Modules/MGauge/Unit.hpp \ |   Modules/MGauge/Unit.hpp \ | ||||||
|   Modules/MLoop/NoiseLoop.hpp \ |  | ||||||
|   Modules/MScalar/ChargedProp.hpp \ |  | ||||||
|   Modules/MScalar/FreeProp.hpp \ |  | ||||||
|   Modules/MScalar/Scalar.hpp \ |  | ||||||
|   Modules/MSink/Point.hpp \ |  | ||||||
|   Modules/MSolver/RBPrecCG.hpp \ |   Modules/MSolver/RBPrecCG.hpp \ | ||||||
|   Modules/MSource/Point.hpp \ |   Modules/MSource/Point.hpp \ | ||||||
|   Modules/MSource/SeqGamma.hpp \ |   Modules/MSource/SeqGamma.hpp \ | ||||||
|   Modules/MSource/Wall.hpp \ |   Modules/MSource/Z2.hpp \ | ||||||
|   Modules/MSource/Z2.hpp |   Modules/Quark.hpp | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,11 +0,0 @@ | |||||||
| #include <qed-fvol/Global.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace QCD; |  | ||||||
| using namespace QedFVol; |  | ||||||
|  |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); |  | ||||||
| @@ -1,42 +0,0 @@ | |||||||
| #ifndef QedFVol_Global_hpp_ |  | ||||||
| #define QedFVol_Global_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| #define BEGIN_QEDFVOL_NAMESPACE \ |  | ||||||
| namespace Grid {\ |  | ||||||
| using namespace QCD;\ |  | ||||||
| namespace QedFVol {\ |  | ||||||
| using Grid::operator<<; |  | ||||||
| #define END_QEDFVOL_NAMESPACE }} |  | ||||||
|  |  | ||||||
| /* the 'using Grid::operator<<;' statement prevents a very nasty compilation |  | ||||||
|  * error with GCC (clang compiles fine without it). |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| BEGIN_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| class QedFVolLogger: public Logger |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, |  | ||||||
|                                                   GridLogColours, "BLACK"){}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #define LOG(channel) std::cout << QedFVolLog##channel |  | ||||||
| #define QEDFVOL_ERROR(msg)\ |  | ||||||
| LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ |  | ||||||
|            << __LINE__ << ")" << std::endl;\ |  | ||||||
| abort(); |  | ||||||
|  |  | ||||||
| #define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; |  | ||||||
|  |  | ||||||
| extern QedFVolLogger QedFVolLogError; |  | ||||||
| extern QedFVolLogger QedFVolLogWarning; |  | ||||||
| extern QedFVolLogger QedFVolLogMessage; |  | ||||||
| extern QedFVolLogger QedFVolLogIterative; |  | ||||||
| extern QedFVolLogger QedFVolLogDebug; |  | ||||||
|  |  | ||||||
| END_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // QedFVol_Global_hpp_ |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| AM_CXXFLAGS += -I$(top_srcdir)/extras |  | ||||||
|  |  | ||||||
| bin_PROGRAMS = qed-fvol |  | ||||||
|  |  | ||||||
| qed_fvol_SOURCES =   \ |  | ||||||
|     qed-fvol.cc      \ |  | ||||||
|     Global.cc |  | ||||||
|  |  | ||||||
| qed_fvol_LDADD   = -lGrid |  | ||||||
| @@ -1,265 +0,0 @@ | |||||||
| #ifndef QEDFVOL_WILSONLOOPS_H |  | ||||||
| #define QEDFVOL_WILSONLOOPS_H |  | ||||||
|  |  | ||||||
| #include <Global.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| template <class Gimpl> class NewWilsonLoops : public Gimpl { |  | ||||||
| public: |  | ||||||
|   INHERIT_GIMPL_TYPES(Gimpl); |  | ||||||
|  |  | ||||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; |  | ||||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // directed plaquette oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, |  | ||||||
|                            const int mu, const int nu) { |  | ||||||
|     // Annoyingly, must use either scope resolution to find dependent base |  | ||||||
|     // class, |  | ||||||
|     // or this-> ; there is no "this" in a static method. This forces explicit |  | ||||||
|     // Gimpl scope |  | ||||||
|     // resolution throughout the usage in this file, and rather defeats the |  | ||||||
|     // purpose of deriving |  | ||||||
|     // from Gimpl. |  | ||||||
|     plaq = Gimpl::CovShiftBackward( |  | ||||||
|         U[mu], mu, Gimpl::CovShiftBackward( |  | ||||||
|                        U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // trace of directed plaquette oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void traceDirPlaquette(LatticeComplex &plaq, |  | ||||||
|                                 const std::vector<GaugeMat> &U, const int mu, |  | ||||||
|                                 const int nu) { |  | ||||||
|     GaugeMat sp(U[0]._grid); |  | ||||||
|     dirPlaquette(sp, U, mu, nu); |  | ||||||
|     plaq = trace(sp); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void sitePlaquette(LatticeComplex &Plaq, |  | ||||||
|                             const std::vector<GaugeMat> &U) { |  | ||||||
|     LatticeComplex sitePlaq(U[0]._grid); |  | ||||||
|     Plaq = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceDirPlaquette(sitePlaq, U, mu, nu); |  | ||||||
|         Plaq = Plaq + sitePlaq; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumPlaquette(const GaugeLorentz &Umu) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Plaq(Umu._grid); |  | ||||||
|  |  | ||||||
|     sitePlaquette(Plaq, U); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Plaq); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgPlaquette(const GaugeLorentz &Umu) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumplaq = sumPlaquette(Umu); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; |  | ||||||
|     return sumplaq / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // Wilson loop of size (R1, R2), oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, |  | ||||||
|                            const int Rmu, const int Rnu, |  | ||||||
|                            const int mu, const int nu) { |  | ||||||
|     wl = U[nu]; |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rnu-1; i++){ |  | ||||||
|       wl = Gimpl::CovShiftForward(U[nu], nu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rmu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftForward(U[mu], mu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rnu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rmu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // trace of Wilson Loop oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void traceWilsonLoop(LatticeComplex &wl, |  | ||||||
|                                 const std::vector<GaugeMat> &U, |  | ||||||
|                                 const int Rmu, const int Rnu, |  | ||||||
|                                 const int mu, const int nu) { |  | ||||||
|     GaugeMat sp(U[0]._grid); |  | ||||||
|     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); |  | ||||||
|     wl = trace(sp); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over planes of Wilson loop with length R1 |  | ||||||
|   // in the time direction |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|  |  | ||||||
|     int ndim = U[0]._grid->_ndimension; |  | ||||||
|  |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int nu = 0; nu < ndim - 1; nu++) { |  | ||||||
|       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); |  | ||||||
|       Wl = Wl + siteWl; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum Wilson loop over all planes orthogonal to the time direction |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteSpatialWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|  |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     // Returns the real part of sum() applied to the field produced by |  | ||||||
|     // siteWilsonLoop(Wl, U, R1, R2). |  | ||||||
|     // |  | ||||||
|     // One link field per grid direction. The vector is sized from the grid |  | ||||||
|     // rather than a literal 4 so that the fill loop below, which runs up to |  | ||||||
|     // _ndimension, can never write past the end of U. |  | ||||||
|     const int ndim = Umu._grid->_ndimension; |  | ||||||
|     std::vector<GaugeMat> U(ndim, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < ndim; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     // Reduce the per-site field to a single complex number; only its real |  | ||||||
|     // part is returned. |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of timelike Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     // Returns the real part of sum() applied to the field produced by |  | ||||||
|     // siteTimelikeWilsonLoop(Wl, U, R1, R2). |  | ||||||
|     // |  | ||||||
|     // One link field per grid direction. The vector is sized from the grid |  | ||||||
|     // rather than a literal 4 so that the fill loop below, which runs up to |  | ||||||
|     // _ndimension, can never write past the end of U. |  | ||||||
|     const int ndim = Umu._grid->_ndimension; |  | ||||||
|     std::vector<GaugeMat> U(ndim, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < ndim; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteTimelikeWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     // Reduce the per-site field to a single complex number; only its real |  | ||||||
|     // part is returned. |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of spatial Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     // Returns the real part of sum() applied to the field produced by |  | ||||||
|     // siteSpatialWilsonLoop(Wl, U, R1, R2). |  | ||||||
|     // |  | ||||||
|     // One link field per grid direction. The vector is sized from the grid |  | ||||||
|     // rather than a literal 4 so that the fill loop below, which runs up to |  | ||||||
|     // _ndimension, can never write past the end of U. |  | ||||||
|     const int ndim = Umu._grid->_ndimension; |  | ||||||
|     std::vector<GaugeMat> U(ndim, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < ndim; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteSpatialWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     // Reduce the per-site field to a single complex number; only its real |  | ||||||
|     // part is returned. |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     // sumWilsonLoop() divided, in turn, by the global site count, by the |  | ||||||
|     // plane-count factor ndim * (ndim - 1), and by Nc. |  | ||||||
|     const int nd = Umu._grid->_ndimension; |  | ||||||
|     const Real loopSum = sumWilsonLoop(Umu, R1, R2); |  | ||||||
|     const Real nSites = Umu._grid->gSites(); |  | ||||||
|     const Real nPlanes = 1.0 * nd * (nd - 1); |  | ||||||
|     const Real perSite = loopSum / nSites; |  | ||||||
|     const Real perPlane = perSite / nPlanes; |  | ||||||
|     return perPlane / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of timelike Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     // sumTimelikeWilsonLoop() divided, in turn, by the global site count, |  | ||||||
|     // by the plane-count factor (ndim - 1), and by Nc. |  | ||||||
|     const int nd = Umu._grid->_ndimension; |  | ||||||
|     const Real loopSum = sumTimelikeWilsonLoop(Umu, R1, R2); |  | ||||||
|     const Real nSites = Umu._grid->gSites(); |  | ||||||
|     const Real nPlanes = 1.0 * (nd - 1); |  | ||||||
|     const Real perSite = loopSum / nSites; |  | ||||||
|     const Real perPlane = perSite / nPlanes; |  | ||||||
|     return perPlane / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of spatial Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     // sumSpatialWilsonLoop() divided, in turn, by the global site count, |  | ||||||
|     // by the plane-count factor (ndim - 1) * (ndim - 2), and by Nc. |  | ||||||
|     const int nd = Umu._grid->_ndimension; |  | ||||||
|     const Real loopSum = sumSpatialWilsonLoop(Umu, R1, R2); |  | ||||||
|     const Real nSites = Umu._grid->gSites(); |  | ||||||
|     const Real nPlanes = 1.0 * (nd - 1) * (nd - 2); |  | ||||||
|     const Real perSite = loopSum / nSites; |  | ||||||
|     const Real perPlane = perSite / nPlanes; |  | ||||||
|     return perPlane / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| END_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // QEDFVOL_WILSONLOOPS_H |  | ||||||
| @@ -1,88 +0,0 @@ | |||||||
| #include <Global.hpp> |  | ||||||
| #include <WilsonLoops.h> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace QCD; |  | ||||||
| using namespace QedFVol; |  | ||||||
|  |  | ||||||
| typedef PeriodicGaugeImpl<QedGimplR>    QedPeriodicGimplR; |  | ||||||
| typedef PhotonR::GaugeField             EmField; |  | ||||||
| typedef PhotonR::GaugeLinkField         EmComp; |  | ||||||
|  |  | ||||||
| const int NCONFIGS = 10; |  | ||||||
| const int NWILSON = 10; |  | ||||||
|  |  | ||||||
| // Test driver: generates NCONFIGS stochastic photon fields, exponentiates |  | ||||||
| // each one, and logs the configuration average of -2*log(W) for square |  | ||||||
| // iw x iw Wilson loops, iw = 1 .. NWILSON (all planes, timelike, spatial). |  | ||||||
| // argv[1] must name a parameter file; remaining arguments go to Grid_init. |  | ||||||
| int main(int argc, char *argv[]) |  | ||||||
| { |  | ||||||
|     // parse command line |  | ||||||
|     std::string parameterFileName; |  | ||||||
|  |  | ||||||
|     if (argc < 2) |  | ||||||
|     { |  | ||||||
|         std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]"; |  | ||||||
|         std::cerr << std::endl; |  | ||||||
|         std::exit(EXIT_FAILURE); |  | ||||||
|     } |  | ||||||
|     // NOTE(review): the file name is stored but not read anywhere below. |  | ||||||
|     parameterFileName = argv[1]; |  | ||||||
|  |  | ||||||
|     // initialization: the QedFVol log streams mirror the Grid ones |  | ||||||
|     Grid_init(&argc, &argv); |  | ||||||
|     QedFVolLogError.Active(GridLogError.isActive()); |  | ||||||
|     QedFVolLogWarning.Active(GridLogWarning.isActive()); |  | ||||||
|     QedFVolLogMessage.Active(GridLogMessage.isActive()); |  | ||||||
|     QedFVolLogIterative.Active(GridLogIterative.isActive()); |  | ||||||
|     QedFVolLogDebug.Active(GridLogDebug.isActive()); |  | ||||||
|     LOG(Message) << "Grid initialized" << std::endl; |  | ||||||
|  |  | ||||||
|     // QED stuff |  | ||||||
|     std::vector<int> latt_size   = GridDefaultLatt(); |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|     GridCartesian    grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|     GridParallelRNG  pRNG(&grid); |  | ||||||
|     PhotonR          photon(PhotonR::Gauge::feynman, |  | ||||||
|                             PhotonR::ZmScheme::qedL); |  | ||||||
|     EmField          a(&grid); |  | ||||||
|     EmField          expA(&grid); |  | ||||||
|  |  | ||||||
|     Complex imag_unit(0, 1); |  | ||||||
|  |  | ||||||
|     Real wlA; |  | ||||||
|     // Per loop size: running sum over configurations of -2*log(W). |  | ||||||
|     std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); |  | ||||||
|  |  | ||||||
|     pRNG.SeedRandomDevice(); |  | ||||||
|  |  | ||||||
|     LOG(Message) << "Wilson loop calculation beginning" << std::endl; |  | ||||||
|     for(int ic = 0; ic < NCONFIGS; ic++){ |  | ||||||
|         LOG(Message) << "Configuration " << ic <<std::endl; |  | ||||||
|         photon.StochasticField(a, pRNG); |  | ||||||
|  |  | ||||||
|         // Exponentiate photon field |  | ||||||
|         expA = exp(imag_unit*a); |  | ||||||
|  |  | ||||||
|         // Calculate Wilson loops. |  | ||||||
|         // NOTE(review): the avg*WilsonLoop helpers divide by Nc; the factor 3 |  | ||||||
|         // presumably undoes that normalisation -- assumes Nc == 3, TODO confirm. |  | ||||||
|         for(int iw=1; iw<=NWILSON; iw++){ |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlAvg[iw-1] -= 2*log(wlA); |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlTime[iw-1] -= 2*log(wlA); |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlSpace[iw-1] -= 2*log(wlA); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Wilson loop calculation completed" << std::endl; |  | ||||||
|  |  | ||||||
|     // Report the configuration averages. The bound is NWILSON (not a literal |  | ||||||
|     // 10) so the loop always matches the size of the accumulators above. |  | ||||||
|     for(int iw=1; iw<=NWILSON; iw++){ |  | ||||||
|         LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // epilogue |  | ||||||
|     LOG(Message) << "Grid is finalizing now" << std::endl; |  | ||||||
|     Grid_finalize(); |  | ||||||
|  |  | ||||||
|     return EXIT_SUCCESS; |  | ||||||
| } |  | ||||||
| @@ -21,16 +21,3 @@ problem. The test case works with icpc and with clang++, but fails consistently | |||||||
| current variants. | current variants. | ||||||
|  |  | ||||||
| Peter | Peter | ||||||
|  |  | ||||||
|  |  | ||||||
| ************ |  | ||||||
|  |  | ||||||
| Second GCC bug reported, see Issue 100. |  | ||||||
|  |  | ||||||
| https://wandbox.org/permlink/tzssJza6R9XnqANw |  | ||||||
| https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652 |  | ||||||
|  |  | ||||||
| Travis is now failing under gcc-5 for Test_simd, since I added more comprehensive testing to the |  | ||||||
| CI test suite. Travis's runtime limits and weak cores are starting to show. |  | ||||||
|  |  | ||||||
| Travis uses 5.4.1 for g++-5. |  | ||||||
|   | |||||||
| @@ -1,86 +0,0 @@ | |||||||
| #! /bin/sh |  | ||||||
|  |  | ||||||
| prefix=@prefix@ |  | ||||||
| exec_prefix=@exec_prefix@ |  | ||||||
| includedir=@includedir@ |  | ||||||
|  |  | ||||||
| # Print the list of supported options to stdout, then terminate the script. |  | ||||||
| # $1: exit status to terminate with. If it is omitted, `exit` receives no |  | ||||||
| #     operand and the script ends with the status of the preceding `cat`. |  | ||||||
| usage() |  | ||||||
| { |  | ||||||
|   cat <<EOF |  | ||||||
| Usage: grid-config [OPTION] |  | ||||||
|  |  | ||||||
| Known values for OPTION are: |  | ||||||
|  |  | ||||||
|   --prefix     show Grid installation prefix |  | ||||||
|   --cxxflags   print pre-processor and compiler flags |  | ||||||
|   --ldflags    print library linking flags |  | ||||||
|   --libs       print library linking information |  | ||||||
|   --summary    print full build summary |  | ||||||
|   --help       display this help and exit |  | ||||||
|   --version    output version information |  | ||||||
|   --git        print git revision |  | ||||||
|  |  | ||||||
| EOF |  | ||||||
|    |  | ||||||
|   # Never returns to the caller: the whole script exits here. |  | ||||||
|   exit $1 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| # With no arguments at all, show the help text and fail. |  | ||||||
| if test $# -eq 0; then |  | ||||||
|   usage 1 |  | ||||||
| fi |  | ||||||
|  |  | ||||||
| cflags=false |  | ||||||
| libs=false |  | ||||||
|  |  | ||||||
| # Handle the options one at a time, in the order given. |  | ||||||
| while test $# -gt 0; do |  | ||||||
|   # Split off the value of any "--name=value" style argument. |  | ||||||
|   case "$1" in |  | ||||||
|     -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; |  | ||||||
|     *) optarg= ;; |  | ||||||
|   esac |  | ||||||
|  |  | ||||||
|   case "$1" in |  | ||||||
|     --prefix) |  | ||||||
|       # Quoted so a prefix containing whitespace is printed unchanged. |  | ||||||
|       echo "$prefix" |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --version) |  | ||||||
|       echo @VERSION@ |  | ||||||
|       exit 0 |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --git) |  | ||||||
|       echo "@GRID_BRANCH@ @GRID_SHA@" |  | ||||||
|       exit 0 |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --help) |  | ||||||
|       usage 0 |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --cxxflags) |  | ||||||
|       echo @GRID_CXXFLAGS@ |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --ldflags) |  | ||||||
|       echo @GRID_LDFLAGS@ |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --libs) |  | ||||||
|       echo @GRID_LIBS@ |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     --summary) |  | ||||||
|       echo "" |  | ||||||
|       echo "@GRID_SUMMARY@" |  | ||||||
|       echo "" |  | ||||||
|     ;; |  | ||||||
|  |  | ||||||
|     *) |  | ||||||
|       # Unknown option: usage exits the script itself, so the failure status |  | ||||||
|       # has to be passed to it. A bare `usage` would exit with the status of |  | ||||||
|       # its `cat` (normally 0) and report success for a bad option. |  | ||||||
|       usage 1 |  | ||||||
|     ;; |  | ||||||
|   esac |  | ||||||
|   shift |  | ||||||
| done |  | ||||||
|  |  | ||||||
| exit 0 |  | ||||||
| @@ -39,17 +39,19 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/algorithms/approx/MultiShiftFunction.h> | #include <Grid/algorithms/approx/MultiShiftFunction.h> | ||||||
| 
 | 
 | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradient.h> | #include <Grid/algorithms/iterative/ConjugateGradient.h> | ||||||
|  | #include <Grid/algorithms/iterative/ConjugateGradientShifted.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateResidual.h> | #include <Grid/algorithms/iterative/ConjugateResidual.h> | ||||||
| #include <Grid/algorithms/iterative/NormalEquations.h> | #include <Grid/algorithms/iterative/NormalEquations.h> | ||||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||||
|  | 
 | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
| 
 | 
 | ||||||
| // Lanczos support
 | // Lanczos support
 | ||||||
| //#include <Grid/algorithms/iterative/MatrixUtils.h>
 | #include <Grid/algorithms/iterative/MatrixUtils.h> | ||||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||||
|  | 
 | ||||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | #include <Grid/algorithms/CoarsenedMatrix.h> | ||||||
| #include <Grid/algorithms/FFT.h> |  | ||||||
| 
 | 
 | ||||||
| // Eigen/lanczos
 | // Eigen/lanczos
 | ||||||
| // EigCg
 | // EigCg
 | ||||||
| @@ -1,7 +1,7 @@ | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #include <Grid/GridCore.h> | #include <Grid/Grid.h> | ||||||
| 
 | 
 | ||||||
| namespace Grid { | namespace Grid { | ||||||
| 
 | 
 | ||||||
| @@ -11,12 +11,11 @@ int PointerCache::victim; | |||||||
| 
 | 
 | ||||||
| void *PointerCache::Insert(void *ptr,size_t bytes) { | void *PointerCache::Insert(void *ptr,size_t bytes) { | ||||||
| 
 | 
 | ||||||
|   if (bytes < 4096 ) return ptr; |   if (bytes < 4096 ) return NULL; | ||||||
| 
 | 
 | ||||||
| #ifdef GRID_OMP | #ifdef _OPENMP | ||||||
|   assert(omp_in_parallel()==0); |   assert(omp_in_parallel()==0); | ||||||
| #endif  | #endif  | ||||||
| 
 |  | ||||||
|   void * ret = NULL; |   void * ret = NULL; | ||||||
|   int v = -1; |   int v = -1; | ||||||
| 
 | 
 | ||||||
| @@ -92,34 +92,18 @@ public: | |||||||
|     size_type bytes = __n*sizeof(_Tp); |     size_type bytes = __n*sizeof(_Tp); | ||||||
| 
 | 
 | ||||||
|     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); |     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); | ||||||
|     //    if ( ptr != NULL ) 
 |  | ||||||
|     //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;
 |  | ||||||
|      |      | ||||||
|     //////////////////
 |  | ||||||
|     // Hack 2MB align; could make option probably doesn't need configurability
 |  | ||||||
|     //////////////////
 |  | ||||||
| //define GRID_ALLOC_ALIGN (128)
 |  | ||||||
| #define GRID_ALLOC_ALIGN (2*1024*1024) |  | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); | ||||||
| #else | #else | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); | ||||||
| #endif | #endif | ||||||
|     //    std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl;
 | 
 | ||||||
|     // First touch optimise in threaded loop
 |  | ||||||
|     uint8_t *cp = (uint8_t *)ptr; |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp parallel for |  | ||||||
| #endif |  | ||||||
|     for(size_type n=0;n<bytes;n+=4096){ |  | ||||||
|       cp[n]=0; |  | ||||||
|     } |  | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   void deallocate(pointer __p, size_type __n) {  |   void deallocate(pointer __p, size_type __n) {  | ||||||
|     size_type bytes = __n * sizeof(_Tp); |     size_type bytes = __n * sizeof(_Tp); | ||||||
| 
 |  | ||||||
|     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); |     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); | ||||||
| 
 | 
 | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
| @@ -198,19 +182,10 @@ public: | |||||||
|   pointer allocate(size_type __n, const void* _p= 0)  |   pointer allocate(size_type __n, const void* _p= 0)  | ||||||
|   { |   { | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); |     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); | ||||||
| #else | #else | ||||||
|     _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); |     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); | ||||||
| #endif | #endif | ||||||
|     size_type bytes = __n*sizeof(_Tp); |  | ||||||
|     uint8_t *cp = (uint8_t *)ptr; |  | ||||||
|     if ( ptr ) {  |  | ||||||
|     // One touch per 4k page, static OMP loop to catch same loop order
 |  | ||||||
| #pragma omp parallel for schedule(static) |  | ||||||
|       for(size_type n=0;n<bytes;n+=4096){ |  | ||||||
| 	cp[n]=0; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|   void deallocate(pointer __p, size_type) {  |   void deallocate(pointer __p, size_type) {  | ||||||
| @@ -42,7 +42,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/cshift/Cshift_mpi.h> | #include <Grid/cshift/Cshift_mpi.h> | ||||||
| #endif  | #endif  | ||||||
| 
 | 
 | ||||||
| #ifdef GRID_COMMS_MPIT | #ifdef GRID_COMMS_MPI3L | ||||||
| #include <Grid/cshift/Cshift_mpi.h> | #include <Grid/cshift/Cshift_mpi.h> | ||||||
| #endif  | #endif  | ||||||
| 
 | 
 | ||||||
| @@ -1,37 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/DisableWarnings.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef DISABLE_WARNINGS_H |  | ||||||
| #define DISABLE_WARNINGS_H |  | ||||||
|  |  | ||||||
|  // disables an Intel-compiler-specific warning (triggered in json.hpp) |  | ||||||
| #pragma warning disable 488   |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
							
								
								
									
										54
									
								
								lib/Grid.h
									
									
									
									
									
								
							
							
						
						
									
										54
									
								
								lib/Grid.h
									
									
									
									
									
								
							| @@ -38,12 +38,52 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_H | #ifndef GRID_H | ||||||
| #define GRID_H | #define GRID_H | ||||||
|  |  | ||||||
| #include <Grid/GridCore.h> | /////////////////// | ||||||
| #include <Grid/GridQCDcore.h> | // Std C++ dependencies | ||||||
| #include <Grid/qcd/action/Action.h> | /////////////////// | ||||||
| #include <Grid/qcd/utils/GaugeFix.h> | #include <cassert> | ||||||
| #include <Grid/qcd/smearing/Smearing.h> | #include <complex> | ||||||
| #include <Grid/parallelIO/MetaData.h> | #include <vector> | ||||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | #include <iostream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <random> | ||||||
|  | #include <functional> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <signal.h> | ||||||
|  | #include <ctime> | ||||||
|  | #include <sys/time.h> | ||||||
|  | #include <chrono> | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Grid headers | ||||||
|  | /////////////////// | ||||||
|  | #include "Config.h" | ||||||
|  | #include <Grid/Timer.h> | ||||||
|  | #include <Grid/PerfCount.h> | ||||||
|  | #include <Grid/Log.h> | ||||||
|  | #include <Grid/AlignedAllocator.h> | ||||||
|  | #include <Grid/Simd.h> | ||||||
|  | #include <Grid/serialisation/Serialisation.h> | ||||||
|  | #include <Grid/Threads.h> | ||||||
|  | #include <Grid/Lexicographic.h> | ||||||
|  | #include <Grid/Init.h> | ||||||
|  | #include <Grid/Communicator.h>  | ||||||
|  | #include <Grid/Cartesian.h>     | ||||||
|  | #include <Grid/Tensors.h>       | ||||||
|  | #include <Grid/Lattice.h>       | ||||||
|  | #include <Grid/Cshift.h>        | ||||||
|  | #include <Grid/Stencil.h>       | ||||||
|  | #include <Grid/Algorithms.h>    | ||||||
|  | #include <Grid/parallelIO/BinaryIO.h> | ||||||
|  | #include <Grid/FFT.h> | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/QCD.h> | ||||||
|  | #include <Grid/parallelIO/NerscIO.h> | ||||||
|  | #include <Grid/qcd/hmc/NerscCheckpointer.h> | ||||||
|  | #include <Grid/qcd/hmc/HmcRunner.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,29 +0,0 @@ | |||||||
| #ifndef GRID_STD_H |  | ||||||
| #define GRID_STD_H |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Std C++ dependencies |  | ||||||
| /////////////////// |  | ||||||
| #include <cassert> |  | ||||||
| #include <complex> |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <random> |  | ||||||
| #include <functional> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <signal.h> |  | ||||||
| #include <ctime> |  | ||||||
| #include <sys/time.h> |  | ||||||
| #include <chrono> |  | ||||||
| #include <zlib.h> |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Grid config |  | ||||||
| /////////////////// |  | ||||||
| #include "Config.h" |  | ||||||
|  |  | ||||||
| #endif /* GRID_STD_H */ |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| #pragma once |  | ||||||
| #if defined __GNUC__ |  | ||||||
| #pragma GCC diagnostic push |  | ||||||
| #pragma GCC diagnostic ignored "-Wdeprecated-declarations" |  | ||||||
| #endif |  | ||||||
| #include <Grid/Eigen/Dense> |  | ||||||
| #if defined __GNUC__ |  | ||||||
| #pragma GCC diagnostic pop |  | ||||||
| #endif |  | ||||||
| @@ -41,15 +41,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <signal.h> | #include <signal.h> | ||||||
| #include <iostream> | #include <iostream> | ||||||
| #include <iterator> | #include <iterator> | ||||||
|  | #include <Grid/Grid.h> | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
| #include <iterator> | #include <iterator> | ||||||
| #include <cstdlib> | #include <cstdlib> | ||||||
| #include <memory> | #include <memory> | ||||||
| 
 | 
 | ||||||
| #include <Grid/Grid.h> |  | ||||||
| 
 |  | ||||||
| #include <Grid/util/CompilerCompatible.h> |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| #include <fenv.h> | #include <fenv.h> | ||||||
| #ifdef __APPLE__ | #ifdef __APPLE__ | ||||||
| @@ -219,68 +216,11 @@ void Grid_init(int *argc,char ***argv) | |||||||
|     int MB; |     int MB; | ||||||
|     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); |     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); | ||||||
|     GridCmdOptionInt(arg,MB); |     GridCmdOptionInt(arg,MB); | ||||||
|     uint64_t MB64 = MB; |     CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024; | ||||||
|     CartesianCommunicator::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){ |  | ||||||
|     CartesianCommunicator::Hugepages = 1; |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ |  | ||||||
|     Grid_debug_handler_init(); |  | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   CartesianCommunicator::Init(argc,argv); |   CartesianCommunicator::Init(argc,argv); | ||||||
| 
 | 
 | ||||||
|   if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){ |  | ||||||
|     Grid_quiesce_nodes(); |  | ||||||
|   } else {  |  | ||||||
|     FILE *fp; |  | ||||||
|     std::ostringstream fname; |  | ||||||
|     fname<<"Grid.stdout."; |  | ||||||
|     fname<<CartesianCommunicator::RankWorld(); |  | ||||||
|     fp=freopen(fname.str().c_str(),"w",stdout); |  | ||||||
|     assert(fp!=(FILE *)NULL); |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   ////////////////////////////////////
 |  | ||||||
|   // Banner
 |  | ||||||
|   ////////////////////////////////////
 |  | ||||||
|   if ( CartesianCommunicator::RankWorld() == 0 ) {  |  | ||||||
|     std::cout <<std::endl; |  | ||||||
|     std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;  |  | ||||||
|     std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;  |  | ||||||
|     std::cout  << "__|_ |  |  |  |  |  |  |  |  |  |  |  | _|__"<<std::endl;  |  | ||||||
|     std::cout  << "__|_                                    _|__"<<std::endl;  |  | ||||||
|     std::cout  << "__|_   GGGG    RRRR    III    DDDD      _|__"<<std::endl; |  | ||||||
|     std::cout  << "__|_  G        R   R    I     D   D     _|__"<<std::endl; |  | ||||||
|     std::cout  << "__|_  G        R   R    I     D    D    _|__"<<std::endl; |  | ||||||
|     std::cout  << "__|_  G  GG    RRRR     I     D    D    _|__"<<std::endl; |  | ||||||
|     std::cout  << "__|_  G   G    R  R     I     D   D     _|__"<<std::endl; |  | ||||||
|     std::cout  << "__|_   GGGG    R   R   III    DDDD      _|__"<<std::endl; |  | ||||||
|     std::cout  << "__|_                                    _|__"<<std::endl;  |  | ||||||
|     std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;  |  | ||||||
|     std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;  |  | ||||||
|     std::cout  << "  |  |  |  |  |  |  |  |  |  |  |  |  |  |  "<<std::endl;  |  | ||||||
|     std::cout << std::endl; |  | ||||||
|     std::cout << std::endl; |  | ||||||
|     std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl; |  | ||||||
|     std::cout << std::endl; |  | ||||||
|     std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl; |  | ||||||
|     std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl; |  | ||||||
|     std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl; |  | ||||||
|     std::cout << "(at your option) any later version."<<std::endl; |  | ||||||
|     std::cout << std::endl; |  | ||||||
|     std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl; |  | ||||||
|     std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl; |  | ||||||
|     std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl; |  | ||||||
|     std::cout << "GNU General Public License for more details."<<std::endl; |  | ||||||
|     std::cout << std::endl; |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|   ////////////////////////////////////
 |   ////////////////////////////////////
 | ||||||
|   // Logging
 |   // Logging
 | ||||||
|   ////////////////////////////////////
 |   ////////////////////////////////////
 | ||||||
| @@ -290,6 +230,9 @@ void Grid_init(int *argc,char ***argv) | |||||||
|   GridCmdOptionCSL(defaultLog,logstreams); |   GridCmdOptionCSL(defaultLog,logstreams); | ||||||
|   GridLogConfigure(logstreams); |   GridLogConfigure(logstreams); | ||||||
| 
 | 
 | ||||||
|  |   if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){ | ||||||
|  |     Grid_quiesce_nodes(); | ||||||
|  |   } | ||||||
| 
 | 
 | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){ | ||||||
|     arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log"); |     arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log"); | ||||||
| @@ -305,73 +248,94 @@ void Grid_init(int *argc,char ***argv) | |||||||
|     std::cout<<GridLogMessage<<"  --help : this message"<<std::endl; |     std::cout<<GridLogMessage<<"  --help : this message"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"Geometry:"<<std::endl; |     std::cout<<GridLogMessage<<"Geometry:"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"  --mpi n.n.n.n   : default MPI decomposition"<<std::endl;     |     std::cout<<GridLogMessage<<"  --mpi n.n.n.n   : default MPI decomposition"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --threads n     : default number of OMP threads"<<std::endl; |     std::cout<<GridLogMessage<<"  --threads n     : default number of OMP threads"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"  --grid n.n.n.n  : default Grid size"<<std::endl;     |     std::cout<<GridLogMessage<<"  --grid n.n.n.n  : default Grid size"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --shm  M        : allocate M megabytes of shared memory for comms"<<std::endl;     |     std::cout<<GridLogMessage<<"  --shm  M        : allocate M megabytes of shared memory for comms"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;     |  | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl; |     std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<"  --log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"  --log list      : comma separated list from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"  --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;     |     std::cout<<GridLogMessage<<"  --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --debug-signals : catch sigsegv and print a blame report"<<std::endl; |     std::cout<<GridLogMessage<<"  --debug-signals : catch sigsegv and print a blame report"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"  --debug-stdout  : print stdout from EVERY node"<<std::endl;     |     std::cout<<GridLogMessage<<"  --debug-stdout  : print stdout from EVERY node"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --notimestamp   : suppress millisecond resolution stamps"<<std::endl;     |     std::cout<<GridLogMessage<<"  --notimestamp   : suppress millisecond resolution stamps"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"Performance:"<<std::endl; |     std::cout<<GridLogMessage<<"Performance:"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"  --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;     |  | ||||||
|     std::cout<<GridLogMessage<<"  --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;     |  | ||||||
|     std::cout<<GridLogMessage<<"  --comms-overlap    : Overlap comms with compute "<<std::endl;     |  | ||||||
|     std::cout<<GridLogMessage<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;     |     std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;     |     std::cout<<GridLogMessage<<"  --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --dslash-asm    : Wilson kernel for AVX512"<<std::endl;     |     std::cout<<GridLogMessage<<"  --dslash-asm    : Wilson kernel for AVX512"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"  --lebesgue      : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;     |     std::cout<<GridLogMessage<<"  --lebesgue      : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;     |     std::cout<<GridLogMessage<<"  --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     exit(EXIT_SUCCESS); |     exit(EXIT_SUCCESS); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  |   ////////////////////////////////////
 | ||||||
|  |   // Banner
 | ||||||
|  |   ////////////////////////////////////
 | ||||||
|  | 
 | ||||||
|  |   std::string COL_RED    = GridLogColours.colour["RED"]; | ||||||
|  |   std::string COL_PURPLE = GridLogColours.colour["PURPLE"]; | ||||||
|  |   std::string COL_BLACK  = GridLogColours.colour["BLACK"]; | ||||||
|  |   std::string COL_GREEN  = GridLogColours.colour["GREEN"]; | ||||||
|  |   std::string COL_BLUE   = GridLogColours.colour["BLUE"]; | ||||||
|  |   std::string COL_YELLOW = GridLogColours.colour["YELLOW"]; | ||||||
|  |   std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"]; | ||||||
|  |    | ||||||
|  |   std::cout <<std::endl; | ||||||
|  |   std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_RED  << "__|_ |  |  |  "<<             "|  |  | "<<COL_PURPLE<<" |  |  |"<<                "  |  |  | _|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_RED  << "__|_          "<<             "        "<<COL_PURPLE<<"        "<<                "          _|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_PURPLE<<"    _|__"<<std::endl; | ||||||
|  |   std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_PURPLE<<"    _|__"<<std::endl; | ||||||
|  |   std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_PURPLE<<"    _|__"<<std::endl; | ||||||
|  |   std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G  GG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_GREEN <<"    _|__"<<std::endl; | ||||||
|  |   std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G   G   "<<COL_RED<<" R  R   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_GREEN <<"    _|__"<<std::endl; | ||||||
|  |   std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" R   R  "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_GREEN <<"    _|__"<<std::endl; | ||||||
|  |   std::cout <<COL_BLUE << "__|_          "<<             "        "<<COL_GREEN <<"        "<<                "          _|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||||
|  |   std::cout <<COL_BLUE << "  |  |  |  |  "<<             "|  |  | "<<COL_GREEN <<" |  |  |"<<                "  |  |  |  |  "<<std::endl;  | ||||||
|  |   std::cout << std::endl; | ||||||
|  |   std::cout << std::endl; | ||||||
|  |   std::cout <<COL_YELLOW<< std::endl; | ||||||
|  |   std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl; | ||||||
|  |   std::cout << std::endl; | ||||||
|  |   std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl; | ||||||
|  |   std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl; | ||||||
|  |   std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl; | ||||||
|  |   std::cout << "(at your option) any later version."<<std::endl; | ||||||
|  |   std::cout << std::endl; | ||||||
|  |   std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl; | ||||||
|  |   std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl; | ||||||
|  |   std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl; | ||||||
|  |   std::cout << "GNU General Public License for more details."<<std::endl; | ||||||
|  |   std::cout << COL_BACKGROUND <<std::endl; | ||||||
|  |   std::cout << std::endl; | ||||||
|  | 
 | ||||||
|   ////////////////////////////////////
 |   ////////////////////////////////////
 | ||||||
|   // Debug and performance options
 |   // Debug and performance options
 | ||||||
|   ////////////////////////////////////
 |   ////////////////////////////////////
 | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  |   if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ | ||||||
|  |     Grid_debug_handler_init(); | ||||||
|  |   } | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){ | ||||||
|     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll; |     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll; | ||||||
|     QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll; |  | ||||||
|   } |   } | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){ | ||||||
|     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm; |     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm; | ||||||
|     QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm; |  | ||||||
|   } |   } | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){ | ||||||
|     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric; |     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric; | ||||||
|     QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptGeneric; |  | ||||||
|   } |   } | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){ |  | ||||||
|     QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsAndCompute; |  | ||||||
|   } else { |  | ||||||
|     QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; |  | ||||||
|   } |  | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-concurrent") ){ |  | ||||||
|     CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicyConcurrent); |  | ||||||
|   } |  | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){ |  | ||||||
|     CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){ | ||||||
|     LebesgueOrder::UseLebesgueOrder=1; |     LebesgueOrder::UseLebesgueOrder=1; | ||||||
|   } |   } | ||||||
|   CartesianCommunicator::nCommThreads = -1; | 
 | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-threads") ){ | 
 | ||||||
|     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--comms-threads"); |  | ||||||
|     GridCmdOptionInt(arg,CartesianCommunicator::nCommThreads); |  | ||||||
|   } |  | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){ | ||||||
|     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking"); |     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking"); | ||||||
|     GridCmdOptionIntVector(arg,LebesgueOrder::Block); |     GridCmdOptionIntVector(arg,LebesgueOrder::Block); | ||||||
| @@ -387,12 +351,9 @@ void Grid_init(int *argc,char ***argv) | |||||||
| 		  Grid_default_mpi); | 		  Grid_default_mpi); | ||||||
| 
 | 
 | ||||||
|   std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl; |   std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl; | ||||||
|   if ( CartesianCommunicator::Hugepages) { |  | ||||||
|     std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl; |  | ||||||
|   } |  | ||||||
| 
 | 
 | ||||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){ |   if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){ | ||||||
|     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; |     std::cout<<GridLogMessage<<"Grid Decomposition\n"; | ||||||
|     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; |     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; |     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; |     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; | ||||||
| @@ -408,36 +369,27 @@ void Grid_init(int *argc,char ***argv) | |||||||
|    |    | ||||||
| void Grid_finalize(void) | void Grid_finalize(void) | ||||||
| { | { | ||||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) | #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)  | ||||||
|   MPI_Finalize(); |   MPI_Finalize(); | ||||||
|   Grid_unquiesce_nodes(); |   Grid_unquiesce_nodes(); | ||||||
| #endif | #endif | ||||||
| #if defined (GRID_COMMS_SHMEM) |  | ||||||
|   shmem_finalize(); |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void GridLogLayout() { |  | ||||||
|     std::cout << GridLogMessage << "Grid Layout\n"; |  | ||||||
|     std::cout << GridLogMessage << "\tGlobal lattice size  : "<< GridCmdVectorIntToString(GridDefaultLatt()) << std::endl; |  | ||||||
|     std::cout << GridLogMessage << "\tOpenMP threads       : "<< GridThread::GetThreads() <<std::endl; |  | ||||||
|     std::cout << GridLogMessage << "\tMPI tasks            : "<< GridCmdVectorIntToString(GridDefaultMpi()) << std::endl; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void * Grid_backtrace_buffer[_NBACKTRACE]; | void * Grid_backtrace_buffer[_NBACKTRACE]; | ||||||
| 
 | 
 | ||||||
| void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr) | void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr) | ||||||
| { | { | ||||||
|   fprintf(stderr,"Caught signal %d\n",si->si_signo); |   printf("Caught signal %d\n",si->si_signo); | ||||||
|   fprintf(stderr,"  mem address %llx\n",(unsigned long long)si->si_addr); |   printf("  mem address %llx\n",(unsigned long long)si->si_addr); | ||||||
|   fprintf(stderr,"         code %d\n",si->si_code); |   printf("         code %d\n",si->si_code); | ||||||
|  | 
 | ||||||
|   // Linux/Posix
 |   // Linux/Posix
 | ||||||
| #ifdef __linux__ | #ifdef __linux__ | ||||||
|   // And x86 64bit
 |   // And x86 64bit
 | ||||||
| #ifdef __x86_64__ | #ifdef __x86_64__ | ||||||
|   ucontext_t * uc= (ucontext_t *)ptr; |   ucontext_t * uc= (ucontext_t *)ptr; | ||||||
|   struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext; |   struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext; | ||||||
|   fprintf(stderr,"  instruction %llx\n",(unsigned long long)sc->rip); |   printf("  instruction %llx\n",(unsigned long long)sc->rip); | ||||||
| #define REG(A)  printf("  %s %lx\n",#A,sc-> A); | #define REG(A)  printf("  %s %lx\n",#A,sc-> A); | ||||||
|   REG(rdi); |   REG(rdi); | ||||||
|   REG(rsi); |   REG(rsi); | ||||||
| @@ -460,11 +412,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr) | |||||||
|   REG(r15); |   REG(r15); | ||||||
| #endif | #endif | ||||||
| #endif | #endif | ||||||
|   fflush(stderr); |   BACKTRACE(); | ||||||
|   BACKTRACEFP(stderr); |  | ||||||
|   fprintf(stderr,"Called backtrace\n"); |  | ||||||
|   fflush(stdout); |  | ||||||
|   fflush(stderr); |  | ||||||
|   exit(0); |   exit(0); | ||||||
|   return; |   return; | ||||||
| }; | }; | ||||||
| @@ -477,12 +425,9 @@ void Grid_debug_handler_init(void) | |||||||
|   sa.sa_flags    = SA_SIGINFO; |   sa.sa_flags    = SA_SIGINFO; | ||||||
|   sigaction(SIGSEGV,&sa,NULL); |   sigaction(SIGSEGV,&sa,NULL); | ||||||
|   sigaction(SIGTRAP,&sa,NULL); |   sigaction(SIGTRAP,&sa,NULL); | ||||||
|   sigaction(SIGBUS,&sa,NULL); |  | ||||||
| 
 | 
 | ||||||
|   feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO); |   feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO); | ||||||
| 
 | 
 | ||||||
|   sigaction(SIGFPE,&sa,NULL); |   sigaction(SIGFPE,&sa,NULL); | ||||||
|   sigaction(SIGKILL,&sa,NULL); |  | ||||||
|   sigaction(SIGILL,&sa,NULL); |  | ||||||
| } | } | ||||||
| } | } | ||||||
| @@ -46,7 +46,6 @@ namespace Grid { | |||||||
|   const int              &GridThreads(void)  ; |   const int              &GridThreads(void)  ; | ||||||
|   void                    GridSetThreads(int t) ; |   void                    GridSetThreads(int t) ; | ||||||
|   void GridLogTimestamp(int); |   void GridLogTimestamp(int); | ||||||
|   void GridLogLayout(); |  | ||||||
| 
 | 
 | ||||||
|   // Common parsing chores
 |   // Common parsing chores
 | ||||||
|   std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option); |   std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option); | ||||||
| @@ -29,11 +29,9 @@ See the full license in the file "LICENSE" in the top level distribution | |||||||
| directory | directory | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/Grid.h> | ||||||
| #include <Grid/util/CompilerCompatible.h> |  | ||||||
| 
 | 
 | ||||||
| #include <cxxabi.h> | #include <cxxabi.h> | ||||||
| #include <memory> |  | ||||||
| 
 | 
 | ||||||
| namespace Grid { | namespace Grid { | ||||||
| 
 | 
 | ||||||
| @@ -95,7 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | |||||||
| ////////////////////////////////////////////////////////////
 | ////////////////////////////////////////////////////////////
 | ||||||
| void Grid_quiesce_nodes(void) { | void Grid_quiesce_nodes(void) { | ||||||
|   int me = 0; |   int me = 0; | ||||||
| #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) | #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L) | ||||||
|   MPI_Comm_rank(MPI_COMM_WORLD, &me); |   MPI_Comm_rank(MPI_COMM_WORLD, &me); | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_SHMEM | #ifdef GRID_COMMS_SHMEM | ||||||
| @@ -110,8 +110,8 @@ public: | |||||||
|   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ |   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ | ||||||
| 
 | 
 | ||||||
|     if ( log.active ) { |     if ( log.active ) { | ||||||
|       stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : "; |       stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : "; | ||||||
|       stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : "; |       stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : "; | ||||||
|       if ( log.timestamp ) { |       if ( log.timestamp ) { | ||||||
| 	StopWatch.Stop(); | 	StopWatch.Stop(); | ||||||
| 	GridTime now = StopWatch.Elapsed(); | 	GridTime now = StopWatch.Elapsed(); | ||||||
| @@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3 | |||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
| if BUILD_COMMS_MPIT | if BUILD_COMMS_MPI3L | ||||||
|   extra_sources+=communicator/Communicator_mpit.cc |   extra_sources+=communicator/Communicator_mpi3_leader.cc | ||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										
											BIN
										
									
								
								lib/Old/Endeavour.tgz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								lib/Old/Endeavour.tgz
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										154
									
								
								lib/Old/Tensor_peek.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										154
									
								
								lib/Old/Tensor_peek.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,154 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/Old/Tensor_peek.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_MATH_PEEK_H | ||||||
|  | #define GRID_MATH_PEEK_H | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | ////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Peek on a specific index; returns a scalar in that index, tensor inherits rest | ||||||
|  | ////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // If we hit the right index, return scalar with no further recursion | ||||||
|  |  | ||||||
|  | //template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;} | ||||||
|  | //template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;} | ||||||
|  | //template<int Level> inline RealF peekIndex(const RealF arg) { return arg;} | ||||||
|  | //template<int Level> inline RealD peekIndex(const RealD arg) { return arg;} | ||||||
|  | #if 0 | ||||||
|  | // Scalar peek, no indices | ||||||
|  | template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iScalar<vtype> &arg) ->  iScalar<vtype>  | ||||||
|  | { | ||||||
|  |   return arg; | ||||||
|  | } | ||||||
|  | // Vector peek, one index | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iVector<vtype,N> &arg,int i) -> iScalar<vtype> // Index matches | ||||||
|  | { | ||||||
|  |   iScalar<vtype> ret;                              // return scalar | ||||||
|  |   ret._internal = arg._internal[i]; | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | // Matrix peek, two indices | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) ->  iScalar<vtype> | ||||||
|  | { | ||||||
|  |   iScalar<vtype> ret;                              // return scalar | ||||||
|  |   ret._internal = arg._internal[i][j]; | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ///////////// | ||||||
|  | // No match peek for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue | ||||||
|  | ///////////// | ||||||
|  | // scalar | ||||||
|  | template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iScalar<vtype> &arg) -> iScalar<decltype(peekIndex<Level>(arg._internal))> | ||||||
|  | { | ||||||
|  |   iScalar<decltype(peekIndex<Level>(arg._internal))> ret; | ||||||
|  |   ret._internal= peekIndex<Level>(arg._internal); | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iScalar<vtype> &arg,int i) ->  iScalar<decltype(peekIndex<Level>(arg._internal,i))>  | ||||||
|  | { | ||||||
|  |   iScalar<decltype(peekIndex<Level>(arg._internal,i))> ret; | ||||||
|  |   ret._internal=peekIndex<Level>(arg._internal,i); | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iScalar<vtype> &arg,int i,int j) ->  iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> | ||||||
|  | { | ||||||
|  |   iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> ret; | ||||||
|  |   ret._internal=peekIndex<Level>(arg._internal,i,j); | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | // vector | ||||||
|  | template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  | auto peekIndex(const iVector<vtype,N> &arg) ->   iVector<decltype(peekIndex<Level>(arg._internal[0])),N> | ||||||
|  | { | ||||||
|  |   iVector<decltype(peekIndex<Level>(arg._internal[0])),N> ret; | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |     ret._internal[ii]=peekIndex<Level>(arg._internal[ii]); | ||||||
|  |   } | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iVector<vtype,N> &arg,int i) ->  iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> | ||||||
|  | { | ||||||
|  |   iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret; | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |     ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i); | ||||||
|  |   } | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iVector<vtype,N> &arg,int i,int j) ->  iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N>  | ||||||
|  | { | ||||||
|  |   iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> ret; | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |     ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i,j); | ||||||
|  |   } | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // matrix | ||||||
|  | template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  | auto peekIndex(const iMatrix<vtype,N> &arg) ->   iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>  | ||||||
|  | { | ||||||
|  |   iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> ret; | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |   for(int jj=0;jj<N;jj++){ | ||||||
|  |     ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb | ||||||
|  |   }} | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iMatrix<vtype,N> &arg,int i) ->   iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N> | ||||||
|  | { | ||||||
|  |   iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N> ret; | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |   for(int jj=0;jj<N;jj++){ | ||||||
|  |     ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i); | ||||||
|  |   }} | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) ->   iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> | ||||||
|  | { | ||||||
|  |   iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> ret; | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |   for(int jj=0;jj<N;jj++){ | ||||||
|  |     ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i,j); | ||||||
|  |   }} | ||||||
|  |   return ret; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
							
								
								
									
										127
									
								
								lib/Old/Tensor_poke.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								lib/Old/Tensor_poke.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,127 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/Old/Tensor_poke.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_MATH_POKE_H | ||||||
|  | #define GRID_MATH_POKE_H | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | ////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Poke a specific index;  | ||||||
|  | ////////////////////////////////////////////////////////////////////////////// | ||||||
|  | #if 0 | ||||||
|  | // Scalar poke | ||||||
|  | template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg) | ||||||
|  | { | ||||||
|  |   ret._internal = arg._internal; | ||||||
|  | } | ||||||
|  | // Vector poke, one index | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i) | ||||||
|  | { | ||||||
|  |   ret._internal[i] = arg._internal; | ||||||
|  | } | ||||||
|  | //Matrix poke, two indices | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j) | ||||||
|  | { | ||||||
|  |   ret._internal[i][j] = arg._internal; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | ///////////// | ||||||
|  | // No match poke for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue | ||||||
|  | ///////////// | ||||||
|  | // scalar | ||||||
|  | template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  | void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))>  &arg) | ||||||
|  | { | ||||||
|  |   pokeIndex<Level>(ret._internal,arg._internal); | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0))> &arg, int i) | ||||||
|  | 		  | ||||||
|  | { | ||||||
|  |   pokeIndex<Level>(ret._internal,arg._internal,i); | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0,0))> &arg,int i,int j) | ||||||
|  | { | ||||||
|  |   pokeIndex<Level>(ret._internal,arg._internal,i,j); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Vector | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iVector<vtype,N> &ret, iVector<decltype(peekIndex<Level>(ret._internal)),N>  &arg) | ||||||
|  | { | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |     pokeIndex<Level>(ret._internal[ii],arg._internal[ii]); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i) | ||||||
|  | { | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |     pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,int i,int j) | ||||||
|  | { | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |     pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i,j); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Matrix | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal)),N> &arg)		  | ||||||
|  | { | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |   for(int jj=0;jj<N;jj++){ | ||||||
|  |     pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj]); | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i) | ||||||
|  | { | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |   for(int jj=0;jj<N;jj++){ | ||||||
|  |     pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i); | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  | template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline  | ||||||
|  |   void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg, int i,int j) | ||||||
|  | { | ||||||
|  |   for(int ii=0;ii<N;ii++){ | ||||||
|  |   for(int jj=0;jj<N;jj++){ | ||||||
|  |     pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j); | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
| @@ -26,8 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| 
 | 
 | ||||||
| #include <Grid/GridCore.h> | #include <Grid/Grid.h> | ||||||
| #include <Grid/perfmon/PerfCount.h> | #include <Grid/PerfCount.h> | ||||||
| 
 | 
 | ||||||
| namespace Grid { | namespace Grid { | ||||||
| 
 | 
 | ||||||
| @@ -40,7 +40,7 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan | |||||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." , INSTRUCTIONS}, |   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." , INSTRUCTIONS}, | ||||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." , CPUCYCLES   }, |   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." , CPUCYCLES   }, | ||||||
|     // 4
 |     // 4
 | ||||||
| #ifdef KNL | #ifdef AVX512 | ||||||
|     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES    }, |     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES    }, | ||||||
|     { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS  }, |     { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS  }, | ||||||
|     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS    }, |     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS    }, | ||||||
| @@ -172,7 +172,7 @@ public: | |||||||
|     const char * name = PerformanceCounterConfigs[PCT].name; |     const char * name = PerformanceCounterConfigs[PCT].name; | ||||||
|     fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 |     fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 | ||||||
|     if (fd == -1) { |     if (fd == -1) { | ||||||
|       fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name); |       fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); | ||||||
|       perror("Error is"); |       perror("Error is"); | ||||||
|     } |     } | ||||||
|     int norm = PerformanceCounterConfigs[PCT].normalisation; |     int norm = PerformanceCounterConfigs[PCT].normalisation; | ||||||
| @@ -181,7 +181,7 @@ public: | |||||||
|     name = PerformanceCounterConfigs[norm].name; |     name = PerformanceCounterConfigs[norm].name; | ||||||
|     cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 |     cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 | ||||||
|     if (cyclefd == -1) { |     if (cyclefd == -1) { | ||||||
|       fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name); |       fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); | ||||||
|       perror("Error is"); |       perror("Error is"); | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
| @@ -205,14 +205,13 @@ public: | |||||||
|   void Stop(void) { |   void Stop(void) { | ||||||
|     count=0; |     count=0; | ||||||
|     cycles=0; |     cycles=0; | ||||||
|  |     size_t ign; | ||||||
| #ifdef __linux__ | #ifdef __linux__ | ||||||
|     ssize_t ign; |  | ||||||
|     if ( fd!= -1) { |     if ( fd!= -1) { | ||||||
|       ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); |       ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); | ||||||
|       ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); |       ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); | ||||||
|       ign=::read(fd, &count, sizeof(long long)); |       ign=::read(fd, &count, sizeof(long long)); | ||||||
|       ign+=::read(cyclefd, &cycles, sizeof(long long)); |       ign=::read(cyclefd, &cycles, sizeof(long long)); | ||||||
|       assert(ign=2*sizeof(long long)); |  | ||||||
|     } |     } | ||||||
|     elapsed = cyclecount() - begin; |     elapsed = cyclecount() - begin; | ||||||
| #else | #else | ||||||
| @@ -172,8 +172,8 @@ namespace Grid { | |||||||
| 
 | 
 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #include <Grid/simd/Grid_vector_types.h> | #include "simd/Grid_vector_types.h" | ||||||
| #include <Grid/simd/Grid_vector_unops.h> | #include "simd/Grid_vector_unops.h" | ||||||
| 
 | 
 | ||||||
| namespace Grid { | namespace Grid { | ||||||
|   // Default precision
 |   // Default precision
 | ||||||
| @@ -1,9 +1,11 @@ | |||||||
| #include <Grid/GridCore.h> | #include <Grid/Grid.h> | ||||||
| #include <Grid/perfmon/PerfCount.h> | #include <Grid/PerfCount.h> | ||||||
| #include <Grid/perfmon/Stat.h> | #include <Grid/Stat.h> | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| namespace Grid {  | namespace Grid {  | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| bool PmuStat::pmu_initialized=false; | bool PmuStat::pmu_initialized=false; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -37,9 +37,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| 
 | 
 | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
| #include <omp.h> | #include <omp.h> | ||||||
| 
 | #ifdef GRID_NUMA | ||||||
| #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)") | #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)") | ||||||
| #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") | #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") | ||||||
|  | #else | ||||||
|  | #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)") | ||||||
|  | #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") | ||||||
|  | #endif | ||||||
| #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") | #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") | ||||||
| #define PARALLEL_REGION       _Pragma("omp parallel") | #define PARALLEL_REGION       _Pragma("omp parallel") | ||||||
| #define PARALLEL_CRITICAL     _Pragma("omp critical") | #define PARALLEL_CRITICAL     _Pragma("omp critical") | ||||||
| @@ -51,9 +55,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #define PARALLEL_CRITICAL | #define PARALLEL_CRITICAL | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #define parallel_for       PARALLEL_FOR_LOOP for |  | ||||||
| #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for |  | ||||||
| 
 |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| 
 | 
 | ||||||
|   // Introduce a class to gain deterministic bit reproducible reduction.
 |   // Introduce a class to gain deterministic bit reproducible reduction.
 | ||||||
| @@ -267,7 +267,8 @@ namespace Grid { | |||||||
|       SimpleCompressor<siteVector> compressor; |       SimpleCompressor<siteVector> compressor; | ||||||
|       Stencil.HaloExchange(in,compressor); |       Stencil.HaloExchange(in,compressor); | ||||||
|  |  | ||||||
|       parallel_for(int ss=0;ss<Grid()->oSites();ss++){ | PARALLEL_FOR_LOOP | ||||||
|  |       for(int ss=0;ss<Grid()->oSites();ss++){ | ||||||
|         siteVector res = zero; |         siteVector res = zero; | ||||||
| 	siteVector nbr; | 	siteVector nbr; | ||||||
| 	int ptype; | 	int ptype; | ||||||
| @@ -379,7 +380,8 @@ namespace Grid { | |||||||
| 	  Subspace.ProjectToSubspace(oProj,oblock); | 	  Subspace.ProjectToSubspace(oProj,oblock); | ||||||
| 	  //	  blockProject(iProj,iblock,Subspace.subspace); | 	  //	  blockProject(iProj,iblock,Subspace.subspace); | ||||||
| 	  //	  blockProject(oProj,oblock,Subspace.subspace); | 	  //	  blockProject(oProj,oblock,Subspace.subspace); | ||||||
| 	  parallel_for(int ss=0;ss<Grid()->oSites();ss++){ | PARALLEL_FOR_LOOP | ||||||
|  | 	  for(int ss=0;ss<Grid()->oSites();ss++){ | ||||||
| 	    for(int j=0;j<nbasis;j++){ | 	    for(int j=0;j<nbasis;j++){ | ||||||
| 	      if( disp!= 0 ) { | 	      if( disp!= 0 ) { | ||||||
| 		A[p]._odata[ss](j,i) = oProj._odata[ss](j); | 		A[p]._odata[ss](j,i) = oProj._odata[ss](j); | ||||||
| @@ -425,7 +427,7 @@ namespace Grid { | |||||||
| 	A[p]=zero; | 	A[p]=zero; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       GridParallelRNG  RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34})); |       GridParallelRNG  RNG(Grid()); RNG.SeedRandomDevice(); | ||||||
|       Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val); |       Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val); | ||||||
|  |  | ||||||
|       Complex one(1.0); |       Complex one(1.0); | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								lib/algorithms/approx/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/algorithms/approx/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -197,9 +197,8 @@ namespace Grid { | |||||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |  | ||||||
|       GridBase *grid=in._grid; |       GridBase *grid=in._grid; | ||||||
|  |  | ||||||
| //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||||
|       //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||||
|  |  | ||||||
|       int vol=grid->gSites(); |       int vol=grid->gSites(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -25,7 +25,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | |||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| double MultiShiftFunction::approx(double x) | double MultiShiftFunction::approx(double x) | ||||||
|   | |||||||
| @@ -16,7 +16,7 @@ | |||||||
| #define INCLUDED_ALG_REMEZ_H | #define INCLUDED_ALG_REMEZ_H | ||||||
|  |  | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <Grid/GridStd.h> | #include <Config.h> | ||||||
|  |  | ||||||
| #ifdef HAVE_LIBGMP | #ifdef HAVE_LIBGMP | ||||||
| #include "bigfloat.h" | #include "bigfloat.h" | ||||||
|   | |||||||
| @@ -1,600 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H |  | ||||||
| #define GRID_BLOCK_CONJUGATE_GRADIENT_H |  | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Block conjugate gradient. Dimension zero should be the block direction |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| template <class Field> |  | ||||||
| class BlockConjugateGradient : public OperatorFunction<Field> { |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   typedef typename Field::scalar_type scomplex; |  | ||||||
|  |  | ||||||
|   int blockDim ; |  | ||||||
|   int Nblock; |  | ||||||
|  |  | ||||||
|   BlockCGtype CGtype; |  | ||||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. |  | ||||||
|                            // Defaults true. |  | ||||||
|   RealD Tolerance; |  | ||||||
|   Integer MaxIterations; |  | ||||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |  | ||||||
|    |  | ||||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) |  | ||||||
|     : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) |  | ||||||
|   {}; |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Thin QR factorisation (google it) |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, |  | ||||||
| 		 Eigen::MatrixXcd &C, |  | ||||||
| 		 Eigen::MatrixXcd &Cinv, |  | ||||||
| 		 Field & Q, |  | ||||||
| 		 const Field & R) |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   //Dimensions |  | ||||||
|   // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock |  | ||||||
|   // |  | ||||||
|   // Rdag R = m_rr = Herm = L L^dag        <-- Cholesky decomposition (LLT routine in Eigen) |  | ||||||
|   // |  | ||||||
|   //   Q  C = R => Q = R C^{-1} |  | ||||||
|   // |  | ||||||
|   // Want  Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}  |  | ||||||
|   // |  | ||||||
|   // Set C = L^{dag}, and then Q^dag Q = ident  |  | ||||||
|   // |  | ||||||
|   // Checks: |  | ||||||
|   // Cdag C = Rdag R ; passes. |  | ||||||
|   // QdagQ  = 1      ; passes |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Cholesky from Eigen |  | ||||||
|   // There exists a ldlt that is documented as more stable |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  |  | ||||||
|  |  | ||||||
|   C    = L.adjoint(); |  | ||||||
|   Cinv = C.inverse(); |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Q = R C^{-1} |  | ||||||
|   // |  | ||||||
|   // Q_j  = R_i Cinv(i,j)  |  | ||||||
|   // |  | ||||||
|   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // FIXME:: make a sliceMulMatrix to avoid zero vector |  | ||||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); |  | ||||||
| } |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Call one of several implementations |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   if ( CGtype == BlockCGrQ ) { |  | ||||||
|     BlockCGrQsolve(Linop,Src,Psi); |  | ||||||
|   } else if (CGtype == BlockCG ) { |  | ||||||
|     BlockCGsolve(Linop,Src,Psi); |  | ||||||
|   } else if (CGtype == CGmultiRHS ) { |  | ||||||
|     CGmultiRHSsolve(Linop,Src,Psi); |  | ||||||
|   } else { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////// |  | ||||||
| // BlockCGrQ implementation: |  | ||||||
| //-------------------------- |  | ||||||
| // X is guess/Solution |  | ||||||
| // B is RHS |  | ||||||
| // Solve A X_i = B_i    ;        i refers to Nblock index |  | ||||||
| //////////////////////////////////////////////////////////////////////////// |  | ||||||
| void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   Nblock = B._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   X.checkerboard = B.checkerboard; |  | ||||||
|   conformable(X, B); |  | ||||||
|  |  | ||||||
|   Field tmp(B); |  | ||||||
|   Field Q(B); |  | ||||||
|   Field D(B); |  | ||||||
|   Field Z(B); |  | ||||||
|   Field AD(B); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,B,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,B,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,X,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   /************************************************************************ |  | ||||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) |  | ||||||
|    ************************************************************************ |  | ||||||
|    * Dimensions: |  | ||||||
|    * |  | ||||||
|    *   X,B==(Nferm x Nblock) |  | ||||||
|    *   A==(Nferm x Nferm) |  | ||||||
|    *   |  | ||||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site |  | ||||||
|    *  |  | ||||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) |  | ||||||
|    * for k:  |  | ||||||
|    *   Z  = AD |  | ||||||
|    *   M  = [D^dag Z]^{-1} |  | ||||||
|    *   X  = X + D MC |  | ||||||
|    *   QS = Q - ZM |  | ||||||
|    *   D  = Q + D S^dag |  | ||||||
|    *   C  = S C |  | ||||||
|    */ |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   // Initial block: initial search dir is guess |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; |  | ||||||
|  |  | ||||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) |  | ||||||
|  |  | ||||||
|   Linop.HermOp(X, AD); |  | ||||||
|   tmp = B - AD;   |  | ||||||
|   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; |  | ||||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); |  | ||||||
|   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; |  | ||||||
|   D=Q; |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   // Timers |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch QRTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|  |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     //3. Z  = AD |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(D, Z);       |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; |  | ||||||
|  |  | ||||||
|     //4. M  = [D^dag Z]^{-1} |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_M       = m_DZ.inverse(); |  | ||||||
|     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; |  | ||||||
|      |  | ||||||
|     //5. X  = X + D MC |  | ||||||
|     m_tmp     = m_M * m_C; |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     //6. QS = Q - ZM |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|     QRTimer.Start(); |  | ||||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); |  | ||||||
|     QRTimer.Stop(); |  | ||||||
|      |  | ||||||
|     //7. D  = Q + D S^dag |  | ||||||
|     m_tmp = m_S.adjoint(); |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     //8. C  = S C |  | ||||||
|     m_C = m_S*m_C; |  | ||||||
|      |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      */ |  | ||||||
|     m_rr = m_C.adjoint() * m_C; |  | ||||||
|  |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     RealD rr; |  | ||||||
|  |  | ||||||
|     for(int b=0;b<Nblock;b++) { |  | ||||||
|       rrsum+=real(m_rr(b,b)); |  | ||||||
|       rr = real(m_rr(b,b))/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; |  | ||||||
|  |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; |  | ||||||
|  |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " |  | ||||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(X, AD); |  | ||||||
|       AD = AD-B; |  | ||||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; |  | ||||||
| 	     |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Block conjugate gradient; Original O'Leary. Dimension zero should be the block direction. Linop is applied through HermOp, Src holds the right-hand sides, Psi is the initial guess on entry and is updated in place. |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // blockDim selects the slice direction; that it is the block dimension is an assumption |  | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   Psi.checkerboard = Src.checkerboard; |  | ||||||
|   conformable(Psi, Src); |  | ||||||
|  |  | ||||||
|   Field P(Src); |  | ||||||
|   Field AP(Src); |  | ||||||
|   Field R(Src); |  | ||||||
|    |  | ||||||
|   Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up: ssq holds the per-slice sliceNorm of Src (the convergence denominator), sssum its total (used only in the per-iteration log); residuals is scratch for the NaN asserts on Src and Psi |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,Src,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Src,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Psi,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   // Initial search dir is guess: AP = HermOp applied to Psi, used below to form the first residual |  | ||||||
|   Linop.HermOp(Psi, AP); |  | ||||||
|    |  | ||||||
|  |  | ||||||
|   /************************************************************************ |  | ||||||
|    * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) |  | ||||||
|    ************************************************************************ |  | ||||||
|    * O'Leary : R = B - A X |  | ||||||
|    * O'Leary : P = M R ; preconditioner M = 1 |  | ||||||
|    * O'Leary : alpha = PAP^{-1} RMR |  | ||||||
|    * O'Leary : beta  = RMR^{-1}_old RMR_new |  | ||||||
|    * O'Leary : X=X+Palpha |  | ||||||
|    * O'Leary : R_new=R_old-AP alpha |  | ||||||
|    * O'Leary : P=MR_new+P beta |  | ||||||
|    * |  | ||||||
|    * Mapping onto this routine: |  | ||||||
|    *   A is Linop.HermOp, B is Src, X is Psi. |  | ||||||
|    *   RMR is held in m_rr and PAP in m_pAp; both are Nblock x Nblock, with |  | ||||||
|    *   Nblock read from Src._grid->_fdimensions[blockDim]. |  | ||||||
|    *   m_pAp and the previous m_rr are inverted with Eigen's inverse() on |  | ||||||
|    *   every iteration. |  | ||||||
|    * |  | ||||||
|    * Exit conditions: |  | ||||||
|    *   Converged when max over b of real(m_rr(b,b))/ssq[b] is below |  | ||||||
|    *   Tolerance*Tolerance; IterationsToComplete is then set to k. |  | ||||||
|    *   If the loop runs out, a message is logged, assert(0) fires when |  | ||||||
|    *   ErrorOnNoConverge is set, and IterationsToComplete is set to k, |  | ||||||
|    *   which at that point equals MaxIterations+1. |  | ||||||
|    * |  | ||||||
|    * NOTE(review): nothing here guards against a singular m_pAp or m_rr, |  | ||||||
|    *   or against a zero entry in ssq - confirm callers rule these out. |  | ||||||
|    * NOTE(review): SolverTimer is stopped only on the converged path. |  | ||||||
|    */ |  | ||||||
|  |  | ||||||
|   R = Src - AP;   |  | ||||||
|   P = R; |  | ||||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|  |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|  |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b)); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; |  | ||||||
|  |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(P, AP); |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Alpha: m_pAp from the slice inner products of P with AP, then alpha = m_pAp^{-1} * m_rr (m_rr is still the value from before this iteration's residual update) |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_pAp,P,AP,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_pAp_inv = m_pAp.inverse(); |  | ||||||
|     m_alpha   = m_pAp_inv * m_rr ; |  | ||||||
|  |  | ||||||
|     // Psi, R update |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi |  | ||||||
|     sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Beta: the old m_rr must be inverted before it is overwritten with the new residual products; beta = m_rr_old^{-1} * m_rr_new |  | ||||||
|     m_rr_inv = m_rr.inverse(); |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_beta = m_rr_inv *m_rr; |  | ||||||
|  |  | ||||||
|     // Search update: AP is reused as scratch for the new direction built from m_beta, P and R, then copied into P |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(AP,m_beta,P,R,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|     P= AP; |  | ||||||
|  |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      * rr = real(m_rr(b,b))/ssq[b] for each block b; only the largest value |  | ||||||
|      * is compared with Tolerance*Tolerance, so every block has to pass. |  | ||||||
|      * On success the residual is recomputed from Psi via HermOp for the |  | ||||||
|      * log only (AP is overwritten), then the function returns. |  | ||||||
|      */ |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     RealD rr; |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       rr = real(m_rr(b,b))/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " |  | ||||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(Psi, AP); |  | ||||||
|       AP = AP-Src; |  | ||||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
| 	     |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // multiRHS conjugate gradient: one scalar alpha/beta per slice (std::vector entries) rather than block matrices, with one HermOp call per iteration on the whole field. Dimension zero should be the block direction |  | ||||||
| // Use this for spread out across nodes. Src holds the right-hand sides; Psi is the initial guess on entry and is updated in place; IterationsToComplete is set to k on exit |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // blockDim selects the slice direction (first dimension is block dim) |  | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   Psi.checkerboard = Src.checkerboard; |  | ||||||
|   conformable(Psi, Src); |  | ||||||
|  |  | ||||||
|   Field P(Src); |  | ||||||
|   Field AP(Src); |  | ||||||
|   Field R(Src); |  | ||||||
|    |  | ||||||
|   std::vector<ComplexD> v_pAp(Nblock); |  | ||||||
|   std::vector<RealD> v_rr (Nblock); |  | ||||||
|   std::vector<RealD> v_rr_inv(Nblock); |  | ||||||
|   std::vector<RealD> v_alpha(Nblock); |  | ||||||
|   std::vector<RealD> v_beta(Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up: ssq holds the per-slice sliceNorm of Src (the convergence denominator), sssum its total (used only in the per-iteration log); residuals is scratch for the NaN asserts on Src and Psi |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,Src,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Src,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Psi,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   // Initial search dir is guess: AP = HermOp applied to Psi, so that R = Src - AP is the first residual and P starts equal to R |  | ||||||
|   Linop.HermOp(Psi, AP); |  | ||||||
|  |  | ||||||
|   R = Src - AP;   |  | ||||||
|   P = R; |  | ||||||
|   sliceNorm(v_rr,R,Orthog); |  | ||||||
|  |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch sliceNormTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|  |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; |  | ||||||
|  |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(P, AP); |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Alpha: v_alpha[b] = v_rr[b] / real(v_pAp[b]); only the real part of the P,AP slice inner product is used |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Psi, R update |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi |  | ||||||
|     sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Beta: 1/v_rr is stored before sliceNorm overwrites v_rr, then v_beta[b] = v_rr_new[b] / v_rr_old[b] |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       v_rr_inv[b] = 1.0/v_rr[b]; |  | ||||||
|     } |  | ||||||
|     sliceNormTimer.Start(); |  | ||||||
|     sliceNorm(v_rr,R,Orthog); |  | ||||||
|     sliceNormTimer.Stop(); |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       v_beta[b] = v_rr_inv[b] *v_rr[b]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Search update: P is passed as both the output and an input of sliceMaddVector, together with v_beta and R |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddVector(P,v_beta,P,R,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      * rr = v_rr[b]/ssq[b] for each block b; only the largest value is |  | ||||||
|      * compared with Tolerance*Tolerance, so every block has to pass. |  | ||||||
|      * On success the residual is recomputed from Psi via HermOp for the |  | ||||||
|      * log only (AP is overwritten), then the function returns. |  | ||||||
|      * If the loop runs out instead, a message is logged, assert(0) fires |  | ||||||
|      * when ErrorOnNoConverge is set, and IterationsToComplete is set to k, |  | ||||||
|      * which at that point equals MaxIterations+1. |  | ||||||
|      * |  | ||||||
|      * NOTE(review): a zero entry in ssq, v_rr or real(v_pAp) would be a |  | ||||||
|      *   division by zero; nothing here guards against it. |  | ||||||
|      * NOTE(review): SolverTimer is stopped only on the converged path, and |  | ||||||
|      *   the "MaddMatrix" log label reports the sliceMaddVector timer. |  | ||||||
|      */ |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       RealD rr = v_rr[b]/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(Psi, AP); |  | ||||||
|       AP = AP-Src; |  | ||||||
|       std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user