mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-24 17:54:47 +01:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			dirac-ITT-
			...
			feature/ha
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | e57eafe388 | 
							
								
								
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -92,7 +92,6 @@ build*/* | |||||||
| ##################### | ##################### | ||||||
| *.xcodeproj/* | *.xcodeproj/* | ||||||
| build.sh | build.sh | ||||||
| .vscode |  | ||||||
|  |  | ||||||
| # Eigen source # | # Eigen source # | ||||||
| ################ | ################ | ||||||
| @@ -107,10 +106,6 @@ lib/fftw/* | |||||||
| m4/lt* | m4/lt* | ||||||
| m4/libtool.m4 | m4/libtool.m4 | ||||||
|  |  | ||||||
| # github pages # |  | ||||||
| ################ |  | ||||||
| gh-pages/ |  | ||||||
|  |  | ||||||
| # Buck files # | # Buck files # | ||||||
| ############## | ############## | ||||||
| .buck* | .buck* | ||||||
| @@ -121,5 +116,4 @@ make-bin-BUCK.sh | |||||||
| # generated sources # | # generated sources # | ||||||
| ##################### | ##################### | ||||||
| lib/qcd/spin/gamma-gen/*.h | lib/qcd/spin/gamma-gen/*.h | ||||||
| lib/qcd/spin/gamma-gen/*.cc | lib/qcd/spin/gamma-gen/*.cc | ||||||
|  |  | ||||||
							
								
								
									
										67
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										67
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -9,6 +9,62 @@ matrix: | |||||||
|     - os:        osx |     - os:        osx | ||||||
|       osx_image: xcode8.3 |       osx_image: xcode8.3 | ||||||
|       compiler: clang |       compiler: clang | ||||||
|  |     - compiler: gcc | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.9 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: VERSION=-4.9 | ||||||
|  |     - compiler: gcc | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-5 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: VERSION=-5 | ||||||
|  |     - compiler: clang | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.8 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|  |     - compiler: clang | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.8 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|        |        | ||||||
| before_install: | before_install: | ||||||
|     - export GRIDDIR=`pwd` |     - export GRIDDIR=`pwd` | ||||||
| @@ -22,10 +78,6 @@ install: | |||||||
|     - export CC=$CC$VERSION |     - export CC=$CC$VERSION | ||||||
|     - export CXX=$CXX$VERSION |     - export CXX=$CXX$VERSION | ||||||
|     - echo $PATH |     - echo $PATH | ||||||
|     - which autoconf |  | ||||||
|     - autoconf  --version |  | ||||||
|     - which automake |  | ||||||
|     - automake  --version |  | ||||||
|     - which $CC |     - which $CC | ||||||
|     - $CC  --version |     - $CC  --version | ||||||
|     - which $CXX |     - which $CXX | ||||||
| @@ -43,4 +95,9 @@ script: | |||||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4 |     - make -j4 | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||||
|     - make check |     - echo make clean | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							| @@ -3,15 +3,10 @@ SUBDIRS = lib benchmarks tests extras | |||||||
|  |  | ||||||
| include $(top_srcdir)/doxygen.inc | include $(top_srcdir)/doxygen.inc | ||||||
|  |  | ||||||
| bin_SCRIPTS=grid-config | tests: all | ||||||
|  | 	$(MAKE) -C tests tests | ||||||
|  |  | ||||||
|  | .PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||||
| .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) |  | ||||||
|  |  | ||||||
| tests-local: all |  | ||||||
| bench-local: all |  | ||||||
| check-local: all |  | ||||||
|  |  | ||||||
| AM_CXXFLAGS += -I$(top_builddir)/include | AM_CXXFLAGS += -I$(top_builddir)/include | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I m4 | ACLOCAL_AMFLAGS = -I m4 | ||||||
|   | |||||||
							
								
								
									
										302
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										302
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,13 +1,41 @@ | |||||||
| # Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid) | # Grid | ||||||
|  | <table> | ||||||
|  | <tr> | ||||||
|  |     <td>Last stable release</td> | ||||||
|  |     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||||
|  |     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> | ||||||
|  |     </td> | ||||||
|  | </tr> | ||||||
|  | <tr> | ||||||
|  |     <td>Development branch</td> | ||||||
|  |     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||||
|  |     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> | ||||||
|  |     </td> | ||||||
|  | </tr> | ||||||
|  | </table> | ||||||
|  |  | ||||||
| **Data parallel C++ mathematical object library.** | **Data parallel C++ mathematical object library.** | ||||||
|  |  | ||||||
| License: GPL v2. | License: GPL v2. | ||||||
|  |  | ||||||
| Last update June 2017. | Last update Nov 2016. | ||||||
|  |  | ||||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||||
|  |  | ||||||
|  | ### Bug report | ||||||
|  |  | ||||||
|  | _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ | ||||||
|  |  | ||||||
|  | When you file an issue, please go though the following checklist: | ||||||
|  |  | ||||||
|  | 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  | ||||||
|  | 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler. | ||||||
|  | 3. Give the exact `configure` command used. | ||||||
|  | 4. Attach `config.log`. | ||||||
|  | 5. Attach `config.summary`. | ||||||
|  | 6. Attach the output of `make V=1`. | ||||||
|  | 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Description | ### Description | ||||||
| @@ -30,68 +58,13 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi | |||||||
| for most programmers. | for most programmers. | ||||||
|  |  | ||||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||||
| Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. | Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). | ||||||
|  |  | ||||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.  | These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | ||||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||||
|  |  | ||||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | MPI, OpenMP, and SIMD parallelism are present in the library. | ||||||
| Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. | Please see https://arxiv.org/abs/1512.03487 for more detail. | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Compilers |  | ||||||
|  |  | ||||||
| Intel ICPC v16.0.3 and later |  | ||||||
|  |  | ||||||
| Clang v3.5 and later (need 3.8 and later for OpenMP) |  | ||||||
|  |  | ||||||
| GCC   v4.9.x (recommended) |  | ||||||
|  |  | ||||||
| GCC   v6.3 and later |  | ||||||
|  |  | ||||||
| ### Important:  |  | ||||||
|  |  | ||||||
| Some versions of GCC appear to have a bug under high optimisation (-O2, -O3). |  | ||||||
|  |  | ||||||
| The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates. |  | ||||||
|  |  | ||||||
| GCC   v5.x |  | ||||||
|  |  | ||||||
| GCC   v6.1, v6.2 |  | ||||||
|  |  | ||||||
| ### Bug report |  | ||||||
|  |  | ||||||
| _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ |  | ||||||
|  |  | ||||||
| When you file an issue, please go though the following checklist: |  | ||||||
|  |  | ||||||
| 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  |  | ||||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler. |  | ||||||
| 3. Give the exact `configure` command used. |  | ||||||
| 4. Attach `config.log`. |  | ||||||
| 5. Attach `grid.config.summary`. |  | ||||||
| 6. Attach the output of `make V=1`. |  | ||||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. |  | ||||||
|  |  | ||||||
| ### Required libraries |  | ||||||
| Grid requires: |  | ||||||
|  |  | ||||||
| [GMP](https://gmplib.org/),  |  | ||||||
|  |  | ||||||
| [MPFR](http://www.mpfr.org/)  |  | ||||||
|  |  | ||||||
| Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. |  | ||||||
|  |  | ||||||
| Grid optionally uses: |  | ||||||
|  |  | ||||||
| [HDF5](https://support.hdfgroup.org/HDF5/)   |  | ||||||
|  |  | ||||||
| [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.  |  | ||||||
|  |  | ||||||
| [FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. |  | ||||||
|  |  | ||||||
| LAPACK either generic version or Intel MKL library. |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Quick start | ### Quick start | ||||||
| First, start by cloning the repository: | First, start by cloning the repository: | ||||||
| @@ -122,10 +95,10 @@ install Grid. Other options are detailed in the next section, you can also use ` | |||||||
| `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | ||||||
| customise the build. | customise the build. | ||||||
|  |  | ||||||
| Finally, you can build, check, and install Grid: | Finally, you can build and install Grid: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| make; make check; make install | make; make install | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||||
| @@ -148,7 +121,7 @@ If you want to build all the tests at once just use `make tests`. | |||||||
| - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | ||||||
| - `--enable-precision={single|double}`: set the default precision (default: `double`). | - `--enable-precision={single|double}`: set the default precision (default: `double`). | ||||||
| - `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. | - `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. | ||||||
| - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). | - `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). | ||||||
| - `--disable-timers`: disable system dependent high-resolution timers. | - `--disable-timers`: disable system dependent high-resolution timers. | ||||||
| - `--enable-chroma`: enable Chroma regression tests. | - `--enable-chroma`: enable Chroma regression tests. | ||||||
| - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | ||||||
| @@ -162,6 +135,7 @@ The following options can be use with the `--enable-comms=` option to target dif | |||||||
| | `none`         | no communications                                             | | | `none`         | no communications                                             | | ||||||
| | `mpi[-auto]`   | MPI communications                                            | | | `mpi[-auto]`   | MPI communications                                            | | ||||||
| | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | ||||||
|  | | `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | ||||||
| | `shmem `       | Cray SHMEM communications                                     | | | `shmem `       | Cray SHMEM communications                                     | | ||||||
|  |  | ||||||
| For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | ||||||
| @@ -179,13 +153,13 @@ The following options can be use with the `--enable-simd=` option to target diff | |||||||
| | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | ||||||
| | `AVX2`      | AVX 2 (256 bit)                        | | | `AVX2`      | AVX 2 (256 bit)                        | | ||||||
| | `AVX512`    | AVX 512 bit                            | | | `AVX512`    | AVX 512 bit                            | | ||||||
| | `NEONv8`    | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit)                     | | | `QPX`       | QPX (256 bit)                          | | ||||||
| | `QPX`       | IBM QPX (256 bit)                      | |  | ||||||
|  |  | ||||||
| Alternatively, some CPU codenames can be directly used: | Alternatively, some CPU codenames can be directly used: | ||||||
|  |  | ||||||
| | `<code>`    | Description                            | | | `<code>`    | Description                            | | ||||||
| | ----------- | -------------------------------------- | | | ----------- | -------------------------------------- | | ||||||
|  | | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | | ||||||
| | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||||
| | `BGQ`       | Blue Gene/Q                            | | | `BGQ`       | Blue Gene/Q                            | | ||||||
|  |  | ||||||
| @@ -202,205 +176,21 @@ The following configuration is recommended for the Intel Knights Landing platfor | |||||||
| ``` bash | ``` bash | ||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi-auto  \ |              --enable-comms=mpi-auto \ | ||||||
|  |              --with-gmp=<path>        \ | ||||||
|  |              --with-mpfr=<path>       \ | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=icpc MPICXX=mpiicpc |              CXX=icpc MPICXX=mpiicpc | ||||||
| ``` | ``` | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi       \ |              --enable-comms=mpi       \ | ||||||
|  |              --with-gmp=<path>        \ | ||||||
|  |              --with-mpfr=<path>       \ | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=CC CC=cc |              CXX=CC CC=cc | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| Knight's Landing with Intel Omnipath adapters with two adapters per node  |  | ||||||
| presently performs better with use of more than one rank per node, using shared memory  |  | ||||||
| for interior communication. This is the mpi3 communications implementation.  |  | ||||||
| We recommend four ranks per node for best performance, but optimum is local volume dependent. |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=KNL        \ |  | ||||||
|              --enable-comms=mpi3-auto \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CC=icpc MPICXX=mpiicpc  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| ### Build setup for Intel Haswell Xeon platform |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the Intel Haswell platform: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3-auto \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=icpc MPICXX=mpiicpc |  | ||||||
| ``` |  | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=CC CC=cc |  | ||||||
| ``` |  | ||||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  |  | ||||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using |  | ||||||
| ``` |  | ||||||
|         export I_MPI_PIN=1 |  | ||||||
| ``` |  | ||||||
| This is the default. |  | ||||||
|  |  | ||||||
| ### Build setup for Intel Skylake Xeon platform |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the Intel Skylake platform: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX512     \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=mpiicpc |  | ||||||
| ``` |  | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX512     \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=CC CC=cc |  | ||||||
| ``` |  | ||||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  |  | ||||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using |  | ||||||
| ```  |  | ||||||
|         export I_MPI_PIN=1 |  | ||||||
| ``` |  | ||||||
| This is the default.  |  | ||||||
|  |  | ||||||
| #### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping):  |  | ||||||
|  |  | ||||||
| mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18  |  | ||||||
|  |  | ||||||
| TBA |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Build setup for AMD EPYC / RYZEN |  | ||||||
|  |  | ||||||
| The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. |  | ||||||
| So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total |  | ||||||
| are common. Each chip within the module exposes a separate NUMA domain. |  | ||||||
| There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. |  | ||||||
| MPI-3 is recommended with the use of four ranks per socket, |  | ||||||
| and 8 threads per rank.  |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the AMD EPYC platform. |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3 \ |  | ||||||
|              CXX=mpicxx  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. |  | ||||||
| This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.  |  | ||||||
|  |  | ||||||
| It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and |  | ||||||
| shared memory to communicate within this node: |  | ||||||
|  |  | ||||||
| mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4  |  | ||||||
|  |  | ||||||
| Where omp_bind.sh does the following: |  | ||||||
| ``` |  | ||||||
| #!/bin/bash |  | ||||||
|  |  | ||||||
| numanode=` expr $PMI_RANK % 8 ` |  | ||||||
| basecore=`expr $numanode \* 16` |  | ||||||
| core0=`expr $basecore + 0 ` |  | ||||||
| core1=`expr $basecore + 2 ` |  | ||||||
| core2=`expr $basecore + 4 ` |  | ||||||
| core3=`expr $basecore + 6 ` |  | ||||||
| core4=`expr $basecore + 8 ` |  | ||||||
| core5=`expr $basecore + 10 ` |  | ||||||
| core6=`expr $basecore + 12 ` |  | ||||||
| core7=`expr $basecore + 14 ` |  | ||||||
|  |  | ||||||
| export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" |  | ||||||
| echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY |  | ||||||
|  |  | ||||||
| $@ |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| Performance: |  | ||||||
|  |  | ||||||
| #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping):  |  | ||||||
|  |  | ||||||
| mpirun  -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 |  | ||||||
|  |  | ||||||
| TBA |  | ||||||
|  |  | ||||||
| ### Build setup for BlueGene/Q |  | ||||||
|  |  | ||||||
| To be written... |  | ||||||
|  |  | ||||||
| ### Build setup for ARM Neon |  | ||||||
|  |  | ||||||
| To be written... |  | ||||||
|  |  | ||||||
| ### Build setup for laptops, other compilers, non-cluster builds |  | ||||||
|  |  | ||||||
| Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX), |  | ||||||
| and omit the enable-mkl flag.  |  | ||||||
|  |  | ||||||
| Single node builds are enabled with  |  | ||||||
| ``` |  | ||||||
|             --enable-comms=none |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| FFTW support that is not in the default search path may then enabled with |  | ||||||
| ``` |  | ||||||
|     --with-fftw=<installpath> |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. |  | ||||||
|  |  | ||||||
							
								
								
									
										33
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,32 +1,23 @@ | |||||||
| TODO: | TODO: | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
| Large item work list: | Peter's work list: | ||||||
|  | 2)- Precision conversion and sort out localConvert      <--  | ||||||
| 1)- BG/Q port and check | 3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started  | ||||||
| 2)- Christoph's local basis expansion Lanczos | 4)- Binary I/O speed up & x-strips | ||||||
| 3)- Precision conversion and sort out localConvert      <-- partial | -- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet | ||||||
|  | -- Physical propagator interface | ||||||
|   - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | -- Conserved currents | ||||||
| 4)- Physical propagator interface | -- GaugeFix into central location | ||||||
| 5)- Conserved currents | -- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||||
| 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations | -- HDCR resume | ||||||
| 7)- HDCR resume |  | ||||||
|  |  | ||||||
| Recent DONE  | Recent DONE  | ||||||
|  |  | ||||||
| -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O.  <--- DONE |  | ||||||
| -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE |  | ||||||
| -- GaugeFix into central location                      <-- DONE |  | ||||||
| -- Scidac and Ildg metadata handling                   <-- DONE |  | ||||||
| -- Binary I/O MPI2 IO                                  <-- DONE |  | ||||||
| -- Binary I/O speed up & x-strips                      <-- DONE |  | ||||||
| -- Cut down the exterior overhead                      <-- DONE | -- Cut down the exterior overhead                      <-- DONE | ||||||
| -- Interior legs from SHM comms                        <-- DONE | -- Interior legs from SHM comms                        <-- DONE | ||||||
| -- Half-precision comms                                <-- DONE | -- Half-precision comms                                <-- DONE | ||||||
| -- Merge high precision reduction into develop         <-- DONE | -- Merge high precision reduction into develop         | ||||||
| -- BlockCG, BCGrQ                                      <-- DONE | -- multiRHS DWF; benchmark on Cori/BNL for comms elimination | ||||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE |  | ||||||
|    -- slice* linalg routines for multiRHS, BlockCG     |    -- slice* linalg routines for multiRHS, BlockCG     | ||||||
|  |  | ||||||
| ----- | ----- | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								VERSION
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								VERSION
									
									
									
									
									
								
							| @@ -1,5 +1,6 @@ | |||||||
| Version : 0.7.0 | Version : 0.6.0 | ||||||
|  |  | ||||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | - AVX512, AVX2, AVX, SSE good | ||||||
| - MPI and MPI3 comms optimisations for KNL and OPA finished | - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | ||||||
| - Half precision comms | - MPI and MPI3 | ||||||
|  | - HiRep, Smearing, Generic gauge group | ||||||
|   | |||||||
| @@ -1,800 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_memory_bandwidth.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| std::vector<int> L_list; |  | ||||||
| std::vector<int> Ls_list; |  | ||||||
| std::vector<double> mflop_list; |  | ||||||
|  |  | ||||||
| double mflop_ref; |  | ||||||
| double mflop_ref_err; |  | ||||||
|  |  | ||||||
| int NN_global; |  | ||||||
|  |  | ||||||
| struct time_statistics{ |  | ||||||
|   double mean; |  | ||||||
|   double err; |  | ||||||
|   double min; |  | ||||||
|   double max; |  | ||||||
|  |  | ||||||
|   void statistics(std::vector<double> v){ |  | ||||||
|       double sum = std::accumulate(v.begin(), v.end(), 0.0); |  | ||||||
|       mean = sum / v.size(); |  | ||||||
|  |  | ||||||
|       std::vector<double> diff(v.size()); |  | ||||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); |  | ||||||
|       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); |  | ||||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); |  | ||||||
|  |  | ||||||
|       auto result = std::minmax_element(v.begin(), v.end()); |  | ||||||
|       min = *result.first; |  | ||||||
|       max = *result.second; |  | ||||||
| } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| void comms_header(){ |  | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| Gamma::Algebra Gmu [] = { |  | ||||||
|   Gamma::Algebra::GammaX, |  | ||||||
|   Gamma::Algebra::GammaY, |  | ||||||
|   Gamma::Algebra::GammaZ, |  | ||||||
|   Gamma::Algebra::GammaT |  | ||||||
| }; |  | ||||||
| struct controls { |  | ||||||
|   int Opt; |  | ||||||
|   int CommsOverlap; |  | ||||||
|   Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; |  | ||||||
|   //  int HugePages; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class Benchmark { |  | ||||||
| public: |  | ||||||
|   static void Decomposition (void ) { |  | ||||||
|  |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; |  | ||||||
|     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static void Comms(void) |  | ||||||
|   { |  | ||||||
|     int Nloop=200; |  | ||||||
|     int nmu=0; |  | ||||||
|     int maxlat=32; |  | ||||||
|  |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|  |  | ||||||
|     for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |  | ||||||
|  |  | ||||||
|     std::vector<double> t_time(Nloop); |  | ||||||
|     time_statistics timestat; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|     comms_header(); |  | ||||||
|  |  | ||||||
|     for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|       for(int Ls=8;Ls<=8;Ls*=2){ |  | ||||||
|  |  | ||||||
| 	std::vector<int> latt_size  ({lat*mpi_layout[0], |  | ||||||
| 	      lat*mpi_layout[1], |  | ||||||
| 	      lat*mpi_layout[2], |  | ||||||
| 	      lat*mpi_layout[3]}); |  | ||||||
|  |  | ||||||
| 	GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
| 	RealD Nrank = Grid._Nprocessors; |  | ||||||
| 	RealD Nnode = Grid.NodeCount(); |  | ||||||
| 	RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
| 	std::vector<HalfSpinColourVectorD *> xbuf(8); |  | ||||||
| 	std::vector<HalfSpinColourVectorD *> rbuf(8); |  | ||||||
| 	Grid.ShmBufferFreeAll(); |  | ||||||
| 	for(int d=0;d<8;d++){ |  | ||||||
| 	  xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |  | ||||||
| 	int ncomm; |  | ||||||
| 	double dbytes; |  | ||||||
| 	std::vector<double> times(Nloop); |  | ||||||
| 	for(int i=0;i<Nloop;i++){ |  | ||||||
|  |  | ||||||
| 	  double start=usecond(); |  | ||||||
|  |  | ||||||
| 	  dbytes=0; |  | ||||||
| 	  ncomm=0; |  | ||||||
|  |  | ||||||
| 	  parallel_for(int dir=0;dir<8;dir++){ |  | ||||||
|  |  | ||||||
| 	    double tbytes; |  | ||||||
| 	    int mu =dir % 4; |  | ||||||
|  |  | ||||||
| 	    if (mpi_layout[mu]>1 ) { |  | ||||||
| 	         |  | ||||||
| 	      int xmit_to_rank; |  | ||||||
| 	      int recv_from_rank; |  | ||||||
| 	      if ( dir == mu ) {  |  | ||||||
| 		int comm_proc=1; |  | ||||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	      } else {  |  | ||||||
| 		int comm_proc = mpi_layout[mu]-1; |  | ||||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	      } |  | ||||||
| 	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, |  | ||||||
| 						 (void *)&rbuf[dir][0], recv_from_rank, |  | ||||||
| 						 bytes,dir); |  | ||||||
| 	   |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp atomic |  | ||||||
| #endif |  | ||||||
| 	      ncomm++; |  | ||||||
|  |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp atomic |  | ||||||
| #endif |  | ||||||
| 	      dbytes+=tbytes; |  | ||||||
| 	    } |  | ||||||
| 	  } |  | ||||||
| 	  Grid.Barrier(); |  | ||||||
| 	  double stop=usecond(); |  | ||||||
| 	  t_time[i] = stop-start; // microseconds |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	//	for(int i=0;i<t_time.size();i++){ |  | ||||||
| 	//	  std::cout << i<<" "<<t_time[i]<<std::endl; |  | ||||||
| 	//	} |  | ||||||
|  |  | ||||||
| 	dbytes=dbytes*ppn; |  | ||||||
| 	double xbytes    = dbytes*0.5; |  | ||||||
| 	double rbytes    = dbytes*0.5; |  | ||||||
| 	double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
| 		 <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
| 		 <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
| 		 <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
| 		 << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
| 		 << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|   |  | ||||||
| 	 |  | ||||||
| 	    } |  | ||||||
|     }     |  | ||||||
|  |  | ||||||
|     return; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static void Memory(void) |  | ||||||
|   { |  | ||||||
|     const int Nvec=8; |  | ||||||
|     typedef Lattice< iVector< vReal,Nvec> > LatticeVec; |  | ||||||
|     typedef iVector<vReal,Nvec> Vec; |  | ||||||
|  |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |  | ||||||
|    |  | ||||||
|     uint64_t NP; |  | ||||||
|     uint64_t NN; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   uint64_t lmax=48; |  | ||||||
| #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) |  | ||||||
|  |  | ||||||
|     GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |  | ||||||
|     for(int lat=8;lat<=lmax;lat+=4){ |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |  | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|  |  | ||||||
|       NP= Grid.RankCount(); |  | ||||||
|       NN =Grid.NodeCount(); |  | ||||||
|  |  | ||||||
|       Vec rn ; random(sRNG,rn); |  | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); z=rn; |  | ||||||
|       LatticeVec x(&Grid); x=rn; |  | ||||||
|       LatticeVec y(&Grid); y=rn; |  | ||||||
|       double a=2.0; |  | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |  | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
| 	z=a*x-y; |  | ||||||
|         x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away |  | ||||||
|         y._odata[4]=z._odata[4]; |  | ||||||
|       } |  | ||||||
|       double stop=usecond(); |  | ||||||
|       double time = (stop-start)/Nloop*1000; |  | ||||||
|       |  | ||||||
|       double flops=vol*Nvec*2;// mul,add |  | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |  | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3)  |  | ||||||
| 	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000. |  | ||||||
| 	       << "\t\t"<< bytes/time/NN <<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   static double DWF5(int Ls,int L) |  | ||||||
|   { |  | ||||||
|     RealD mass=0.1; |  | ||||||
|     RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|     double mflops; |  | ||||||
|     double mflops_best = 0; |  | ||||||
|     double mflops_worst= 0; |  | ||||||
|     std::vector<double> mflops_all; |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     // Set/Get the layout & grid size |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); |  | ||||||
|     std::vector<int> local({L,L,L,L}); |  | ||||||
|  |  | ||||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  |  | ||||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     uint64_t NP = TmpGrid->RankCount(); |  | ||||||
|     uint64_t NN = TmpGrid->NodeCount(); |  | ||||||
|     NN_global=NN; |  | ||||||
|     uint64_t SHM=NP/NN; |  | ||||||
|  |  | ||||||
|     std::vector<int> internal; |  | ||||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); |  | ||||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); |  | ||||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); |  | ||||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); |  | ||||||
|     else assert(0); |  | ||||||
|  |  | ||||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); |  | ||||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); |  | ||||||
|  |  | ||||||
|     ///////// Welcome message //////////// |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Lattice Init //////////// |  | ||||||
|     GridCartesian         * UGrid    = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * UrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|     GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); |  | ||||||
|     GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); |  | ||||||
|     GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|     ///////// RNG Init //////////// |  | ||||||
|     std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|     std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|     GridParallelRNG          RNG5(sFGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Source preparation //////////// |  | ||||||
|     LatticeFermion src   (sFGrid); random(RNG5,src); |  | ||||||
|     LatticeFermion tmp   (sFGrid); |  | ||||||
|  |  | ||||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|     src = src*N2; |  | ||||||
|      |  | ||||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|  |  | ||||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); |  | ||||||
|     LatticeFermion src_e (sFrbGrid); |  | ||||||
|     LatticeFermion src_o (sFrbGrid); |  | ||||||
|     LatticeFermion r_e   (sFrbGrid); |  | ||||||
|     LatticeFermion r_o   (sFrbGrid); |  | ||||||
|     LatticeFermion r_eo  (sFGrid); |  | ||||||
|     LatticeFermion err   (sFGrid); |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd,src_o,src); |  | ||||||
|  |  | ||||||
| #if defined(AVX512)  |  | ||||||
|       const int num_cases = 6; |  | ||||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #else |  | ||||||
|       const int num_cases = 4; |  | ||||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #endif |  | ||||||
|       controls Cases [] = { |  | ||||||
| #ifdef AVX512 |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| #endif |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } |  | ||||||
|       };  |  | ||||||
|  |  | ||||||
|       for(int c=0;c<num_cases;c++) { |  | ||||||
|  |  | ||||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; |  | ||||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; |  | ||||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
| 	int nwarm = 100; |  | ||||||
| 	uint64_t ncall = 1000; |  | ||||||
|  |  | ||||||
| 	double t0=usecond(); |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	for(int i=0;i<nwarm;i++){ |  | ||||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	} |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	double t1=usecond(); |  | ||||||
|  |  | ||||||
| 	sDw.ZeroCounters(); |  | ||||||
| 	time_statistics timestat; |  | ||||||
| 	std::vector<double> t_time(ncall); |  | ||||||
| 	for(uint64_t i=0;i<ncall;i++){ |  | ||||||
| 	  t0=usecond(); |  | ||||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	  t1=usecond(); |  | ||||||
| 	  t_time[i] = t1-t0; |  | ||||||
| 	} |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	 |  | ||||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
| 	double flops=(1344.0*volume)/2; |  | ||||||
| 	double mf_hi, mf_lo, mf_err; |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	mf_hi = flops/timestat.min; |  | ||||||
| 	mf_lo = flops/timestat.max; |  | ||||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; |  | ||||||
|  |  | ||||||
| 	mflops = flops/timestat.mean; |  | ||||||
| 	mflops_all.push_back(mflops); |  | ||||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; |  | ||||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; |  | ||||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; |  | ||||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank   "<< mflops/NP<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node   "<< mflops/NN<<std::endl; |  | ||||||
|  |  | ||||||
| 	sDw.Report(); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|       double robust = mflops_worst/mflops_best;; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage <<fmt << std::endl; |  | ||||||
|       std::cout<<GridLogMessage; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<mflops_all.size();i++){ |  | ||||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; |  | ||||||
|       } |  | ||||||
|       std::cout<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     return mflops_best; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static double DWF(int Ls,int L, double & robust) |  | ||||||
|   { |  | ||||||
|     RealD mass=0.1; |  | ||||||
|     RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|     double mflops; |  | ||||||
|     double mflops_best = 0; |  | ||||||
|     double mflops_worst= 0; |  | ||||||
|     std::vector<double> mflops_all; |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     // Set/Get the layout & grid size |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); |  | ||||||
|     std::vector<int> local({L,L,L,L}); |  | ||||||
|  |  | ||||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  |  | ||||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     uint64_t NP = TmpGrid->RankCount(); |  | ||||||
|     uint64_t NN = TmpGrid->NodeCount(); |  | ||||||
|     NN_global=NN; |  | ||||||
|     uint64_t SHM=NP/NN; |  | ||||||
|  |  | ||||||
|     std::vector<int> internal; |  | ||||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); |  | ||||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); |  | ||||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); |  | ||||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); |  | ||||||
|     else assert(0); |  | ||||||
|  |  | ||||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); |  | ||||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); |  | ||||||
|  |  | ||||||
|     ///////// Welcome message //////////// |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     ///////// Lattice Init //////////// |  | ||||||
|     GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|      |  | ||||||
|     ///////// RNG Init //////////// |  | ||||||
|     std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|     std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Source preparation //////////// |  | ||||||
|     LatticeFermion src   (FGrid); random(RNG5,src); |  | ||||||
|     LatticeFermion ref   (FGrid); |  | ||||||
|     LatticeFermion tmp   (FGrid); |  | ||||||
|  |  | ||||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|     src = src*N2; |  | ||||||
|      |  | ||||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|  |  | ||||||
|     DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|  |  | ||||||
|     //////////////////////////////////// |  | ||||||
|     // Naive wilson implementation |  | ||||||
|     //////////////////////////////////// |  | ||||||
|     { |  | ||||||
|       LatticeGaugeField Umu5d(FGrid);  |  | ||||||
|       std::vector<LatticeColourMatrix> U(4,FGrid); |  | ||||||
|       for(int ss=0;ss<Umu._grid->oSites();ss++){ |  | ||||||
| 	for(int s=0;s<Ls;s++){ |  | ||||||
| 	  Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       ref = zero; |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); |  | ||||||
|       } |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	 |  | ||||||
| 	tmp = U[mu]*Cshift(src,mu+1,1); |  | ||||||
| 	ref=ref + tmp - Gamma(Gmu[mu])*tmp; |  | ||||||
| 	 |  | ||||||
| 	tmp =adj(U[mu])*src; |  | ||||||
| 	tmp =Cshift(tmp,mu+1,-1); |  | ||||||
| 	ref=ref + tmp + Gamma(Gmu[mu])*tmp; |  | ||||||
|       } |  | ||||||
|       ref = -0.5*ref; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeFermion src_e (FrbGrid); |  | ||||||
|     LatticeFermion src_o (FrbGrid); |  | ||||||
|     LatticeFermion r_e   (FrbGrid); |  | ||||||
|     LatticeFermion r_o   (FrbGrid); |  | ||||||
|     LatticeFermion r_eo  (FGrid); |  | ||||||
|     LatticeFermion err   (FGrid); |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd,src_o,src); |  | ||||||
|  |  | ||||||
| #if defined(AVX512)  |  | ||||||
|       const int num_cases = 6; |  | ||||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #else |  | ||||||
|       const int num_cases = 4; |  | ||||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #endif |  | ||||||
|       controls Cases [] = { |  | ||||||
| #ifdef AVX512 |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| #endif |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } |  | ||||||
|       };  |  | ||||||
|  |  | ||||||
|       for(int c=0;c<num_cases;c++) { |  | ||||||
|  |  | ||||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; |  | ||||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; |  | ||||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
| 	int nwarm = 200; |  | ||||||
| 	double t0=usecond(); |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	for(int i=0;i<nwarm;i++){ |  | ||||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	} |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	double t1=usecond(); |  | ||||||
| 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); |  | ||||||
| 	//	if (ncall < 500) ncall = 500; |  | ||||||
| 	uint64_t ncall = 1000; |  | ||||||
|  |  | ||||||
| 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); |  | ||||||
|  |  | ||||||
| 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; |  | ||||||
| 	Dw.ZeroCounters(); |  | ||||||
|  |  | ||||||
| 	time_statistics timestat; |  | ||||||
| 	std::vector<double> t_time(ncall); |  | ||||||
| 	for(uint64_t i=0;i<ncall;i++){ |  | ||||||
| 	  t0=usecond(); |  | ||||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	  t1=usecond(); |  | ||||||
| 	  t_time[i] = t1-t0; |  | ||||||
| 	} |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	 |  | ||||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
| 	double flops=(1344.0*volume)/2; |  | ||||||
| 	double mf_hi, mf_lo, mf_err; |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	mf_hi = flops/timestat.min; |  | ||||||
| 	mf_lo = flops/timestat.max; |  | ||||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; |  | ||||||
|  |  | ||||||
| 	mflops = flops/timestat.mean; |  | ||||||
| 	mflops_all.push_back(mflops); |  | ||||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; |  | ||||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; |  | ||||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; |  | ||||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl; |  | ||||||
|  |  | ||||||
| 	Dw.Report(); |  | ||||||
|  |  | ||||||
| 	Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	Dw.DhopOE(src_e,r_o,DaggerNo); |  | ||||||
| 	setCheckerboard(r_eo,r_o); |  | ||||||
| 	setCheckerboard(r_eo,r_e); |  | ||||||
| 	err = r_eo-ref;  |  | ||||||
| 	std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |  | ||||||
| 	assert((norm2(err)<1.0e-4)); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|       robust = mflops_worst/mflops_best; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust  <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage <<fmt << std::endl; |  | ||||||
|       std::cout<<GridLogMessage ; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<mflops_all.size();i++){ |  | ||||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; |  | ||||||
|       } |  | ||||||
|       std::cout<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     return mflops_best; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); |  | ||||||
| #ifdef KNL |  | ||||||
|   LebesgueOrder::Block = std::vector<int>({8,2,2,2}); |  | ||||||
| #else |  | ||||||
|   LebesgueOrder::Block = std::vector<int>({2,2,2,2}); |  | ||||||
| #endif |  | ||||||
|   Benchmark::Decomposition(); |  | ||||||
|  |  | ||||||
|   int do_memory=1; |  | ||||||
|   int do_comms =1; |  | ||||||
|   int do_su3   =0; |  | ||||||
|   int do_wilson=1; |  | ||||||
|   int do_dwf   =1; |  | ||||||
|  |  | ||||||
|   if ( do_su3 ) { |  | ||||||
|     // empty for now |  | ||||||
|   } |  | ||||||
| #if 1 |  | ||||||
|   int sel=2; |  | ||||||
|   std::vector<int> L_list({8,12,16,24}); |  | ||||||
| #else |  | ||||||
|   int sel=1; |  | ||||||
|   std::vector<int> L_list({8,12}); |  | ||||||
| #endif |  | ||||||
|   int selm1=sel-1; |  | ||||||
|   std::vector<double> robust_list; |  | ||||||
|  |  | ||||||
|   std::vector<double> wilson; |  | ||||||
|   std::vector<double> dwf4; |  | ||||||
|   std::vector<double> dwf5; |  | ||||||
|  |  | ||||||
|   if ( do_wilson ) { |  | ||||||
|     int Ls=1; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       double robust; |  | ||||||
|       wilson.push_back(Benchmark::DWF(1,L_list[l],robust)); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int Ls=16; |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       double robust; |  | ||||||
|       double result = Benchmark::DWF(Ls,L_list[l],robust) ; |  | ||||||
|       dwf4.push_back(result); |  | ||||||
|       robust_list.push_back(robust); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl; |  | ||||||
|   for(int l=0;l<L_list.size();l++){ |  | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl; |  | ||||||
|   } |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int NN=NN_global; |  | ||||||
|   if ( do_memory ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     Benchmark::Memory(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_comms && (NN>1) ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     Benchmark::Comms(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl; |  | ||||||
|   for(int l=0;l<L_list.size();l++){ |  | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl; |  | ||||||
|   } |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point     result: "  << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl; |  | ||||||
|   std::cout<<std::setprecision(3); |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point robustness: "  << robust_list[sel] <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
| @@ -31,32 +31,6 @@ using namespace std; | |||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace Grid::QCD; | using namespace Grid::QCD; | ||||||
|  |  | ||||||
| struct time_statistics{ |  | ||||||
|   double mean; |  | ||||||
|   double err; |  | ||||||
|   double min; |  | ||||||
|   double max; |  | ||||||
|  |  | ||||||
|   void statistics(std::vector<double> v){ |  | ||||||
|       double sum = std::accumulate(v.begin(), v.end(), 0.0); |  | ||||||
|       mean = sum / v.size(); |  | ||||||
|  |  | ||||||
|       std::vector<double> diff(v.size()); |  | ||||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); |  | ||||||
|       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); |  | ||||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); |  | ||||||
|  |  | ||||||
|       auto result = std::minmax_element(v.begin(), v.end()); |  | ||||||
|       min = *result.first; |  | ||||||
|       max = *result.second; |  | ||||||
| } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| void header(){ |  | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| @@ -66,21 +40,17 @@ int main (int argc, char ** argv) | |||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   int Nloop=100; |   int Nloop=10; | ||||||
|   int nmu=0; |   int nmu=0; | ||||||
|   int maxlat=32; |  | ||||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; |  | ||||||
|   std::vector<double> t_time(Nloop); |  | ||||||
|   time_statistics timestat; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |   int maxlat=24; | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -88,23 +58,15 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	 |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|       for(int mu=0;mu<8;mu++){ |  | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| @@ -117,6 +79,7 @@ int main (int argc, char ** argv) | |||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
|  | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    Grid.SendToRecvFromBegin(requests, | 	    Grid.SendToRecvFromBegin(requests, | ||||||
| 				   (void *)&xbuf[mu][0], | 				   (void *)&xbuf[mu][0], | ||||||
| @@ -139,24 +102,18 @@ int main (int argc, char ** argv) | |||||||
| 	} | 	} | ||||||
| 	Grid.SendToRecvFromComplete(requests); | 	Grid.SendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       double dbytes    = bytes*ppn; |  | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |       double time = stop-start; // microseconds | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
| @@ -164,32 +121,25 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); |       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|  |  | ||||||
|       for(int mu=0;mu<8;mu++){ |  | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|      |      | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| @@ -228,37 +178,30 @@ int main (int argc, char ** argv) | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double stop=usecond(); | ||||||
|        |        | ||||||
|       double dbytes    = bytes*ppn; |       double dbytes    = bytes; | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |       double time = stop-start; | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|        |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }   |   }   | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Nloop=10; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -266,9 +209,6 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -276,115 +216,16 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double dbytes; |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| 	for(int mu=0;mu<4;mu++){ |  | ||||||
| 	 |  | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { |  | ||||||
| 	   |  | ||||||
| 	    ncomm++; |  | ||||||
| 	    int comm_proc=1; |  | ||||||
| 	    int xmit_to_rank; |  | ||||||
| 	    int recv_from_rank; |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	    dbytes+= |  | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, |  | ||||||
| 					      (void *)&xbuf[mu][0], |  | ||||||
| 					      xmit_to_rank, |  | ||||||
| 					      (void *)&rbuf[mu][0], |  | ||||||
| 					      recv_from_rank, |  | ||||||
| 					      bytes,mu); |  | ||||||
| 	 |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; |  | ||||||
| 	   |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	    dbytes+= |  | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, |  | ||||||
| 					      (void *)&xbuf[mu+4][0], |  | ||||||
| 					      xmit_to_rank, |  | ||||||
| 					      (void *)&rbuf[mu+4][0], |  | ||||||
| 					      recv_from_rank, |  | ||||||
| 					      bytes,mu+4); |  | ||||||
| 	   |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
| 	Grid.StencilSendToRecvFromComplete(requests,0); |  | ||||||
| 	Grid.Barrier(); |  | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
| 	 |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |  | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |  | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|   }     |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   header(); |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |  | ||||||
|       				    lat*mpi_layout[1], |  | ||||||
|       				    lat*mpi_layout[2], |  | ||||||
|       				    lat*mpi_layout[3]}); |  | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |  | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |  | ||||||
|       Grid.ShmBufferFreeAll(); |  | ||||||
|       for(int d=0;d<8;d++){ |  | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int ncomm; |  | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |  | ||||||
|       double dbytes; |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; |  | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
| @@ -396,64 +237,52 @@ int main (int argc, char ** argv) | |||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 					    (void *)&xbuf[mu][0], | ||||||
| 					      (void *)&xbuf[mu][0], | 					    xmit_to_rank, | ||||||
| 					      xmit_to_rank, | 					    (void *)&rbuf[mu][0], | ||||||
| 					      (void *)&rbuf[mu][0], | 					    recv_from_rank, | ||||||
| 					      recv_from_rank, | 					    bytes); | ||||||
| 					      bytes,mu); | 	 | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu); |  | ||||||
| 	    requests.resize(0); |  | ||||||
|  |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 					    (void *)&xbuf[mu+4][0], | ||||||
| 					      (void *)&xbuf[mu+4][0], | 					    xmit_to_rank, | ||||||
| 					      xmit_to_rank, | 					    (void *)&rbuf[mu+4][0], | ||||||
| 					      (void *)&rbuf[mu+4][0], | 					    recv_from_rank, | ||||||
| 					      recv_from_rank, | 					    bytes); | ||||||
| 					      bytes,mu+4); |  | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu+4); |  | ||||||
| 	    requests.resize(0); |  | ||||||
| 	   | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
|  | 	Grid.StencilSendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
| 	 |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|  |       double rbytes    = xbytes; | ||||||
|  |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |       double time = stop-start; // microseconds | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|   |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Nloop=100; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -461,9 +290,6 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -471,71 +297,65 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|       double dbytes; |  | ||||||
|  |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
|  | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	parallel_for(int dir=0;dir<8;dir++){ | 	 | ||||||
|  |  | ||||||
| 	  double tbytes; |  | ||||||
| 	  int mu =dir % 4; |  | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { | 	  if (mpi_layout[mu]>1 ) { | ||||||
| 	   | 	   | ||||||
| 	    ncomm++; | 	    ncomm++; | ||||||
|  | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	    if ( dir == mu ) {  | 	     | ||||||
| 	      int comm_proc=1; | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	    } else {  | 					    (void *)&xbuf[mu][0], | ||||||
| 	      int comm_proc = mpi_layout[mu]-1; | 					    xmit_to_rank, | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 					    (void *)&rbuf[mu][0], | ||||||
| 	    } | 					    recv_from_rank, | ||||||
|  | 					    bytes); | ||||||
|  | 	    Grid.StencilSendToRecvFromComplete(requests); | ||||||
|  | 	    requests.resize(0); | ||||||
|  |  | ||||||
| 	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); | 	   | ||||||
|  | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| #pragma omp atomic | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	    dbytes+=tbytes; | 					    (void *)&xbuf[mu+4][0], | ||||||
|  | 					    xmit_to_rank, | ||||||
|  | 					    (void *)&rbuf[mu+4][0], | ||||||
|  | 					    recv_from_rank, | ||||||
|  | 					    bytes); | ||||||
|  | 	    Grid.StencilSendToRecvFromComplete(requests); | ||||||
|  | 	    requests.resize(0); | ||||||
|  | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|  |       double rbytes    = xbytes; | ||||||
|  |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |       double time = stop-start; // microseconds | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|   |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,22 +1,28 @@ | |||||||
|  /************************************************************************************* |     /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_dwf.cc |     Source file: ./benchmarks/Benchmark_dwf.cc | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| @@ -145,7 +151,6 @@ int main (int argc, char ** argv) | |||||||
|   RealD M5  =1.8; |   RealD M5  =1.8; | ||||||
|  |  | ||||||
|   RealD NP = UGrid->_Nprocessors; |   RealD NP = UGrid->_Nprocessors; | ||||||
|   RealD NN = UGrid->NodeCount(); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; |   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||||
| @@ -155,17 +160,13 @@ int main (int argc, char ** argv) | |||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|  |  | ||||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|   int ncall =500; |   int ncall =1000; | ||||||
|   if (1) { |   if (1) { | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     Dw.ZeroCounters(); |     Dw.ZeroCounters(); | ||||||
| @@ -188,7 +189,6 @@ int main (int argc, char ** argv) | |||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
| @@ -225,7 +225,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
| @@ -241,10 +240,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -276,7 +271,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; |     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||||
|     sDw.Report(); |     sDw.Report(); | ||||||
|     RealD sum=0; |     RealD sum=0; | ||||||
| @@ -302,7 +296,6 @@ int main (int argc, char ** argv) | |||||||
|       std::cout<< "sD ERR   \n " << err  <<std::endl; |       std::cout<< "sD ERR   \n " << err  <<std::endl; | ||||||
|     } |     } | ||||||
|     assert(sum < 1.0e-4); |     assert(sum < 1.0e-4); | ||||||
|  |  | ||||||
|      |      | ||||||
|     if(1){ |     if(1){ | ||||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
| @@ -310,10 +303,6 @@ int main (int argc, char ** argv) | |||||||
|       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  | ||||||
| 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  | ||||||
| @@ -353,7 +342,6 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|       sDw.Report(); |       sDw.Report(); | ||||||
|  |  | ||||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); |       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||||
| @@ -382,23 +370,8 @@ int main (int argc, char ** argv) | |||||||
|       } |       } | ||||||
|       assert(error<1.0e-4); |       assert(error<1.0e-4); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   if(0){ |  | ||||||
|     std::cout << "Single cache warm call to sDw.Dhop " <<std::endl; |  | ||||||
|     for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ |  | ||||||
|       sDw.Dhop(ssrc,sresult,0); |  | ||||||
|       PerformanceCounter Counter(i); |  | ||||||
|       Counter.Start(); |  | ||||||
|       sDw.Dhop(ssrc,sresult,0); |  | ||||||
|       Counter.Stop(); |  | ||||||
|       Counter.Report(); |  | ||||||
|     } |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { // Naive wilson dag implementation |   { // Naive wilson dag implementation | ||||||
|     ref = zero; |     ref = zero; | ||||||
| @@ -447,15 +420,14 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|  |  | ||||||
|   // S-direction is INNERMOST and takes no part in the parity. |   // S-direction is INNERMOST and takes no part in the parity. | ||||||
|  |   static int Opt;  // these are a temporary hack | ||||||
|  |   static int Comms;  // these are a temporary hack | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; |   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -476,7 +448,6 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); |   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
| @@ -503,9 +474,8 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; |   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; |   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||||
|  |  | ||||||
|   assert(norm2(src_e)<1.0e-4); |   //assert(norm2(src_e)<1.0e-4); | ||||||
|   assert(norm2(src_o)<1.0e-4); |   //assert(norm2(src_o)<1.0e-4); | ||||||
|   Grid_finalize(); |  | ||||||
|   exit(0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -55,21 +55,21 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|   uint64_t lmax=96; |   uint64_t lmax=44; | ||||||
| #define NLOOP (10*lmax*lmax*lmax*lmax/vol) | #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       double a=2.0; |       double a=2.0; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -83,7 +83,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |       double bytes=3*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -94,17 +94,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|    |    | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       double a=2.0; |       double a=2.0; | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
| @@ -119,7 +119,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|       |       | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |       double bytes=3*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -129,20 +129,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       RealD a=2.0; |       RealD a=2.0; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -154,7 +154,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=2.0*vol*Nvec*sizeof(Real); |       double bytes=2*vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*1;// mul |       double flops=vol*Nvec*1;// mul | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
| @@ -166,17 +166,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       RealD a=2.0; |       RealD a=2.0; | ||||||
|       Real nn;       |       Real nn;       | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
| @@ -187,7 +187,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=1.0*vol*Nvec*sizeof(Real); |       double bytes=vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -35,14 +35,13 @@ using namespace Grid::QCD; | |||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| #define LMAX (64) |  | ||||||
|  |  | ||||||
|   int64_t Nloop=20; |   int Nloop=1000; | ||||||
|  |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|   int64_t threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
| @@ -51,19 +50,19 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid);// random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid);// random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid);// random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	x=x*y; | 	x=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -83,20 +82,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	z=x*y; | 	z=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -114,20 +113,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	mult(z,x,y); | 	mult(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -145,20 +144,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	mac(z,x,y); | 	mac(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|   | |||||||
| @@ -1,7 +1,11 @@ | |||||||
| include Make.inc | include Make.inc | ||||||
|  |  | ||||||
| bench-local: all | simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o | ||||||
| 	./Benchmark_su3 |  | ||||||
| 	./Benchmark_memory_bandwidth | EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a | ||||||
| 	./Benchmark_wilson |  | ||||||
| 	./Benchmark_dwf --dslash-unroll | libsimple_su3_test_a_SOURCES = simple_su3_test.cc | ||||||
|  |  | ||||||
|  | libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc | ||||||
|  |  | ||||||
|  | libsimple_simd_test_a_SOURCES = simple_simd_test.cc | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| #!/usr/bin/env bash | #!/usr/bin/env bash | ||||||
|  |  | ||||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | ||||||
|  |  | ||||||
| echo "-- deploying Eigen source..." | echo "-- deploying Eigen source..." | ||||||
| wget ${EIGEN_URL} --no-check-certificate | wget ${EIGEN_URL} --no-check-certificate | ||||||
|   | |||||||
							
								
								
									
										209
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										209
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,23 +1,16 @@ | |||||||
| AC_PREREQ([2.63]) | AC_PREREQ([2.63]) | ||||||
| AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) | AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) | ||||||
| AC_CANONICAL_BUILD | AC_CANONICAL_BUILD | ||||||
| AC_CANONICAL_HOST | AC_CANONICAL_HOST | ||||||
| AC_CANONICAL_TARGET | AC_CANONICAL_TARGET | ||||||
| AM_INIT_AUTOMAKE([subdir-objects 1.13]) | AM_INIT_AUTOMAKE(subdir-objects) | ||||||
| AM_EXTRA_RECURSIVE_TARGETS([tests bench]) |  | ||||||
| AC_CONFIG_MACRO_DIR([m4]) | AC_CONFIG_MACRO_DIR([m4]) | ||||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||||
| AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | ||||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||||
|  |  | ||||||
| ################ Get git info |  | ||||||
| #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) |  | ||||||
|  |  | ||||||
| ################ Set flags |  | ||||||
| # do not move! |  | ||||||
| CXXFLAGS="-O3 $CXXFLAGS" |  | ||||||
|  |  | ||||||
| ############### Checks for programs | ############### Checks for programs | ||||||
|  | CXXFLAGS="-O3 $CXXFLAGS" | ||||||
| AC_PROG_CXX | AC_PROG_CXX | ||||||
| AC_PROG_RANLIB | AC_PROG_RANLIB | ||||||
|  |  | ||||||
| @@ -31,14 +24,12 @@ AX_GXX_VERSION | |||||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||||
|       [version of g++ that will compile the code]) |       [version of g++ that will compile the code]) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ############### Checks for typedefs, structures, and compiler characteristics | ############### Checks for typedefs, structures, and compiler characteristics | ||||||
| AC_TYPE_SIZE_T | AC_TYPE_SIZE_T | ||||||
| AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||||
| AC_TYPE_UINT64_T | AC_TYPE_UINT64_T | ||||||
|  |  | ||||||
| ############### OpenMP | ############### OpenMP  | ||||||
| AC_OPENMP | AC_OPENMP | ||||||
| ac_openmp=no | ac_openmp=no | ||||||
| if test "${OPENMP_CXXFLAGS}X" != "X"; then | if test "${OPENMP_CXXFLAGS}X" != "X"; then | ||||||
| @@ -54,14 +45,9 @@ AC_CHECK_HEADERS(malloc/malloc.h) | |||||||
| AC_CHECK_HEADERS(malloc.h) | AC_CHECK_HEADERS(malloc.h) | ||||||
| AC_CHECK_HEADERS(endian.h) | AC_CHECK_HEADERS(endian.h) | ||||||
| AC_CHECK_HEADERS(execinfo.h) | AC_CHECK_HEADERS(execinfo.h) | ||||||
| AC_CHECK_HEADERS(numaif.h) |  | ||||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||||
|  |  | ||||||
| ############## Standard libraries |  | ||||||
| AC_CHECK_LIB([m],[cos]) |  | ||||||
| AC_CHECK_LIB([stdc++],[abort]) |  | ||||||
|  |  | ||||||
| ############### GMP and MPFR | ############### GMP and MPFR | ||||||
| AC_ARG_WITH([gmp], | AC_ARG_WITH([gmp], | ||||||
|     [AS_HELP_STRING([--with-gmp=prefix], |     [AS_HELP_STRING([--with-gmp=prefix], | ||||||
| @@ -74,23 +60,16 @@ AC_ARG_WITH([mpfr], | |||||||
|     [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] |     [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] | ||||||
|     [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) |     [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### FFTW3 | ############### FFTW3  | ||||||
| AC_ARG_WITH([fftw], | AC_ARG_WITH([fftw],     | ||||||
|             [AS_HELP_STRING([--with-fftw=prefix], |             [AS_HELP_STRING([--with-fftw=prefix], | ||||||
|             [try this for a non-standard install prefix of the FFTW3 library])], |             [try this for a non-standard install prefix of the FFTW3 library])], | ||||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] |             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) |             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### LIME | ############### lapack  | ||||||
| AC_ARG_WITH([lime], |  | ||||||
|             [AS_HELP_STRING([--with-lime=prefix], |  | ||||||
|             [try this for a non-standard install prefix of the LIME library])], |  | ||||||
|             [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"] |  | ||||||
|             [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"]) |  | ||||||
|  |  | ||||||
| ############### lapack |  | ||||||
| AC_ARG_ENABLE([lapack], | AC_ARG_ENABLE([lapack], | ||||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], |     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  | ||||||
|     [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) |     [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) | ||||||
|  |  | ||||||
| case ${ac_LAPACK} in | case ${ac_LAPACK} in | ||||||
| @@ -106,7 +85,7 @@ esac | |||||||
|  |  | ||||||
| ############### FP16 conversions | ############### FP16 conversions | ||||||
| AC_ARG_ENABLE([sfw-fp16], | AC_ARG_ENABLE([sfw-fp16], | ||||||
|     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], |     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],  | ||||||
|     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) |     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) | ||||||
| case ${ac_SFW_FP16} in | case ${ac_SFW_FP16} in | ||||||
|     yes) |     yes) | ||||||
| @@ -141,7 +120,7 @@ AC_ARG_WITH([hdf5], | |||||||
|  |  | ||||||
| ############### first-touch | ############### first-touch | ||||||
| AC_ARG_ENABLE([numa], | AC_ARG_ENABLE([numa], | ||||||
|     [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], |     [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],  | ||||||
|     [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) |     [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) | ||||||
|  |  | ||||||
| case ${ac_NUMA} in | case ${ac_NUMA} in | ||||||
| @@ -167,8 +146,8 @@ if test "${ac_MKL}x" != "nox"; then | |||||||
| fi | fi | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([__gmpf_init], [gmp], | AC_SEARCH_LIBS([__gmpf_init], [gmp], | ||||||
|                [AC_SEARCH_LIBS([mpfr_init], [mpfr], |                [AC_SEARCH_LIBS([mpfr_init], [mpfr],  | ||||||
|                                [AC_DEFINE([HAVE_LIBMPFR], [1], |                                [AC_DEFINE([HAVE_LIBMPFR], [1],  | ||||||
|                                           [Define to 1 if you have the `MPFR' library])] |                                           [Define to 1 if you have the `MPFR' library])] | ||||||
|                                [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] |                                [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] | ||||||
|                [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] |                [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] | ||||||
| @@ -177,7 +156,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp], | |||||||
| if test "${ac_LAPACK}x" != "nox"; then | if test "${ac_LAPACK}x" != "nox"; then | ||||||
|     AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], |     AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], | ||||||
|                    [AC_MSG_ERROR("LAPACK enabled but library not found")]) |                    [AC_MSG_ERROR("LAPACK enabled but library not found")]) | ||||||
| fi | fi    | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([fftw_execute], [fftw3], | AC_SEARCH_LIBS([fftw_execute], [fftw3], | ||||||
|                [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], |                [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], | ||||||
| @@ -185,23 +164,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], | |||||||
|                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] |                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] | ||||||
|                [have_fftw=true]) |                [have_fftw=true]) | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([limeCreateReader], [lime], |  | ||||||
|                [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])] |  | ||||||
|                [have_lime=true], |  | ||||||
| 	       [AC_MSG_WARN(C-LIME library was not found in your system. |  | ||||||
| In order to use ILGG file format please install or provide the correct path to your installation |  | ||||||
| Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([crc32], [z], |  | ||||||
|                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] |  | ||||||
|                [have_zlib=true] [LIBS="${LIBS} -lz"], |  | ||||||
| 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([move_pages], [numa], |  | ||||||
|                [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] |  | ||||||
|                [have_libnuma=true] [LIBS="${LIBS} -lnuma"], |  | ||||||
| 	       [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] |                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||||
|                [have_hdf5=true] |                [have_hdf5=true] | ||||||
| @@ -254,7 +216,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) |  | ||||||
|         SIMD_FLAGS='-march=knl';; |         SIMD_FLAGS='-march=knl';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -262,9 +223,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|                            [generic SIMD vector width (in bytes)]) |                            [generic SIMD vector width (in bytes)]) | ||||||
|         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" |         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       NEONv8) |  | ||||||
|         AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) |  | ||||||
|         SIMD_FLAGS='-march=armv8-a';; |  | ||||||
|       QPX|BGQ) |       QPX|BGQ) | ||||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) |         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
| @@ -293,7 +251,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) |  | ||||||
|         SIMD_FLAGS='-xmic-avx512';; |         SIMD_FLAGS='-xmic-avx512';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -331,41 +288,8 @@ case ${ac_PRECISION} in | |||||||
|      double) |      double) | ||||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) |        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |  | ||||||
|      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]); |  | ||||||
|      ;; |  | ||||||
| esac | esac | ||||||
|  |  | ||||||
| ######################  Shared memory allocation technique under MPI3 |  | ||||||
| AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], |  | ||||||
|               [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) |  | ||||||
|  |  | ||||||
| case ${ac_SHM} in |  | ||||||
|  |  | ||||||
|      shmget) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      shmopen) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      hugetlbfs) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      *) |  | ||||||
|      AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]); |  | ||||||
|      ;; |  | ||||||
| esac |  | ||||||
|  |  | ||||||
| ######################  Shared base path for SHMMMAP |  | ||||||
| AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], |  | ||||||
|               [Select SHM mmap base path for hugetlbfs])], |  | ||||||
| 	      [ac_SHMPATH=${enable_shmpath}], |  | ||||||
| 	      [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/]) |  | ||||||
| AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) |  | ||||||
|  |  | ||||||
| ############### communication type selection | ############### communication type selection | ||||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | ||||||
|               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) |               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||||
| @@ -375,14 +299,14 @@ case ${ac_COMMS} in | |||||||
|         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) |         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||||
|         comms_type='none' |         comms_type='none' | ||||||
|      ;; |      ;; | ||||||
|  |      mpi3l*) | ||||||
|  |        AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) | ||||||
|  |        comms_type='mpi3l' | ||||||
|  |      ;; | ||||||
|      mpi3*) |      mpi3*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) |         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) | ||||||
|         comms_type='mpi3' |         comms_type='mpi3' | ||||||
|      ;; |      ;; | ||||||
|      mpit) |  | ||||||
|         AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) |  | ||||||
|         comms_type='mpit' |  | ||||||
|      ;; |  | ||||||
|      mpi*) |      mpi*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) |         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||||
|         comms_type='mpi' |         comms_type='mpi' | ||||||
| @@ -392,7 +316,7 @@ case ${ac_COMMS} in | |||||||
|         comms_type='shmem' |         comms_type='shmem' | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|         AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); |         AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
| case ${ac_COMMS} in | case ${ac_COMMS} in | ||||||
| @@ -410,7 +334,7 @@ esac | |||||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPIT,  [ test "${comms_type}X" == "mpitX" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | ||||||
|  |  | ||||||
| ############### RNG selection | ############### RNG selection | ||||||
| @@ -429,7 +353,7 @@ case ${ac_RNG} in | |||||||
|       AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) |       AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); |       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| @@ -446,7 +370,7 @@ case ${ac_TIMERS} in | |||||||
|       AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) |       AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); |       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| @@ -458,7 +382,7 @@ case ${ac_CHROMA} in | |||||||
|      yes|no) |      yes|no) | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); |        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| @@ -479,67 +403,12 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg]) | |||||||
|  |  | ||||||
| ############### Ouput | ############### Ouput | ||||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||||
| GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" |  | ||||||
| GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS" |  | ||||||
| GRID_LIBS=$LIBS |  | ||||||
| GRID_SHORT_SHA=`git rev-parse --short HEAD` |  | ||||||
| GRID_SHA=`git rev-parse HEAD` |  | ||||||
| GRID_BRANCH=`git rev-parse --abbrev-ref HEAD` |  | ||||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||||
| AC_SUBST([AM_CFLAGS]) | AC_SUBST([AM_CFLAGS]) | ||||||
| AC_SUBST([AM_CXXFLAGS]) | AC_SUBST([AM_CXXFLAGS]) | ||||||
| AC_SUBST([AM_LDFLAGS]) | AC_SUBST([AM_LDFLAGS]) | ||||||
| AC_SUBST([GRID_CXXFLAGS]) |  | ||||||
| AC_SUBST([GRID_LDFLAGS]) |  | ||||||
| AC_SUBST([GRID_LIBS]) |  | ||||||
| AC_SUBST([GRID_SHA]) |  | ||||||
| AC_SUBST([GRID_BRANCH]) |  | ||||||
|  |  | ||||||
| git_commit=`cd $srcdir && ./scripts/configure.commit` |  | ||||||
|  |  | ||||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |  | ||||||
| Summary of configuration for $PACKAGE v$VERSION |  | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |  | ||||||
| ----- GIT VERSION ------------------------------------- |  | ||||||
| $git_commit |  | ||||||
| ----- PLATFORM ---------------------------------------- |  | ||||||
| architecture (build)        : $build_cpu |  | ||||||
| os (build)                  : $build_os |  | ||||||
| architecture (target)       : $target_cpu |  | ||||||
| os (target)                 : $target_os |  | ||||||
| compiler vendor             : ${ax_cv_cxx_compiler_vendor} |  | ||||||
| compiler version            : ${ax_cv_gxx_version} |  | ||||||
| ----- BUILD OPTIONS ----------------------------------- |  | ||||||
| SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} |  | ||||||
| Threading                   : ${ac_openmp} |  | ||||||
| Communications type         : ${comms_type} |  | ||||||
| Shared memory allocator     : ${ac_SHM} |  | ||||||
| Shared memory mmap path     : ${ac_SHMPATH} |  | ||||||
| Default precision           : ${ac_PRECISION} |  | ||||||
| Software FP16 conversion    : ${ac_SFW_FP16} |  | ||||||
| RNG choice                  : ${ac_RNG} |  | ||||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| LAPACK                      : ${ac_LAPACK} |  | ||||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` |  | ||||||
| ----- BUILD FLAGS ------------------------------------- |  | ||||||
| CXXFLAGS: |  | ||||||
| `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` |  | ||||||
| LDFLAGS: |  | ||||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` |  | ||||||
| LIBS: |  | ||||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` |  | ||||||
| -------------------------------------------------------" > grid.configure.summary |  | ||||||
|  |  | ||||||
| GRID_SUMMARY="`cat grid.configure.summary`" |  | ||||||
| AM_SUBST_NOTMAKE([GRID_SUMMARY]) |  | ||||||
| AC_SUBST([GRID_SUMMARY]) |  | ||||||
|  |  | ||||||
| AC_CONFIG_FILES([grid-config], [chmod +x grid-config]) |  | ||||||
| AC_CONFIG_FILES(Makefile) | AC_CONFIG_FILES(Makefile) | ||||||
| AC_CONFIG_FILES(lib/Makefile) | AC_CONFIG_FILES(lib/Makefile) | ||||||
| AC_CONFIG_FILES(tests/Makefile) | AC_CONFIG_FILES(tests/Makefile) | ||||||
| @@ -550,7 +419,6 @@ AC_CONFIG_FILES(tests/forces/Makefile) | |||||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) | AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||||
| AC_CONFIG_FILES(tests/hmc/Makefile) | AC_CONFIG_FILES(tests/hmc/Makefile) | ||||||
| AC_CONFIG_FILES(tests/solver/Makefile) | AC_CONFIG_FILES(tests/solver/Makefile) | ||||||
| AC_CONFIG_FILES(tests/smearing/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) | AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||||
| AC_CONFIG_FILES(tests/testu01/Makefile) | AC_CONFIG_FILES(tests/testu01/Makefile) | ||||||
| AC_CONFIG_FILES(benchmarks/Makefile) | AC_CONFIG_FILES(benchmarks/Makefile) | ||||||
| @@ -558,7 +426,36 @@ AC_CONFIG_FILES(extras/Makefile) | |||||||
| AC_CONFIG_FILES(extras/Hadrons/Makefile) | AC_CONFIG_FILES(extras/Hadrons/Makefile) | ||||||
| AC_OUTPUT | AC_OUTPUT | ||||||
|  |  | ||||||
|  | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | Summary of configuration for $PACKAGE v$VERSION | ||||||
|  | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | ----- PLATFORM ---------------------------------------- | ||||||
|  | architecture (build)        : $build_cpu | ||||||
|  | os (build)                  : $build_os | ||||||
|  | architecture (target)       : $target_cpu | ||||||
|  | os (target)                 : $target_os | ||||||
|  | compiler vendor             : ${ax_cv_cxx_compiler_vendor} | ||||||
|  | compiler version            : ${ax_cv_gxx_version} | ||||||
|  | ----- BUILD OPTIONS ----------------------------------- | ||||||
|  | SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | ||||||
|  | Threading                   : ${ac_openmp}  | ||||||
|  | Communications type         : ${comms_type} | ||||||
|  | Default precision           : ${ac_PRECISION} | ||||||
|  | Software FP16 conversion    : ${ac_SFW_FP16} | ||||||
|  | RNG choice                  : ${ac_RNG}  | ||||||
|  | GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | LAPACK                      : ${ac_LAPACK} | ||||||
|  | FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||||
|  | ----- BUILD FLAGS ------------------------------------- | ||||||
|  | CXXFLAGS: | ||||||
|  | `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
|  | LDFLAGS: | ||||||
|  | `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
|  | LIBS: | ||||||
|  | `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
|  | -------------------------------------------------------" > config.summary | ||||||
| echo "" | echo "" | ||||||
| cat grid.configure.summary | cat config.summary | ||||||
| echo "" | echo "" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -162,8 +162,7 @@ void Application::saveParameterFile(const std::string parameterFileName) | |||||||
| sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | ||||||
|  |  | ||||||
| #define DEFINE_MEMPEAK \ | #define DEFINE_MEMPEAK \ | ||||||
| GeneticScheduler<unsigned int>::ObjFunc memPeak = \ | auto memPeak = [this](const std::vector<unsigned int> &program)\ | ||||||
| [this](const std::vector<unsigned int> &program)\ |  | ||||||
| {\ | {\ | ||||||
|     unsigned int memPeak;\ |     unsigned int memPeak;\ | ||||||
|     bool         msg;\ |     bool         msg;\ | ||||||
|   | |||||||
| @@ -41,10 +41,9 @@ using namespace Hadrons; | |||||||
| // constructor ///////////////////////////////////////////////////////////////// | // constructor ///////////////////////////////////////////////////////////////// | ||||||
| Environment::Environment(void) | Environment::Environment(void) | ||||||
| { | { | ||||||
|     dim_ = GridDefaultLatt(); |     nd_ = GridDefaultLatt().size(); | ||||||
|     nd_  = dim_.size(); |  | ||||||
|     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( |     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( | ||||||
|         dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), |         GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||||
|         GridDefaultMpi())); |         GridDefaultMpi())); | ||||||
|     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); |     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); | ||||||
|     auto loc = getGrid()->LocalDimensions(); |     auto loc = getGrid()->LocalDimensions(); | ||||||
| @@ -133,16 +132,6 @@ unsigned int Environment::getNd(void) const | |||||||
|     return nd_; |     return nd_; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::vector<int> Environment::getDim(void) const |  | ||||||
| { |  | ||||||
|     return dim_; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int Environment::getDim(const unsigned int mu) const |  | ||||||
| { |  | ||||||
|     return dim_[mu]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // random number generator ///////////////////////////////////////////////////// | // random number generator ///////////////////////////////////////////////////// | ||||||
| void Environment::setSeed(const std::vector<int> &seed) | void Environment::setSeed(const std::vector<int> &seed) | ||||||
| { | { | ||||||
| @@ -282,21 +271,6 @@ std::string Environment::getModuleType(const std::string name) const | |||||||
|     return getModuleType(getModuleAddress(name)); |     return getModuleType(getModuleAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string Environment::getModuleNamespace(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     std::string type = getModuleType(address), ns; |  | ||||||
|      |  | ||||||
|     auto pos2 = type.rfind("::"); |  | ||||||
|     auto pos1 = type.rfind("::", pos2 - 2); |  | ||||||
|      |  | ||||||
|     return type.substr(pos1 + 2, pos2 - pos1 - 2); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::string Environment::getModuleNamespace(const std::string name) const |  | ||||||
| { |  | ||||||
|     return getModuleNamespace(getModuleAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Environment::hasModule(const unsigned int address) const | bool Environment::hasModule(const unsigned int address) const | ||||||
| { | { | ||||||
|     return (address < module_.size()); |     return (address < module_.size()); | ||||||
| @@ -518,14 +492,7 @@ std::string Environment::getObjectType(const unsigned int address) const | |||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|     { |     { | ||||||
|         if (object_[address].type) |         return typeName(object_[address].type); | ||||||
|         { |  | ||||||
|             return typeName(object_[address].type); |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|             return "<no type>"; |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|     else if (hasObject(address)) |     else if (hasObject(address)) | ||||||
|     { |     { | ||||||
| @@ -565,23 +532,6 @@ Environment::Size Environment::getObjectSize(const std::string name) const | |||||||
|     return getObjectSize(getObjectAddress(name)); |     return getObjectSize(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectModule(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     if (hasObject(address)) |  | ||||||
|     { |  | ||||||
|         return object_[address].module; |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectModule(const std::string name) const |  | ||||||
| { |  | ||||||
|     return getObjectModule(getObjectAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectLs(const unsigned int address) const | unsigned int Environment::getObjectLs(const unsigned int address) const | ||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|   | |||||||
| @@ -106,8 +106,6 @@ public: | |||||||
|     void                    createGrid(const unsigned int Ls); |     void                    createGrid(const unsigned int Ls); | ||||||
|     GridCartesian *         getGrid(const unsigned int Ls = 1) const; |     GridCartesian *         getGrid(const unsigned int Ls = 1) const; | ||||||
|     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; |     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; | ||||||
|     std::vector<int>        getDim(void) const; |  | ||||||
|     int                     getDim(const unsigned int mu) const; |  | ||||||
|     unsigned int            getNd(void) const; |     unsigned int            getNd(void) const; | ||||||
|     // random number generator |     // random number generator | ||||||
|     void                    setSeed(const std::vector<int> &seed); |     void                    setSeed(const std::vector<int> &seed); | ||||||
| @@ -133,8 +131,6 @@ public: | |||||||
|     std::string             getModuleName(const unsigned int address) const; |     std::string             getModuleName(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const unsigned int address) const; |     std::string             getModuleType(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const std::string name) const; |     std::string             getModuleType(const std::string name) const; | ||||||
|     std::string             getModuleNamespace(const unsigned int address) const; |  | ||||||
|     std::string             getModuleNamespace(const std::string name) const; |  | ||||||
|     bool                    hasModule(const unsigned int address) const; |     bool                    hasModule(const unsigned int address) const; | ||||||
|     bool                    hasModule(const std::string name) const; |     bool                    hasModule(const std::string name) const; | ||||||
|     Graph<unsigned int>     makeModuleGraph(void) const; |     Graph<unsigned int>     makeModuleGraph(void) const; | ||||||
| @@ -175,8 +171,6 @@ public: | |||||||
|     std::string             getObjectType(const std::string name) const; |     std::string             getObjectType(const std::string name) const; | ||||||
|     Size                    getObjectSize(const unsigned int address) const; |     Size                    getObjectSize(const unsigned int address) const; | ||||||
|     Size                    getObjectSize(const std::string name) const; |     Size                    getObjectSize(const std::string name) const; | ||||||
|     unsigned int            getObjectModule(const unsigned int address) const; |  | ||||||
|     unsigned int            getObjectModule(const std::string name) const; |  | ||||||
|     unsigned int            getObjectLs(const unsigned int address) const; |     unsigned int            getObjectLs(const unsigned int address) const; | ||||||
|     unsigned int            getObjectLs(const std::string name) const; |     unsigned int            getObjectLs(const std::string name) const; | ||||||
|     bool                    hasObject(const unsigned int address) const; |     bool                    hasObject(const unsigned int address) const; | ||||||
| @@ -187,10 +181,6 @@ public: | |||||||
|     bool                    hasCreatedObject(const std::string name) const; |     bool                    hasCreatedObject(const std::string name) const; | ||||||
|     bool                    isObject5d(const unsigned int address) const; |     bool                    isObject5d(const unsigned int address) const; | ||||||
|     bool                    isObject5d(const std::string name) const; |     bool                    isObject5d(const std::string name) const; | ||||||
|     template <typename T> |  | ||||||
|     bool                    isObjectOfType(const unsigned int address) const; |  | ||||||
|     template <typename T> |  | ||||||
|     bool                    isObjectOfType(const std::string name) const; |  | ||||||
|     Environment::Size       getTotalSize(void) const; |     Environment::Size       getTotalSize(void) const; | ||||||
|     void                    addOwnership(const unsigned int owner, |     void                    addOwnership(const unsigned int owner, | ||||||
|                                          const unsigned int property); |                                          const unsigned int property); | ||||||
| @@ -207,7 +197,6 @@ private: | |||||||
|     bool                                   dryRun_{false}; |     bool                                   dryRun_{false}; | ||||||
|     unsigned int                           traj_, locVol_; |     unsigned int                           traj_, locVol_; | ||||||
|     // grids |     // grids | ||||||
|     std::vector<int>                       dim_; |  | ||||||
|     GridPt                                 grid4d_; |     GridPt                                 grid4d_; | ||||||
|     std::map<unsigned int, GridPt>         grid5d_; |     std::map<unsigned int, GridPt>         grid5d_; | ||||||
|     GridRbPt                               gridRb4d_; |     GridRbPt                               gridRb4d_; | ||||||
| @@ -354,7 +343,7 @@ T * Environment::getObject(const unsigned int address) const | |||||||
|         else |         else | ||||||
|         { |         { | ||||||
|             HADRON_ERROR("object with address " + std::to_string(address) + |             HADRON_ERROR("object with address " + std::to_string(address) + | ||||||
|                          " does not have type '" + typeName(&typeid(T)) + |                          " does not have type '" + typeid(T).name() + | ||||||
|                          "' (has type '" + getObjectType(address) + "')"); |                          "' (has type '" + getObjectType(address) + "')"); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -391,37 +380,6 @@ T * Environment::createLattice(const std::string name) | |||||||
|     return createLattice<T>(getObjectAddress(name)); |     return createLattice<T>(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> |  | ||||||
| bool Environment::isObjectOfType(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     if (hasRegisteredObject(address)) |  | ||||||
|     { |  | ||||||
|         if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get())) |  | ||||||
|         { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else if (hasObject(address)) |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("object with address " + std::to_string(address) + |  | ||||||
|                      " exists but is not registered"); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename T> |  | ||||||
| bool Environment::isObjectOfType(const std::string name) const |  | ||||||
| { |  | ||||||
|     return isObjectOfType<T>(getObjectAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Environment_hpp_ | #endif // Hadrons_Environment_hpp_ | ||||||
|   | |||||||
| @@ -51,43 +51,23 @@ using Grid::operator<<; | |||||||
|  * error with GCC 5 (clang & GCC 6 compile fine without it). |  * error with GCC 5 (clang & GCC 6 compile fine without it). | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | // FIXME: find a way to do that in a more general fashion | ||||||
| #ifndef FIMPL | #ifndef FIMPL | ||||||
| #define FIMPL WilsonImplR | #define FIMPL WilsonImplR | ||||||
| #endif | #endif | ||||||
| #ifndef SIMPL |  | ||||||
| #define SIMPL ScalarImplCR |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| // type aliases | // type aliases | ||||||
| #define FERM_TYPE_ALIASES(FImpl, suffix)\ | #define TYPE_ALIASES(FImpl, suffix)\ | ||||||
| typedef FermionOperator<FImpl>                       FMat##suffix;             \ | typedef FermionOperator<FImpl>                       FMat##suffix;             \ | ||||||
| typedef typename FImpl::FermionField                 FermionField##suffix;     \ | typedef typename FImpl::FermionField                 FermionField##suffix;     \ | ||||||
| typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | ||||||
| typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | ||||||
| typedef std::vector<typename FImpl::SitePropagator::scalar_object>             \ | typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\ | ||||||
|                                                      SlicedPropagator##suffix; | typedef std::function<void(FermionField##suffix &,                             \ | ||||||
|  |  | ||||||
| #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; |  | ||||||
|  |  | ||||||
| #define SCALAR_TYPE_ALIASES(SImpl, suffix)\ |  | ||||||
| typedef typename SImpl::Field ScalarField##suffix;\ |  | ||||||
| typedef typename SImpl::Field PropagatorField##suffix; |  | ||||||
|  |  | ||||||
| #define SOLVER_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| typedef std::function<void(FermionField##suffix &,\ |  | ||||||
|                       const FermionField##suffix &)> SolverFn##suffix; |                       const FermionField##suffix &)> SolverFn##suffix; | ||||||
|  |  | ||||||
| #define SINK_TYPE_ALIASES(suffix)\ |  | ||||||
| typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix; |  | ||||||
|  |  | ||||||
| #define FGS_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| FERM_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| GAUGE_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| SOLVER_TYPE_ALIASES(FImpl, suffix) |  | ||||||
|  |  | ||||||
| // logger | // logger | ||||||
| class HadronsLogger: public Logger | class HadronsLogger: public Logger | ||||||
| { | { | ||||||
| @@ -165,15 +145,6 @@ std::string typeName(void) | |||||||
|     return typeName(typeIdPt<T>()); |     return typeName(typeIdPt<T>()); | ||||||
| } | } | ||||||
|  |  | ||||||
| // default writers/readers |  | ||||||
| #ifdef HAVE_HDF5 |  | ||||||
| typedef Hdf5Reader CorrReader; |  | ||||||
| typedef Hdf5Writer CorrWriter; |  | ||||||
| #else |  | ||||||
| typedef XmlReader CorrReader; |  | ||||||
| typedef XmlWriter CorrWriter; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Global_hpp_ | #endif // Hadrons_Global_hpp_ | ||||||
|   | |||||||
| @@ -1,25 +1,40 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSink/Point.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Wall.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/Quark.hpp> | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MAction_DWF_hpp_ | #ifndef Hadrons_DWF_hpp_ | ||||||
| #define Hadrons_MAction_DWF_hpp_ | #define Hadrons_DWF_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -48,15 +48,14 @@ public: | |||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     unsigned int, Ls, |                                     unsigned int, Ls, | ||||||
|                                     double      , mass, |                                     double      , mass, | ||||||
|                                     double      , M5, |                                     double      , M5); | ||||||
|                                     std::string , boundary); |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TDWF: public Module<DWFPar> | class TDWF: public Module<DWFPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TDWF(const std::string name); |     TDWF(const std::string name); | ||||||
| @@ -117,19 +116,14 @@ void TDWF<FImpl>::execute(void) | |||||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " |                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" |                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  |  | ||||||
|                  << std::endl; |  | ||||||
|     env().createGrid(par().Ls); |     env().createGrid(par().Ls); | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &g4     = *env().getGrid(); |     auto &g4     = *env().getGrid(); | ||||||
|     auto &grb4   = *env().getRbGrid(); |     auto &grb4   = *env().getRbGrid(); | ||||||
|     auto &g5     = *env().getGrid(par().Ls); |     auto &g5     = *env().getGrid(par().Ls); | ||||||
|     auto &grb5   = *env().getRbGrid(par().Ls); |     auto &grb5   = *env().getRbGrid(par().Ls); | ||||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); |  | ||||||
|     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); |  | ||||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, |     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||||
|                                                 par().mass, par().M5, |                                                 par().mass, par().M5); | ||||||
|                                                 implParams); |  | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -137,4 +131,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MAction_DWF_hpp_ | #endif // Hadrons_DWF_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MAction_Wilson_hpp_ | #ifndef Hadrons_Wilson_hpp_ | ||||||
| #define Hadrons_MAction_Wilson_hpp_ | #define Hadrons_Wilson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -46,15 +46,14 @@ class WilsonPar: Serializable | |||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     double     , mass, |                                     double     , mass); | ||||||
|                                     std::string, boundary); |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TWilson: public Module<WilsonPar> | class TWilson: public Module<WilsonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TWilson(const std::string name); |     TWilson(const std::string name); | ||||||
| @@ -113,15 +112,10 @@ void TWilson<FImpl>::execute() | |||||||
| { | { | ||||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass |     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; |                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  |  | ||||||
|                  << std::endl; |  | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &grid   = *env().getGrid(); |     auto &grid   = *env().getGrid(); | ||||||
|     auto &gridRb = *env().getRbGrid(); |     auto &gridRb = *env().getRbGrid(); | ||||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); |     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); | ||||||
|     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); |  | ||||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, |  | ||||||
|                                             implParams); |  | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Baryon_hpp_ | #ifndef Hadrons_Baryon_hpp_ | ||||||
| #define Hadrons_MContraction_Baryon_hpp_ | #define Hadrons_Baryon_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3> | |||||||
| class TBaryon: public Module<BaryonPar> | class TBaryon: public Module<BaryonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |     TYPE_ALIASES(FImpl1, 1); | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |     TYPE_ALIASES(FImpl2, 2); | ||||||
|     FERM_TYPE_ALIASES(FImpl3, 3); |     TYPE_ALIASES(FImpl3, 3); | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
| @@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" |                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" | ||||||
|                  << par().q3 << "'" << std::endl; |                  << par().q3 << "'" << std::endl; | ||||||
|      |      | ||||||
|     CorrWriter             writer(par().output); |     XmlWriter             writer(par().output); | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); |     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); | ||||||
| @@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|      |      | ||||||
|     // FIXME: do contractions |     // FIXME: do contractions | ||||||
|      |      | ||||||
|     // write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Baryon_hpp_ | #endif // Hadrons_Baryon_hpp_ | ||||||
|   | |||||||
| @@ -1,144 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
| #define Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                                DiscLoop                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| class DiscLoopPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar, |  | ||||||
|                                     std::string,    q_loop, |  | ||||||
|                                     Gamma::Algebra, gamma, |  | ||||||
|                                     std::string,    output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TDiscLoop: public Module<DiscLoopPar> |  | ||||||
| { |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|     class Result: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |  | ||||||
|                                         Gamma::Algebra, gamma, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TDiscLoop(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TDiscLoop(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       TDiscLoop implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TDiscLoop<FImpl>::TDiscLoop(const std::string name) |  | ||||||
| : Module<DiscLoopPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TDiscLoop<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q_loop}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TDiscLoop<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TDiscLoop<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|      |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TDiscLoop<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing disconnected loop contraction '" << getName()  |  | ||||||
|                  << "' using '" << par().q_loop << "' with " << par().gamma  |  | ||||||
|                  << " insertion." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter            writer(par().output); |  | ||||||
|     PropagatorField       &q_loop = *env().template getObject<PropagatorField>(par().q_loop); |  | ||||||
|     LatticeComplex        c(env().getGrid()); |  | ||||||
|     Gamma                 gamma(par().gamma); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|  |  | ||||||
|     c = trace(gamma*q_loop); |  | ||||||
|     sliceSum(c, buf, Tp); |  | ||||||
|  |  | ||||||
|     result.gamma = par().gamma; |  | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|     { |  | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     write(writer, "disc", result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
| @@ -1,170 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
| #define Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * 3pt contraction with gamma matrix insertion. |  | ||||||
|  * |  | ||||||
|  * Schematic: |  | ||||||
|  * |  | ||||||
|  *             q2           q3 |  | ||||||
|  *        /----<------*------<----¬ |  | ||||||
|  *       /          gamma          \ |  | ||||||
|  *      /                           \ |  | ||||||
|  *   i *                            * f |  | ||||||
|  *      \                          / |  | ||||||
|  *       \                        / |  | ||||||
|  *        \----------->----------/ |  | ||||||
|  *                   q1 |  | ||||||
|  * |  | ||||||
|  *      trace(g5*q1*adj(q2)*g5*gamma*q3) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                               Gamma3pt                                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| class Gamma3ptPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar, |  | ||||||
|                                     std::string,    q1, |  | ||||||
|                                     std::string,    q2, |  | ||||||
|                                     std::string,    q3, |  | ||||||
|                                     Gamma::Algebra, gamma, |  | ||||||
|                                     std::string,    output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| class TGamma3pt: public Module<Gamma3ptPar> |  | ||||||
| { |  | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |  | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |  | ||||||
|     FERM_TYPE_ALIASES(FImpl3, 3); |  | ||||||
|     class Result: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |  | ||||||
|                                         Gamma::Algebra, gamma, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TGamma3pt(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TGamma3pt(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       TGamma3pt implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name) |  | ||||||
| : Module<Gamma3ptPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void) |  | ||||||
| { |  | ||||||
|      |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing 3pt contractions '" << getName() << "' using" |  | ||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "' and '" |  | ||||||
|                  << par().q3 << "', with " << par().gamma << " insertion."  |  | ||||||
|                  << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter            writer(par().output); |  | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |  | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |  | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q3); |  | ||||||
|     LatticeComplex        c(env().getGrid()); |  | ||||||
|     Gamma                 g5(Gamma::Algebra::Gamma5); |  | ||||||
|     Gamma                 gamma(par().gamma); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|  |  | ||||||
|     c = trace(g5*q1*adj(q2)*(g5*gamma)*q3); |  | ||||||
|     sliceSum(c, buf, Tp); |  | ||||||
|  |  | ||||||
|     result.gamma = par().gamma; |  | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|     { |  | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     write(writer, "gamma3pt", result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
| @@ -6,10 +6,8 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|         Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -29,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Meson_hpp_ | #ifndef Hadrons_Meson_hpp_ | ||||||
| #define Hadrons_MContraction_Meson_hpp_ | #define Hadrons_Meson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -38,56 +36,32 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Meson contractions |  | ||||||
|  ----------------------------- |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - q1: input propagator 1 (string) |  | ||||||
|  - q2: input propagator 2 (string) |  | ||||||
|  - gammas: gamma products to insert at sink & source, pairs of gamma matrices  |  | ||||||
|            (space-separated strings) in angled brackets (i.e. <g_sink g_src>), |  | ||||||
|            in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>"). |  | ||||||
|  |  | ||||||
|            Special values: "all" - perform all possible contractions. |  | ||||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."), |  | ||||||
|         given as multiples of (2*pi) / L. |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                                TMeson                                       * |  *                                TMeson                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
| typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair; |  | ||||||
|  |  | ||||||
| class MesonPar: Serializable | class MesonPar: Serializable | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, | ||||||
|                                     std::string, q1, |                                     std::string,    q1, | ||||||
|                                     std::string, q2, |                                     std::string,    q2, | ||||||
|                                     std::string, gammas, |                                     std::string,    output, | ||||||
|                                     std::string, sink, |                                     Gamma::Algebra, gammaSource, | ||||||
|                                     std::string, output); |                                     Gamma::Algebra, gammaSink); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| class TMeson: public Module<MesonPar> | class TMeson: public Module<MesonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |     TYPE_ALIASES(FImpl1, 1); | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |     TYPE_ALIASES(FImpl2, 2); | ||||||
|     FERM_TYPE_ALIASES(ScalarImplCR, Scalar); |  | ||||||
|     SINK_TYPE_ALIASES(Scalar); |  | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); | ||||||
|                                         Gamma::Algebra, gamma_snk, |  | ||||||
|                                         Gamma::Algebra, gamma_src, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |     }; | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
| @@ -97,7 +71,6 @@ public: | |||||||
|     // dependencies/products |     // dependencies/products | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     virtual void parseGammaString(std::vector<GammaPair> &gammaList); |  | ||||||
|     // execution |     // execution | ||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
| @@ -117,7 +90,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name) | |||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> input = {par().q1, par().q2, par().sink}; |     std::vector<std::string> input = {par().q1, par().q2}; | ||||||
|      |      | ||||||
|     return input; |     return input; | ||||||
| } | } | ||||||
| @@ -130,35 +103,7 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | |||||||
|     return output; |     return output; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> |  | ||||||
| void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) |  | ||||||
| { |  | ||||||
|     gammaList.clear(); |  | ||||||
|     // Determine gamma matrices to insert at source/sink. |  | ||||||
|     if (par().gammas.compare("all") == 0) |  | ||||||
|     { |  | ||||||
|         // Do all contractions. |  | ||||||
|         for (unsigned int i = 1; i < Gamma::nGamma; i += 2) |  | ||||||
|         { |  | ||||||
|             for (unsigned int j = 1; j < Gamma::nGamma; j += 2) |  | ||||||
|             { |  | ||||||
|                 gammaList.push_back(std::make_pair((Gamma::Algebra)i,  |  | ||||||
|                                                    (Gamma::Algebra)j)); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         // Parse individual contractions from input string. |  | ||||||
|         gammaList = strToVec<GammaPair>(par().gammas); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| #define mesonConnected(q1, q2, gSnk, gSrc) \ |  | ||||||
| (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| void TMeson<FImpl1, FImpl2>::execute(void) | void TMeson<FImpl1, FImpl2>::execute(void) | ||||||
| { | { | ||||||
| @@ -166,73 +111,21 @@ void TMeson<FImpl1, FImpl2>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" |                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     CorrWriter             writer(par().output); |     XmlWriter             writer(par().output); | ||||||
|     std::vector<TComplex>  buf; |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     std::vector<Result>    result; |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); |     LatticeComplex        c(env().getGrid()); | ||||||
|     std::vector<GammaPair> gammaList; |     Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink); | ||||||
|     int                    nt = env().getDim(Tp); |     Gamma                 g5(Gamma::Algebra::Gamma5); | ||||||
|  |     std::vector<TComplex> buf; | ||||||
|  |     Result                result; | ||||||
|      |      | ||||||
|     parseGammaString(gammaList); |     c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5); | ||||||
|     result.resize(gammaList.size()); |     sliceSum(c, buf, Tp); | ||||||
|     for (unsigned int i = 0; i < result.size(); ++i) |     result.corr.resize(buf.size()); | ||||||
|  |     for (unsigned int t = 0; t < buf.size(); ++t) | ||||||
|     { |     { | ||||||
|         result[i].gamma_snk = gammaList[i].first; |         result.corr[t] = TensorRemove(buf[t]); | ||||||
|         result[i].gamma_src = gammaList[i].second; |  | ||||||
|         result[i].corr.resize(nt); |  | ||||||
|     } |  | ||||||
|     if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and |  | ||||||
|         env().template isObjectOfType<SlicedPropagator2>(par().q2)) |  | ||||||
|     { |  | ||||||
|         SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1); |  | ||||||
|         SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "(propagator already sinked)" << std::endl; |  | ||||||
|         for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|         { |  | ||||||
|             Gamma gSnk(gammaList[i].first); |  | ||||||
|             Gamma gSrc(gammaList[i].second); |  | ||||||
|              |  | ||||||
|             for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|             { |  | ||||||
|                 result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         PropagatorField1 &q1   = *env().template getObject<PropagatorField1>(par().q1); |  | ||||||
|         PropagatorField2 &q2   = *env().template getObject<PropagatorField2>(par().q2); |  | ||||||
|         LatticeComplex   c(env().getGrid()); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; |  | ||||||
|         for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|         { |  | ||||||
|             Gamma       gSnk(gammaList[i].first); |  | ||||||
|             Gamma       gSrc(gammaList[i].second); |  | ||||||
|             std::string ns; |  | ||||||
|                  |  | ||||||
|             ns = env().getModuleNamespace(env().getObjectModule(par().sink)); |  | ||||||
|             if (ns == "MSource") |  | ||||||
|             { |  | ||||||
|                 PropagatorField1 &sink = |  | ||||||
|                     *env().template getObject<PropagatorField1>(par().sink); |  | ||||||
|                  |  | ||||||
|                 c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); |  | ||||||
|                 sliceSum(c, buf, Tp); |  | ||||||
|             } |  | ||||||
|             else if (ns == "MSink") |  | ||||||
|             { |  | ||||||
|                 SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink); |  | ||||||
|                  |  | ||||||
|                 c   = trace(mesonConnected(q1, q2, gSnk, gSrc)); |  | ||||||
|                 buf = sink(c); |  | ||||||
|             } |  | ||||||
|             for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|             { |  | ||||||
|                 result[i].corr[t] = TensorRemove(buf[t]); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|     write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
| @@ -241,4 +134,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Meson_hpp_ | #endif // Hadrons_Meson_hpp_ | ||||||
|   | |||||||
| @@ -1,114 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonian                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| /******************************************************************************* |  | ||||||
|  * Utilities for contractions involving the Weak Hamiltonian. |  | ||||||
|  ******************************************************************************/ |  | ||||||
| //// Sum and store correlator. |  | ||||||
| #define MAKE_DIAG(exp, buf, res, n)\ |  | ||||||
| sliceSum(exp, buf, Tp);\ |  | ||||||
| res.name = (n);\ |  | ||||||
| res.corr.resize(buf.size());\ |  | ||||||
| for (unsigned int t = 0; t < buf.size(); ++t)\ |  | ||||||
| {\ |  | ||||||
|     res.corr[t] = TensorRemove(buf[t]);\ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //// Contraction of mu index: use 'mu' variable in exp. |  | ||||||
| #define SUM_MU(buf,exp)\ |  | ||||||
| buf = zero;\ |  | ||||||
| for (unsigned int mu = 0; mu < ndim; ++mu)\ |  | ||||||
| {\ |  | ||||||
|     buf += exp;\ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| enum  |  | ||||||
| { |  | ||||||
|   i_V = 0, |  | ||||||
|   i_A = 1, |  | ||||||
|   n_i = 2 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class WeakHamiltonianPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar, |  | ||||||
|                                     std::string, q1, |  | ||||||
|                                     std::string, q2, |  | ||||||
|                                     std::string, q3, |  | ||||||
|                                     std::string, q4, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #define MAKE_WEAK_MODULE(modname)\ |  | ||||||
| class T##modname: public Module<WeakHamiltonianPar>\ |  | ||||||
| {\ |  | ||||||
| public:\ |  | ||||||
|     FERM_TYPE_ALIASES(FIMPL,)\ |  | ||||||
|     class Result: Serializable\ |  | ||||||
|     {\ |  | ||||||
|     public:\ |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\ |  | ||||||
|                                         std::string, name,\ |  | ||||||
|                                         std::vector<Complex>, corr);\ |  | ||||||
|     };\ |  | ||||||
| public:\ |  | ||||||
|     /* constructor */ \ |  | ||||||
|     T##modname(const std::string name);\ |  | ||||||
|     /* destructor */ \ |  | ||||||
|     virtual ~T##modname(void) = default;\ |  | ||||||
|     /* dependency relation */ \ |  | ||||||
|     virtual std::vector<std::string> getInput(void);\ |  | ||||||
|     virtual std::vector<std::string> getOutput(void);\ |  | ||||||
|     /* setup */ \ |  | ||||||
|     virtual void setup(void);\ |  | ||||||
|     /* execution */ \ |  | ||||||
|     virtual void execute(void);\ |  | ||||||
|     std::vector<std::string> VA_label = {"V", "A"};\ |  | ||||||
| };\ |  | ||||||
| MODULE_REGISTER_NS(modname, T##modname, MContraction); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
| @@ -1,137 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian current-current contractions, Eye-type. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical |  | ||||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematics:        q4                 |                   |  | ||||||
|  *                  /-<-¬                |                              |  | ||||||
|  *                 /     \               |             q2           q3 |  | ||||||
|  *                 \     /               |        /----<------*------<----¬                         |  | ||||||
|  *            q2    \   /    q3          |       /          /-*-¬          \ |  | ||||||
|  *       /-----<-----* *-----<----¬      |      /          /     \          \ |  | ||||||
|  *    i *            H_W           * f   |   i *           \     /  q4      * f |  | ||||||
|  *       \                        /      |      \           \->-/          /    |  | ||||||
|  *        \                      /       |       \                        /        |  | ||||||
|  *         \---------->---------/        |        \----------->----------/         |  | ||||||
|  *                   q1                  |                   q1                   |  | ||||||
|  *                                       | |  | ||||||
|  *                Saucer (S)             |                  Eye (E) |  | ||||||
|  *  |  | ||||||
|  * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) |  | ||||||
|  * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                  TWeakHamiltonianEye implementation                        * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name) |  | ||||||
| : Module<WeakHamiltonianPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TWeakHamiltonianEye::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TWeakHamiltonianEye::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianEye::setup(void) |  | ||||||
| { |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianEye::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_eye_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     PropagatorField              tmp1(env().getGrid()); |  | ||||||
|     LatticeComplex               tmp2(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> S_body(ndim, tmp1); |  | ||||||
|     std::vector<PropagatorField> S_loop(ndim, tmp1); |  | ||||||
|     std::vector<LatticeComplex>  E_body(ndim, tmp2); |  | ||||||
|     std::vector<LatticeComplex>  E_loop(ndim, tmp2); |  | ||||||
|  |  | ||||||
|     // Setup for S-type contractions. |  | ||||||
|     for (int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); |  | ||||||
|         S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform S-type contractions.     |  | ||||||
|     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") |  | ||||||
|  |  | ||||||
|     // Recycle sub-expressions for E-type contractions. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         E_body[mu] = trace(S_body[mu]); |  | ||||||
|         E_loop[mu] = trace(S_loop[mu]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform E-type contractions. |  | ||||||
|     SUM_MU(expbuf, E_body[mu]*E_loop[mu]) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_Eye", result); |  | ||||||
| } |  | ||||||
| @@ -1,58 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonianEye                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     S_diag = 0, |  | ||||||
|     E_diag = 1, |  | ||||||
|     n_eye_diag = 2 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Saucer and Eye subdiagram contractions. |  | ||||||
| #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma) |  | ||||||
| #define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakHamiltonianEye) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
| @@ -1,139 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian current-current contractions, Non-Eye-type. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical |  | ||||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematic:      |  | ||||||
|  *            q2             q3          |           q2              q3 |  | ||||||
|  *          /--<--¬       /--<--¬        |        /--<--¬         /--<--¬        |  | ||||||
|  *         /       \     /       \       |       /       \       /       \       |  | ||||||
|  *        /         \   /         \      |      /         \     /         \      |  | ||||||
|  *       /           \ /           \     |     /           \   /           \     |  | ||||||
|  *    i *             * H_W         *  f |  i *             * * H_W         * f  |  | ||||||
|  *      \             *             |    |     \           /   \           / |  | ||||||
|  *       \           / \           /     |      \         /     \         /     |  | ||||||
|  *        \         /   \         /      |       \       /       \       /   |  | ||||||
|  *         \       /     \       /       |        \-->--/         \-->--/       |  | ||||||
|  *          \-->--/       \-->--/        |          q1               q4  |  | ||||||
|  *            q1             q4          | |  | ||||||
|  *                Connected (C)          |                 Wing (W) |  | ||||||
|  * |  | ||||||
|  * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu]) |  | ||||||
|  * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu]) |  | ||||||
|  *  |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                  TWeakHamiltonianNonEye implementation                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name) |  | ||||||
| : Module<WeakHamiltonianPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TWeakHamiltonianNonEye::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianNonEye::setup(void) |  | ||||||
| { |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianNonEye::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|      |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_noneye_diag);  |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     PropagatorField              tmp1(env().getGrid()); |  | ||||||
|     LatticeComplex               tmp2(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> C_i_side_loop(ndim, tmp1); |  | ||||||
|     std::vector<PropagatorField> C_f_side_loop(ndim, tmp1); |  | ||||||
|     std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2); |  | ||||||
|     std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2); |  | ||||||
|  |  | ||||||
|     // Setup for C-type contractions. |  | ||||||
|     for (int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu])); |  | ||||||
|         C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform C-type contractions.     |  | ||||||
|     SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu])) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C") |  | ||||||
|  |  | ||||||
|     // Recycle sub-expressions for W-type contractions. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         W_i_side_loop[mu] = trace(C_i_side_loop[mu]); |  | ||||||
|         W_f_side_loop[mu] = trace(C_f_side_loop[mu]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform W-type contractions. |  | ||||||
|     SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu]) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_NonEye", result); |  | ||||||
| } |  | ||||||
| @@ -1,57 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonianNonEye                              * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     W_diag = 0, |  | ||||||
|     C_diag = 1, |  | ||||||
|     n_noneye_diag = 2 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Wing and Connected subdiagram contractions |  | ||||||
| #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakHamiltonianNonEye) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
| @@ -1,135 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian + current contractions, disconnected topology for neutral  |  | ||||||
|  * mesons. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak |  | ||||||
|  * Hamiltonian in the physical basis and an additional current J (see e.g.  |  | ||||||
|  * Fig 11 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematic: |  | ||||||
|  *                         |  | ||||||
|  *           q2          q4             q3 |  | ||||||
|  *       /--<--¬     /---<--¬       /---<--¬ |  | ||||||
|  *     /         \ /         \     /        \ |  | ||||||
|  *  i *           * H_W      |  J *          * f |  | ||||||
|  *     \         / \         /     \        / |  | ||||||
|  *      \--->---/   \-------/       \------/ |  | ||||||
|  *          q1  |  | ||||||
|  *  |  | ||||||
|  * options |  | ||||||
|  * - q1: input propagator 1 (string) |  | ||||||
|  * - q2: input propagator 2 (string) |  | ||||||
|  * - q3: input propagator 3 (string), assumed to be sequential propagator  |  | ||||||
|  * - q4: input propagator 4 (string), assumed to be a loop |  | ||||||
|  *  |  | ||||||
|  * type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5) |  | ||||||
|  * type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /******************************************************************************* |  | ||||||
|  *                  TWeakNeutral4ptDisc implementation                         * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name) |  | ||||||
| : Module<WeakHamiltonianPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TWeakNeutral4ptDisc::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakNeutral4ptDisc::setup(void) |  | ||||||
| { |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakNeutral4ptDisc::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_neut_disc_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     PropagatorField              tmp(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> meson(ndim, tmp); |  | ||||||
|     std::vector<PropagatorField> loop(ndim, tmp); |  | ||||||
|     LatticeComplex               curr(env().getGrid()); |  | ||||||
|  |  | ||||||
|     // Setup for type 1 contractions. |  | ||||||
|     for (int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu])); |  | ||||||
|         loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|     curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5)); |  | ||||||
|  |  | ||||||
|     // Perform type 1 contractions.     |  | ||||||
|     SUM_MU(expbuf, trace(meson[mu]*loop[mu])) |  | ||||||
|     expbuf *= curr; |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1") |  | ||||||
|  |  | ||||||
|     // Perform type 2 contractions. |  | ||||||
|     SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu])) |  | ||||||
|     expbuf *= curr; |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_disc0", result); |  | ||||||
| } |  | ||||||
| @@ -1,59 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakNeutral4ptDisc                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     neut_disc_1_diag = 0, |  | ||||||
|     neut_disc_2_diag = 1, |  | ||||||
|     n_neut_disc_diag = 2 |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Neutral 4pt disconnected subdiagram contractions. |  | ||||||
| #define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma) |  | ||||||
| #define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma) |  | ||||||
| #define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma)) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakNeutral4ptDisc) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | |||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| void TLoad::execute(void) | void TLoad::execute(void) | ||||||
| { | { | ||||||
|     FieldMetaData  header; |     NerscField  header; | ||||||
|     std::string fileName = par().file + "." |     std::string fileName = par().file + "." | ||||||
|                            + std::to_string(env().getTrajectory()); |                            + std::to_string(env().getTrajectory()); | ||||||
|      |      | ||||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | |||||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); |     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||||
|     NerscIO::readConfiguration(U, header, fileName); |     NerscIO::readConfiguration(U, header, fileName); | ||||||
|     LOG(Message) << "NERSC header:" << std::endl; |     LOG(Message) << "NERSC header:" << std::endl; | ||||||
|     dump_meta_data(header, LOG(Message)); |     dump_nersc_header(header, LOG(Message)); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Load_hpp_ | #ifndef Hadrons_Load_hpp_ | ||||||
| #define Hadrons_MGauge_Load_hpp_ | #define Hadrons_Load_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Load_hpp_ | #endif // Hadrons_Load_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Random_hpp_ | #ifndef Hadrons_Random_hpp_ | ||||||
| #define Hadrons_MGauge_Random_hpp_ | #define Hadrons_Random_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Random_hpp_ | #endif // Hadrons_Random_hpp_ | ||||||
|   | |||||||
| @@ -1,88 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MGauge; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                  TStochEm implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TStochEm::TStochEm(const std::string name) |  | ||||||
| : Module<StochEmPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TStochEm::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TStochEm::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TStochEm::setup(void) |  | ||||||
| { |  | ||||||
|     if (!env().hasRegisteredObject("_" + getName() + "_weight")) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<EmComp>("_" + getName() + "_weight"); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<EmField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TStochEm::execute(void) |  | ||||||
| { |  | ||||||
|     PhotonR photon(par().gauge, par().zmScheme); |  | ||||||
|     EmField &a = *env().createLattice<EmField>(getName()); |  | ||||||
|     EmComp  *w; |  | ||||||
|      |  | ||||||
|     if (!env().hasCreatedObject("_" + getName() + "_weight")) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching stochatic EM potential weight (gauge: " |  | ||||||
|                      << par().gauge << ", zero-mode scheme: " |  | ||||||
|                      << par().zmScheme << ")..." << std::endl; |  | ||||||
|         w = env().createLattice<EmComp>("_" + getName() + "_weight"); |  | ||||||
|         photon.StochasticWeight(*w); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         w = env().getObject<EmComp>("_" + getName() + "_weight"); |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Generating stochatic EM potential..." << std::endl; |  | ||||||
|     photon.StochasticField(a, *env().get4dRng(), *w); |  | ||||||
| } |  | ||||||
| @@ -1,75 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef Hadrons_MGauge_StochEm_hpp_ |  | ||||||
| #define Hadrons_MGauge_StochEm_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         StochEm                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MGauge) |  | ||||||
|  |  | ||||||
| class StochEmPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, |  | ||||||
|                                     PhotonR::Gauge,    gauge, |  | ||||||
|                                     PhotonR::ZmScheme, zmScheme); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TStochEm: public Module<StochEmPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     typedef PhotonR::GaugeField     EmField; |  | ||||||
|     typedef PhotonR::GaugeLinkField EmComp; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TStochEm(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TStochEm(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_StochEm_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Unit_hpp_ | #ifndef Hadrons_Unit_hpp_ | ||||||
| #define Hadrons_MGauge_Unit_hpp_ | #define Hadrons_Unit_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Unit_hpp_ | #endif // Hadrons_Unit_hpp_ | ||||||
|   | |||||||
| @@ -1,132 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
| #define Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Noise loop propagator |  | ||||||
|  ----------------------------- |  | ||||||
|  * loop_x = q_x * adj(eta_x) |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - q = Result of inversion on noise source. |  | ||||||
|  - eta = noise source. |  | ||||||
|  |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         NoiseLoop                                          * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MLoop) |  | ||||||
|  |  | ||||||
| class NoiseLoopPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar, |  | ||||||
|                                     std::string, q, |  | ||||||
|                                     std::string, eta); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TNoiseLoop: public Module<NoiseLoopPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TNoiseLoop(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TNoiseLoop(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                 TNoiseLoop implementation                                  * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TNoiseLoop<FImpl>::TNoiseLoop(const std::string name) |  | ||||||
| : Module<NoiseLoopPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TNoiseLoop<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q, par().eta}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TNoiseLoop<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TNoiseLoop<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     PropagatorField &loop = *env().template createLattice<PropagatorField>(getName()); |  | ||||||
|     PropagatorField &q    = *env().template getObject<PropagatorField>(par().q); |  | ||||||
|     PropagatorField &eta  = *env().template getObject<PropagatorField>(par().eta); |  | ||||||
|     loop = q*adj(eta); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
| @@ -1,226 +0,0 @@ | |||||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MScalar; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                     TChargedProp implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TChargedProp::TChargedProp(const std::string name) |  | ||||||
| : Module<ChargedPropPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TChargedProp::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().source, par().emField}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TChargedProp::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TChargedProp::setup(void) |  | ||||||
| { |  | ||||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); |  | ||||||
|     phaseName_.clear(); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         phaseName_.push_back("_shiftphase_" + std::to_string(mu)); |  | ||||||
|     } |  | ||||||
|     GFSrcName_ = "_" + getName() + "_DinvSrc"; |  | ||||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     if (!env().hasRegisteredObject(phaseName_[0])) |  | ||||||
|     { |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             env().registerLattice<ScalarField>(phaseName_[mu]); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (!env().hasRegisteredObject(GFSrcName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(GFSrcName_); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<ScalarField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TChargedProp::execute(void) |  | ||||||
| { |  | ||||||
|     // CACHING ANALYTIC EXPRESSIONS |  | ||||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); |  | ||||||
|     Complex     ci(0.0,1.0); |  | ||||||
|     FFT         fft(env().getGrid()); |  | ||||||
|      |  | ||||||
|     // cache free scalar propagator |  | ||||||
|     if (!env().hasCreatedObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching momentum space free scalar propagator" |  | ||||||
|                      << " (mass= " << par().mass << ")..." << std::endl; |  | ||||||
|         freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); |  | ||||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     // cache G*F*src |  | ||||||
|     if (!env().hasCreatedObject(GFSrcName_)) |  | ||||||
|          |  | ||||||
|     { |  | ||||||
|         GFSrc_ = env().createLattice<ScalarField>(GFSrcName_); |  | ||||||
|         fft.FFT_all_dim(*GFSrc_, source, FFT::forward); |  | ||||||
|         *GFSrc_ = (*freeMomProp_)*(*GFSrc_); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         GFSrc_ = env().getObject<ScalarField>(GFSrcName_); |  | ||||||
|     } |  | ||||||
|     // cache phases |  | ||||||
|     if (!env().hasCreatedObject(phaseName_[0])) |  | ||||||
|     { |  | ||||||
|         std::vector<int> &l = env().getGrid()->_fdimensions; |  | ||||||
|          |  | ||||||
|         LOG(Message) << "Caching shift phases..." << std::endl; |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             Real    twoPiL = M_PI*2./l[mu]; |  | ||||||
|              |  | ||||||
|             phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu])); |  | ||||||
|             LatticeCoordinate(*(phase_[mu]), mu); |  | ||||||
|             *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             phase_.push_back(env().getObject<ScalarField>(phaseName_[mu])); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // PROPAGATOR CALCULATION |  | ||||||
|     LOG(Message) << "Computing charged scalar propagator" |  | ||||||
|                  << " (mass= " << par().mass |  | ||||||
|                  << ", charge= " << par().charge << ")..." << std::endl; |  | ||||||
|      |  | ||||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); |  | ||||||
|     ScalarField buf(env().getGrid()); |  | ||||||
|     ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; |  | ||||||
|     double      q = par().charge; |  | ||||||
|      |  | ||||||
|     // G*F*Src |  | ||||||
|     prop = GFSrc; |  | ||||||
|  |  | ||||||
|     // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) |  | ||||||
|     buf = GFSrc; |  | ||||||
|     momD1(buf, fft); |  | ||||||
|     buf = G*buf; |  | ||||||
|     prop = prop - q*buf; |  | ||||||
|  |  | ||||||
|     // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) |  | ||||||
|     momD1(buf, fft); |  | ||||||
|     prop = prop + q*q*G*buf; |  | ||||||
|  |  | ||||||
|     // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) |  | ||||||
|     buf = GFSrc; |  | ||||||
|     momD2(buf, fft); |  | ||||||
|     prop = prop - q*q*G*buf; |  | ||||||
|  |  | ||||||
|     // final FT |  | ||||||
|     fft.FFT_all_dim(prop, prop, FFT::backward); |  | ||||||
|      |  | ||||||
|     // OUTPUT IF NECESSARY |  | ||||||
|     if (!par().output.empty()) |  | ||||||
|     { |  | ||||||
|         std::string           filename = par().output + "." + |  | ||||||
|                                          std::to_string(env().getTrajectory()); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "Saving zero-momentum projection to '" |  | ||||||
|                      << filename << "'..." << std::endl; |  | ||||||
|          |  | ||||||
|         CorrWriter            writer(filename); |  | ||||||
|         std::vector<TComplex> vecBuf; |  | ||||||
|         std::vector<Complex>  result; |  | ||||||
|          |  | ||||||
|         sliceSum(prop, vecBuf, Tp); |  | ||||||
|         result.resize(vecBuf.size()); |  | ||||||
|         for (unsigned int t = 0; t < vecBuf.size(); ++t) |  | ||||||
|         { |  | ||||||
|             result[t] = TensorRemove(vecBuf[t]); |  | ||||||
|         } |  | ||||||
|         write(writer, "charge", q); |  | ||||||
|         write(writer, "prop", result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void TChargedProp::momD1(ScalarField &s, FFT &fft) |  | ||||||
| { |  | ||||||
|     EmField     &A = *env().getObject<EmField>(par().emField); |  | ||||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), |  | ||||||
|                 Amu(env().getGrid()); |  | ||||||
|     Complex     ci(0.0,1.0); |  | ||||||
|  |  | ||||||
|     result = zero; |  | ||||||
|  |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = (*phase_[mu])*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); |  | ||||||
|         buf = Amu*buf; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result - ci*buf; |  | ||||||
|     } |  | ||||||
|     fft.FFT_all_dim(s, s, FFT::backward); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = Amu*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + ci*adj(*phase_[mu])*buf; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     s = result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void TChargedProp::momD2(ScalarField &s, FFT &fft) |  | ||||||
| { |  | ||||||
|     EmField     &A = *env().getObject<EmField>(par().emField); |  | ||||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), |  | ||||||
|                 Amu(env().getGrid()); |  | ||||||
|  |  | ||||||
|     result = zero; |  | ||||||
|      |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = (*phase_[mu])*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); |  | ||||||
|         buf = Amu*Amu*buf; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + .5*buf; |  | ||||||
|     } |  | ||||||
|     fft.FFT_all_dim(s, s, FFT::backward); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu);         |  | ||||||
|         buf = Amu*Amu*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + .5*adj(*phase_[mu])*buf; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     s = result; |  | ||||||
| } |  | ||||||
| @@ -1,61 +0,0 @@ | |||||||
| #ifndef Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
| #define Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       Charged scalar propagator                            * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MScalar) |  | ||||||
|  |  | ||||||
| class ChargedPropPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, |  | ||||||
|                                     std::string, emField, |  | ||||||
|                                     std::string, source, |  | ||||||
|                                     double,      mass, |  | ||||||
|                                     double,      charge, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TChargedProp: public Module<ChargedPropPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     SCALAR_TYPE_ALIASES(SIMPL,); |  | ||||||
|     typedef PhotonR::GaugeField     EmField; |  | ||||||
|     typedef PhotonR::GaugeLinkField EmComp; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TChargedProp(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TChargedProp(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| private: |  | ||||||
|     void momD1(ScalarField &s, FFT &fft); |  | ||||||
|     void momD2(ScalarField &s, FFT &fft); |  | ||||||
| private: |  | ||||||
|     std::string                freeMomPropName_, GFSrcName_; |  | ||||||
|     std::vector<std::string>   phaseName_; |  | ||||||
|     ScalarField                *freeMomProp_, *GFSrc_; |  | ||||||
|     std::vector<ScalarField *> phase_; |  | ||||||
|     EmField                    *A; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
| @@ -1,79 +0,0 @@ | |||||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MScalar; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                        TFreeProp implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TFreeProp::TFreeProp(const std::string name) |  | ||||||
| : Module<FreePropPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TFreeProp::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().source}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TFreeProp::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TFreeProp::setup(void) |  | ||||||
| { |  | ||||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); |  | ||||||
|      |  | ||||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<ScalarField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TFreeProp::execute(void) |  | ||||||
| { |  | ||||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); |  | ||||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); |  | ||||||
|     ScalarField *freeMomProp; |  | ||||||
|  |  | ||||||
|     if (!env().hasCreatedObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching momentum space free scalar propagator" |  | ||||||
|                      << " (mass= " << par().mass << ")..." << std::endl; |  | ||||||
|         freeMomProp = env().createLattice<ScalarField>(freeMomPropName_); |  | ||||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         freeMomProp = env().getObject<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Computing free scalar propagator..." << std::endl; |  | ||||||
|     SIMPL::FreePropagator(source, prop, *freeMomProp); |  | ||||||
|      |  | ||||||
|     if (!par().output.empty()) |  | ||||||
|     { |  | ||||||
|         TextWriter            writer(par().output + "." + |  | ||||||
|                                      std::to_string(env().getTrajectory())); |  | ||||||
|         std::vector<TComplex> buf; |  | ||||||
|         std::vector<Complex>  result; |  | ||||||
|          |  | ||||||
|         sliceSum(prop, buf, Tp); |  | ||||||
|         result.resize(buf.size()); |  | ||||||
|         for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|         { |  | ||||||
|             result[t] = TensorRemove(buf[t]); |  | ||||||
|         } |  | ||||||
|         write(writer, "prop", result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,50 +0,0 @@ | |||||||
| #ifndef Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
| #define Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                               FreeProp                                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MScalar) |  | ||||||
|  |  | ||||||
| class FreePropPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, |  | ||||||
|                                     std::string, source, |  | ||||||
|                                     double,      mass, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TFreeProp: public Module<FreePropPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     SCALAR_TYPE_ALIASES(SIMPL,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TFreeProp(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TFreeProp(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| private: |  | ||||||
|     std::string freeMomPropName_; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
| @@ -1,6 +0,0 @@ | |||||||
| #ifndef Hadrons_Scalar_hpp_ |  | ||||||
| #define Hadrons_Scalar_hpp_ |  | ||||||
|  |  | ||||||
| #define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m) |  | ||||||
|  |  | ||||||
| #endif // Hadrons_Scalar_hpp_ |  | ||||||
| @@ -1,114 +0,0 @@ | |||||||
| #ifndef Hadrons_MSink_Point_hpp_ |  | ||||||
| #define Hadrons_MSink_Point_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                                   Point                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MSink) |  | ||||||
|  |  | ||||||
| class PointPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, |  | ||||||
|                                     std::string, mom); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TPoint: public Module<PointPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|     SINK_TYPE_ALIASES(); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TPoint(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TPoint(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink); |  | ||||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                          TPoint implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TPoint<FImpl>::TPoint(const std::string name) |  | ||||||
| : Module<PointPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TPoint<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TPoint<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TPoint<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     unsigned int size; |  | ||||||
|      |  | ||||||
|     size = env().template lattice4dSize<LatticeComplex>(); |  | ||||||
|     env().registerObject(getName(), size); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TPoint<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     std::vector<Real> p = strToVec<Real>(par().mom); |  | ||||||
|     LatticeComplex    ph(env().getGrid()), coor(env().getGrid()); |  | ||||||
|     Complex           i(0.0,1.0); |  | ||||||
|      |  | ||||||
|     LOG(Message) << "Setting up point sink function for momentum [" |  | ||||||
|                  << par().mom << "]" << std::endl; |  | ||||||
|     ph = zero; |  | ||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |  | ||||||
|     { |  | ||||||
|         LatticeCoordinate(coor, mu); |  | ||||||
|         ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; |  | ||||||
|     } |  | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |  | ||||||
|     auto sink = [ph](const PropagatorField &field) |  | ||||||
|     { |  | ||||||
|         SlicedPropagator res; |  | ||||||
|         PropagatorField  tmp = ph*field; |  | ||||||
|          |  | ||||||
|         sliceSum(tmp, res, Tp); |  | ||||||
|          |  | ||||||
|         return res; |  | ||||||
|     }; |  | ||||||
|     env().setObject(getName(), new SinkFn(sink)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MSink_Point_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSolver_RBPrecCG_hpp_ | #ifndef Hadrons_RBPrecCG_hpp_ | ||||||
| #define Hadrons_MSolver_RBPrecCG_hpp_ | #define Hadrons_RBPrecCG_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -53,7 +53,7 @@ template <typename FImpl> | |||||||
| class TRBPrecCG: public Module<RBPrecCGPar> | class TRBPrecCG: public Module<RBPrecCGPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TRBPrecCG(const std::string name); |     TRBPrecCG(const std::string name); | ||||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSolver_RBPrecCG_hpp_ | #endif // Hadrons_RBPrecCG_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_Point_hpp_ | #ifndef Hadrons_Point_hpp_ | ||||||
| #define Hadrons_MSource_Point_hpp_ | #define Hadrons_Point_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,7 +63,7 @@ template <typename FImpl> | |||||||
| class TPoint: public Module<PointPar> | class TPoint: public Module<PointPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TPoint(const std::string name); |     TPoint(const std::string name); | ||||||
| @@ -78,8 +78,7 @@ public: | |||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSource); | MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); | ||||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TPoint template implementation                       * |  *                       TPoint template implementation                       * | ||||||
| @@ -133,4 +132,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_Point_hpp_ | #endif // Hadrons_Point_hpp_ | ||||||
|   | |||||||
| @@ -6,7 +6,6 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
| @@ -28,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_SeqGamma_hpp_ | #ifndef Hadrons_SeqGamma_hpp_ | ||||||
| #define Hadrons_MSource_SeqGamma_hpp_ | #define Hadrons_SeqGamma_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -72,7 +71,7 @@ template <typename FImpl> | |||||||
| class TSeqGamma: public Module<SeqGammaPar> | class TSeqGamma: public Module<SeqGammaPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TSeqGamma(const std::string name); |     TSeqGamma(const std::string name); | ||||||
| @@ -150,9 +149,9 @@ void TSeqGamma<FImpl>::execute(void) | |||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||||
|     { |     { | ||||||
|         LatticeCoordinate(coor, mu); |         LatticeCoordinate(coor, mu); | ||||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); |         ph = ph + p[mu]*coor; | ||||||
|     } |     } | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |     ph = exp(i*ph); | ||||||
|     LatticeCoordinate(t, Tp); |     LatticeCoordinate(t, Tp); | ||||||
|     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); |     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); | ||||||
| } | } | ||||||
| @@ -161,4 +160,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_SeqGamma_hpp_ | #endif // Hadrons_SeqGamma_hpp_ | ||||||
|   | |||||||
| @@ -1,147 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MSource/Wall.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_WallSource_hpp_ |  | ||||||
| #define Hadrons_MSource_WallSource_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Wall source |  | ||||||
|  ----------------------------- |  | ||||||
|  * src_x = delta(x_3 - tW) * exp(i x.mom) |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - tW: source timeslice (integer) |  | ||||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.") |  | ||||||
|   |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         Wall                                               * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MSource) |  | ||||||
|  |  | ||||||
| class WallPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar, |  | ||||||
|                                     unsigned int, tW, |  | ||||||
|                                     std::string, mom); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TWall: public Module<WallPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TWall(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TWall(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                 TWall implementation                                       * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TWall<FImpl>::TWall(const std::string name) |  | ||||||
| : Module<WallPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TWall<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TWall<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TWall<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TWall<FImpl>::execute(void) |  | ||||||
| {     |  | ||||||
|     LOG(Message) << "Generating wall source at t = " << par().tW  |  | ||||||
|                  << " with momentum " << par().mom << std::endl; |  | ||||||
|      |  | ||||||
|     PropagatorField &src = *env().template createLattice<PropagatorField>(getName()); |  | ||||||
|     Lattice<iScalar<vInteger>> t(env().getGrid()); |  | ||||||
|     LatticeComplex             ph(env().getGrid()), coor(env().getGrid()); |  | ||||||
|     std::vector<Real>          p; |  | ||||||
|     Complex                    i(0.0,1.0); |  | ||||||
|      |  | ||||||
|     p  = strToVec<Real>(par().mom); |  | ||||||
|     ph = zero; |  | ||||||
|     for(unsigned int mu = 0; mu < Nd; mu++) |  | ||||||
|     { |  | ||||||
|         LatticeCoordinate(coor, mu); |  | ||||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); |  | ||||||
|     } |  | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |  | ||||||
|     LatticeCoordinate(t, Tp); |  | ||||||
|     src = 1.; |  | ||||||
|     src = where((t == par().tW), src*ph, 0.*src); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_WallSource_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_Z2_hpp_ | #ifndef Hadrons_Z2_hpp_ | ||||||
| #define Hadrons_MSource_Z2_hpp_ | #define Hadrons_Z2_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -67,7 +67,7 @@ template <typename FImpl> | |||||||
| class TZ2: public Module<Z2Par> | class TZ2: public Module<Z2Par> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TZ2(const std::string name); |     TZ2(const std::string name); | ||||||
| @@ -82,8 +82,7 @@ public: | |||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Z2,       TZ2<FIMPL>,        MSource); | MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); | ||||||
| MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TZ2 template implementation                          * |  *                       TZ2 template implementation                          * | ||||||
| @@ -149,4 +148,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_Z2_hpp_ | #endif // Hadrons_Z2_hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,34 @@ | |||||||
| #ifndef Hadrons_MFermion_GaugeProp_hpp_ | /*************************************************************************************
 | ||||||
| #define Hadrons_MFermion_GaugeProp_hpp_ | 
 | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  | 
 | ||||||
|  | Source file: extras/Hadrons/Modules/Quark.hpp | ||||||
|  | 
 | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  | 
 | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  | 
 | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  | 
 | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | 
 | ||||||
|  | #ifndef Hadrons_Quark_hpp_ | ||||||
|  | #define Hadrons_Quark_hpp_ | ||||||
| 
 | 
 | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -8,29 +37,27 @@ | |||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                                GaugeProp                                   * |  *                               TQuark                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MFermion) | class QuarkPar: Serializable | ||||||
| 
 |  | ||||||
| class GaugePropPar: Serializable |  | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, | ||||||
|                                     std::string, source, |                                     std::string, source, | ||||||
|                                     std::string, solver); |                                     std::string, solver); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TGaugeProp: public Module<GaugePropPar> | class TQuark: public Module<QuarkPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor
 |     // constructor
 | ||||||
|     TGaugeProp(const std::string name); |     TQuark(const std::string name); | ||||||
|     // destructor
 |     // destructor
 | ||||||
|     virtual ~TGaugeProp(void) = default; |     virtual ~TQuark(void) = default; | ||||||
|     // dependency relation
 |     // dependencies/products
 | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     // setup
 |     // setup
 | ||||||
| @@ -42,20 +69,20 @@ private: | |||||||
|     SolverFn     *solver_{nullptr}; |     SolverFn     *solver_{nullptr}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion); | MODULE_REGISTER(Quark, TQuark<FIMPL>); | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                      TGaugeProp implementation                             * |  *                          TQuark implementation                             * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| // constructor /////////////////////////////////////////////////////////////////
 | // constructor /////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| TGaugeProp<FImpl>::TGaugeProp(const std::string name) | TQuark<FImpl>::TQuark(const std::string name) | ||||||
| : Module<GaugePropPar>(name) | : Module(name) | ||||||
| {} | {} | ||||||
| 
 | 
 | ||||||
| // dependencies/products ///////////////////////////////////////////////////////
 | // dependencies/products ///////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> in = {par().source, par().solver}; |     std::vector<std::string> in = {par().source, par().solver}; | ||||||
|      |      | ||||||
| @@ -63,7 +90,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> out = {getName(), getName() + "_5d"}; |     std::vector<std::string> out = {getName(), getName() + "_5d"}; | ||||||
|      |      | ||||||
| @@ -72,7 +99,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | |||||||
| 
 | 
 | ||||||
| // setup ///////////////////////////////////////////////////////////////////////
 | // setup ///////////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TGaugeProp<FImpl>::setup(void) | void TQuark<FImpl>::setup(void) | ||||||
| { | { | ||||||
|     Ls_ = env().getObjectLs(par().solver); |     Ls_ = env().getObjectLs(par().solver); | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |     env().template registerLattice<PropagatorField>(getName()); | ||||||
| @@ -84,13 +111,13 @@ void TGaugeProp<FImpl>::setup(void) | |||||||
| 
 | 
 | ||||||
| // execution ///////////////////////////////////////////////////////////////////
 | // execution ///////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TGaugeProp<FImpl>::execute(void) | void TQuark<FImpl>::execute(void) | ||||||
| { | { | ||||||
|     LOG(Message) << "Computing quark propagator '" << getName() << "'" |     LOG(Message) << "Computing quark propagator '" << getName() << "'" | ||||||
|     << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), |     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), | ||||||
|     tmp(env().getGrid()); |                     tmp(env().getGrid()); | ||||||
|     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); |     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); | ||||||
|     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName); |     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName); | ||||||
|     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source); |     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source); | ||||||
| @@ -101,7 +128,7 @@ void TGaugeProp<FImpl>::execute(void) | |||||||
|     } |     } | ||||||
|      |      | ||||||
|     LOG(Message) << "Inverting using solver '" << par().solver |     LOG(Message) << "Inverting using solver '" << par().solver | ||||||
|     << "' on source '" << par().source << "'" << std::endl; |                  << "' on source '" << par().source << "'" << std::endl; | ||||||
|     for (unsigned int s = 0; s < Ns; ++s) |     for (unsigned int s = 0; s < Ns; ++s) | ||||||
|     for (unsigned int c = 0; c < Nc; ++c) |     for (unsigned int c = 0; c < Nc; ++c) | ||||||
|     { |     { | ||||||
| @@ -143,18 +170,16 @@ void TGaugeProp<FImpl>::execute(void) | |||||||
|         if (Ls_ > 1) |         if (Ls_ > 1) | ||||||
|         { |         { | ||||||
|             PropagatorField &p4d = |             PropagatorField &p4d = | ||||||
|             *env().template getObject<PropagatorField>(getName()); |                 *env().template getObject<PropagatorField>(getName()); | ||||||
|              |              | ||||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); |             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||||
|             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); |             axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); | ||||||
|             ExtractSlice(tmp, sol, 0, 0); |             ExtractSlice(tmp, sol, 0, 0); | ||||||
|             FermToProp(p4d, tmp, s, c); |             FermToProp(p4d, tmp, s, c); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| END_MODULE_NAMESPACE |  | ||||||
| 
 |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| #endif // Hadrons_MFermion_GaugeProp_hpp_
 | #endif // Hadrons_Quark_hpp_
 | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #ifndef Hadrons____FILEBASENAME____hpp_ | ||||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #define Hadrons____FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #endif // Hadrons____FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #ifndef Hadrons____FILEBASENAME____hpp_ | ||||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #define Hadrons____FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #endif // Hadrons____FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,38 +1,19 @@ | |||||||
| modules_cc =\ | modules_cc =\ | ||||||
|   Modules/MContraction/WeakHamiltonianEye.cc \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianNonEye.cc \ |  | ||||||
|   Modules/MContraction/WeakNeutral4ptDisc.cc \ |  | ||||||
|   Modules/MGauge/Load.cc \ |   Modules/MGauge/Load.cc \ | ||||||
|   Modules/MGauge/Random.cc \ |   Modules/MGauge/Random.cc \ | ||||||
|   Modules/MGauge/StochEm.cc \ |   Modules/MGauge/Unit.cc | ||||||
|   Modules/MGauge/Unit.cc \ |  | ||||||
|   Modules/MScalar/ChargedProp.cc \ |  | ||||||
|   Modules/MScalar/FreeProp.cc |  | ||||||
|  |  | ||||||
| modules_hpp =\ | modules_hpp =\ | ||||||
|   Modules/MAction/DWF.hpp \ |   Modules/MAction/DWF.hpp \ | ||||||
|   Modules/MAction/Wilson.hpp \ |   Modules/MAction/Wilson.hpp \ | ||||||
|   Modules/MContraction/Baryon.hpp \ |   Modules/MContraction/Baryon.hpp \ | ||||||
|   Modules/MContraction/DiscLoop.hpp \ |  | ||||||
|   Modules/MContraction/Gamma3pt.hpp \ |  | ||||||
|   Modules/MContraction/Meson.hpp \ |   Modules/MContraction/Meson.hpp \ | ||||||
|   Modules/MContraction/WeakHamiltonian.hpp \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianEye.hpp \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianNonEye.hpp \ |  | ||||||
|   Modules/MContraction/WeakNeutral4ptDisc.hpp \ |  | ||||||
|   Modules/MFermion/GaugeProp.hpp \ |  | ||||||
|   Modules/MGauge/Load.hpp \ |   Modules/MGauge/Load.hpp \ | ||||||
|   Modules/MGauge/Random.hpp \ |   Modules/MGauge/Random.hpp \ | ||||||
|   Modules/MGauge/StochEm.hpp \ |  | ||||||
|   Modules/MGauge/Unit.hpp \ |   Modules/MGauge/Unit.hpp \ | ||||||
|   Modules/MLoop/NoiseLoop.hpp \ |  | ||||||
|   Modules/MScalar/ChargedProp.hpp \ |  | ||||||
|   Modules/MScalar/FreeProp.hpp \ |  | ||||||
|   Modules/MScalar/Scalar.hpp \ |  | ||||||
|   Modules/MSink/Point.hpp \ |  | ||||||
|   Modules/MSolver/RBPrecCG.hpp \ |   Modules/MSolver/RBPrecCG.hpp \ | ||||||
|   Modules/MSource/Point.hpp \ |   Modules/MSource/Point.hpp \ | ||||||
|   Modules/MSource/SeqGamma.hpp \ |   Modules/MSource/SeqGamma.hpp \ | ||||||
|   Modules/MSource/Wall.hpp \ |   Modules/MSource/Z2.hpp \ | ||||||
|   Modules/MSource/Z2.hpp |   Modules/Quark.hpp | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,11 +0,0 @@ | |||||||
| #include <qed-fvol/Global.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace QCD; |  | ||||||
| using namespace QedFVol; |  | ||||||
|  |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); |  | ||||||
| @@ -1,42 +0,0 @@ | |||||||
| #ifndef QedFVol_Global_hpp_ |  | ||||||
| #define QedFVol_Global_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| #define BEGIN_QEDFVOL_NAMESPACE \ |  | ||||||
| namespace Grid {\ |  | ||||||
| using namespace QCD;\ |  | ||||||
| namespace QedFVol {\ |  | ||||||
| using Grid::operator<<; |  | ||||||
| #define END_QEDFVOL_NAMESPACE }} |  | ||||||
|  |  | ||||||
| /* the 'using Grid::operator<<;' statement prevents a very nasty compilation |  | ||||||
|  * error with GCC (clang compiles fine without it). |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| BEGIN_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| class QedFVolLogger: public Logger |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, |  | ||||||
|                                                   GridLogColours, "BLACK"){}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #define LOG(channel) std::cout << QedFVolLog##channel |  | ||||||
| #define QEDFVOL_ERROR(msg)\ |  | ||||||
| LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ |  | ||||||
|            << __LINE__ << ")" << std::endl;\ |  | ||||||
| abort(); |  | ||||||
|  |  | ||||||
| #define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; |  | ||||||
|  |  | ||||||
| extern QedFVolLogger QedFVolLogError; |  | ||||||
| extern QedFVolLogger QedFVolLogWarning; |  | ||||||
| extern QedFVolLogger QedFVolLogMessage; |  | ||||||
| extern QedFVolLogger QedFVolLogIterative; |  | ||||||
| extern QedFVolLogger QedFVolLogDebug; |  | ||||||
|  |  | ||||||
| END_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // QedFVol_Global_hpp_ |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| AM_CXXFLAGS += -I$(top_srcdir)/extras |  | ||||||
|  |  | ||||||
| bin_PROGRAMS = qed-fvol |  | ||||||
|  |  | ||||||
| qed_fvol_SOURCES =   \ |  | ||||||
|     qed-fvol.cc      \ |  | ||||||
|     Global.cc |  | ||||||
|  |  | ||||||
| qed_fvol_LDADD   = -lGrid |  | ||||||
| @@ -1,265 +0,0 @@ | |||||||
| #ifndef QEDFVOL_WILSONLOOPS_H |  | ||||||
| #define QEDFVOL_WILSONLOOPS_H |  | ||||||
|  |  | ||||||
| #include <Global.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| template <class Gimpl> class NewWilsonLoops : public Gimpl { |  | ||||||
| public: |  | ||||||
|   INHERIT_GIMPL_TYPES(Gimpl); |  | ||||||
|  |  | ||||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; |  | ||||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // directed plaquette oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, |  | ||||||
|                            const int mu, const int nu) { |  | ||||||
|     // Annoyingly, must use either scope resolution to find dependent base |  | ||||||
|     // class, |  | ||||||
|     // or this-> ; there is no "this" in a static method. This forces explicit |  | ||||||
|     // Gimpl scope |  | ||||||
|     // resolution throughout the usage in this file, and rather defeats the |  | ||||||
|     // purpose of deriving |  | ||||||
|     // from Gimpl. |  | ||||||
|     plaq = Gimpl::CovShiftBackward( |  | ||||||
|         U[mu], mu, Gimpl::CovShiftBackward( |  | ||||||
|                        U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // trace of directed plaquette oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void traceDirPlaquette(LatticeComplex &plaq, |  | ||||||
|                                 const std::vector<GaugeMat> &U, const int mu, |  | ||||||
|                                 const int nu) { |  | ||||||
|     GaugeMat sp(U[0]._grid); |  | ||||||
|     dirPlaquette(sp, U, mu, nu); |  | ||||||
|     plaq = trace(sp); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void sitePlaquette(LatticeComplex &Plaq, |  | ||||||
|                             const std::vector<GaugeMat> &U) { |  | ||||||
|     LatticeComplex sitePlaq(U[0]._grid); |  | ||||||
|     Plaq = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceDirPlaquette(sitePlaq, U, mu, nu); |  | ||||||
|         Plaq = Plaq + sitePlaq; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumPlaquette(const GaugeLorentz &Umu) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Plaq(Umu._grid); |  | ||||||
|  |  | ||||||
|     sitePlaquette(Plaq, U); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Plaq); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgPlaquette(const GaugeLorentz &Umu) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumplaq = sumPlaquette(Umu); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; |  | ||||||
|     return sumplaq / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // Wilson loop of size (R1, R2), oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, |  | ||||||
|                            const int Rmu, const int Rnu, |  | ||||||
|                            const int mu, const int nu) { |  | ||||||
|     wl = U[nu]; |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rnu-1; i++){ |  | ||||||
|       wl = Gimpl::CovShiftForward(U[nu], nu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rmu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftForward(U[mu], mu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rnu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rmu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // trace of Wilson Loop oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void traceWilsonLoop(LatticeComplex &wl, |  | ||||||
|                                 const std::vector<GaugeMat> &U, |  | ||||||
|                                 const int Rmu, const int Rnu, |  | ||||||
|                                 const int mu, const int nu) { |  | ||||||
|     GaugeMat sp(U[0]._grid); |  | ||||||
|     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); |  | ||||||
|     wl = trace(sp); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over planes of Wilson loop with length R1 |  | ||||||
|   // in the time direction |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|  |  | ||||||
|     int ndim = U[0]._grid->_ndimension; |  | ||||||
|  |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int nu = 0; nu < ndim - 1; nu++) { |  | ||||||
|       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); |  | ||||||
|       Wl = Wl + siteWl; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum Wilson loop over all planes orthogonal to the time direction |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteSpatialWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|  |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of timelike Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteTimelikeWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of spatial Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteSpatialWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumWl = sumWilsonLoop(Umu, R1, R2); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = 1.0 * ndim * (ndim - 1); |  | ||||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of timelike Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = 1.0 * (ndim - 1); |  | ||||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of spatial Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = 1.0 * (ndim - 1) * (ndim - 2); |  | ||||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| END_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // QEDFVOL_WILSONLOOPS_H |  | ||||||
| @@ -1,88 +0,0 @@ | |||||||
| #include <Global.hpp> |  | ||||||
| #include <WilsonLoops.h> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace QCD; |  | ||||||
| using namespace QedFVol; |  | ||||||
|  |  | ||||||
| typedef PeriodicGaugeImpl<QedGimplR>    QedPeriodicGimplR; |  | ||||||
| typedef PhotonR::GaugeField             EmField; |  | ||||||
| typedef PhotonR::GaugeLinkField         EmComp; |  | ||||||
|  |  | ||||||
| const int NCONFIGS = 10; |  | ||||||
| const int NWILSON = 10; |  | ||||||
|  |  | ||||||
| int main(int argc, char *argv[]) |  | ||||||
| { |  | ||||||
|     // parse command line |  | ||||||
|     std::string parameterFileName; |  | ||||||
|      |  | ||||||
|     if (argc < 2) |  | ||||||
|     { |  | ||||||
|         std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]"; |  | ||||||
|         std::cerr << std::endl; |  | ||||||
|         std::exit(EXIT_FAILURE); |  | ||||||
|     } |  | ||||||
|     parameterFileName = argv[1]; |  | ||||||
|      |  | ||||||
|     // initialization |  | ||||||
|     Grid_init(&argc, &argv); |  | ||||||
|     QedFVolLogError.Active(GridLogError.isActive()); |  | ||||||
|     QedFVolLogWarning.Active(GridLogWarning.isActive()); |  | ||||||
|     QedFVolLogMessage.Active(GridLogMessage.isActive()); |  | ||||||
|     QedFVolLogIterative.Active(GridLogIterative.isActive()); |  | ||||||
|     QedFVolLogDebug.Active(GridLogDebug.isActive()); |  | ||||||
|     LOG(Message) << "Grid initialized" << std::endl; |  | ||||||
|      |  | ||||||
|     // QED stuff |  | ||||||
|     std::vector<int> latt_size   = GridDefaultLatt(); |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|     GridCartesian    grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|     GridParallelRNG  pRNG(&grid); |  | ||||||
|     PhotonR          photon(PhotonR::Gauge::feynman, |  | ||||||
|                             PhotonR::ZmScheme::qedL); |  | ||||||
|     EmField          a(&grid); |  | ||||||
|     EmField          expA(&grid); |  | ||||||
|  |  | ||||||
|     Complex imag_unit(0, 1); |  | ||||||
|  |  | ||||||
|     Real wlA; |  | ||||||
|     std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); |  | ||||||
|  |  | ||||||
|     pRNG.SeedRandomDevice(); |  | ||||||
|  |  | ||||||
|     LOG(Message) << "Wilson loop calculation beginning" << std::endl; |  | ||||||
|     for(int ic = 0; ic < NCONFIGS; ic++){ |  | ||||||
|         LOG(Message) << "Configuration " << ic <<std::endl; |  | ||||||
|         photon.StochasticField(a, pRNG); |  | ||||||
|  |  | ||||||
|         // Exponentiate photon field |  | ||||||
|         expA = exp(imag_unit*a); |  | ||||||
|  |  | ||||||
|         // Calculate Wilson loops |  | ||||||
|         for(int iw=1; iw<=NWILSON; iw++){ |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlAvg[iw-1] -= 2*log(wlA); |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlTime[iw-1] -= 2*log(wlA); |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlSpace[iw-1] -= 2*log(wlA); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Wilson loop calculation completed" << std::endl; |  | ||||||
|      |  | ||||||
|     // Calculate Wilson loops |  | ||||||
|     for(int iw=1; iw<=10; iw++){ |  | ||||||
|         LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // epilogue |  | ||||||
|     LOG(Message) << "Grid is finalizing now" << std::endl; |  | ||||||
|     Grid_finalize(); |  | ||||||
|      |  | ||||||
|     return EXIT_SUCCESS; |  | ||||||
| } |  | ||||||
| @@ -20,17 +20,4 @@ The simple testcase in this directory is the submitted bug report that encapsula | |||||||
| problem. The test case works with icpc and with clang++, but fails consistently on g++ | problem. The test case works with icpc and with clang++, but fails consistently on g++ | ||||||
| current variants. | current variants. | ||||||
|  |  | ||||||
| Peter | Peter | ||||||
|  |  | ||||||
|  |  | ||||||
| ************ |  | ||||||
|  |  | ||||||
| Second GCC bug reported, see Issue 100. |  | ||||||
|  |  | ||||||
| https://wandbox.org/permlink/tzssJza6R9XnqANw |  | ||||||
| https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652 |  | ||||||
|  |  | ||||||
| Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the |  | ||||||
| CI test suite. The limitations of Travis runtime limits & weak cores are being shown. |  | ||||||
|  |  | ||||||
| Travis uses 5.4.1 for g++-5. |  | ||||||
| @@ -1,86 +0,0 @@ | |||||||
| #! /bin/sh |  | ||||||
|  |  | ||||||
| prefix=@prefix@ |  | ||||||
| exec_prefix=@exec_prefix@ |  | ||||||
| includedir=@includedir@ |  | ||||||
|  |  | ||||||
| usage() |  | ||||||
| { |  | ||||||
|   cat <<EOF |  | ||||||
| Usage: grid-config [OPTION] |  | ||||||
|  |  | ||||||
| Known values for OPTION are: |  | ||||||
|  |  | ||||||
|   --prefix     show Grid installation prefix |  | ||||||
|   --cxxflags   print pre-processor and compiler flags |  | ||||||
|   --ldflags    print library linking flags |  | ||||||
|   --libs       print library linking information |  | ||||||
|   --summary    print full build summary |  | ||||||
|   --help       display this help and exit |  | ||||||
|   --version    output version information |  | ||||||
|   --git        print git revision |  | ||||||
|  |  | ||||||
| EOF |  | ||||||
|    |  | ||||||
|   exit $1 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| if test $# -eq 0; then |  | ||||||
|   usage 1 |  | ||||||
| fi |  | ||||||
|  |  | ||||||
| cflags=false |  | ||||||
| libs=false |  | ||||||
|  |  | ||||||
| while test $# -gt 0; do |  | ||||||
|   case "$1" in |  | ||||||
|     -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; |  | ||||||
|     *) optarg= ;; |  | ||||||
|   esac |  | ||||||
|    |  | ||||||
|   case "$1" in |  | ||||||
|     --prefix) |  | ||||||
|       echo $prefix |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --version) |  | ||||||
|       echo @VERSION@ |  | ||||||
|       exit 0 |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --git) |  | ||||||
|       echo "@GRID_BRANCH@ @GRID_SHA@" |  | ||||||
|       exit 0 |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --help) |  | ||||||
|       usage 0 |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --cxxflags) |  | ||||||
|       echo @GRID_CXXFLAGS@ |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --ldflags) |  | ||||||
|       echo @GRID_LDFLAGS@ |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --libs) |  | ||||||
|       echo @GRID_LIBS@ |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --summary) |  | ||||||
|       echo "" |  | ||||||
|       echo "@GRID_SUMMARY@" |  | ||||||
|       echo "" |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     *) |  | ||||||
|       usage |  | ||||||
|       exit 1 |  | ||||||
|     ;; |  | ||||||
|   esac |  | ||||||
|   shift |  | ||||||
| done |  | ||||||
|  |  | ||||||
| exit 0 |  | ||||||
| @@ -1,37 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/DisableWarnings.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef DISABLE_WARNINGS_H |  | ||||||
| #define DISABLE_WARNINGS_H |  | ||||||
|  |  | ||||||
|  //disables and intel compiler specific warning (in json.hpp) |  | ||||||
| #pragma warning disable 488   |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -41,9 +41,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/GridQCDcore.h> | #include <Grid/GridQCDcore.h> | ||||||
| #include <Grid/qcd/action/Action.h> | #include <Grid/qcd/action/Action.h> | ||||||
| #include <Grid/qcd/utils/GaugeFix.h> |  | ||||||
| #include <Grid/qcd/smearing/Smearing.h> | #include <Grid/qcd/smearing/Smearing.h> | ||||||
| #include <Grid/parallelIO/MetaData.h> |  | ||||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,7 +38,28 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_BASE_H | #ifndef GRID_BASE_H | ||||||
| #define GRID_BASE_H | #define GRID_BASE_H | ||||||
|  |  | ||||||
| #include <Grid/GridStd.h> | /////////////////// | ||||||
|  | // Std C++ dependencies | ||||||
|  | /////////////////// | ||||||
|  | #include <cassert> | ||||||
|  | #include <complex> | ||||||
|  | #include <vector> | ||||||
|  | #include <iostream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <random> | ||||||
|  | #include <functional> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <signal.h> | ||||||
|  | #include <ctime> | ||||||
|  | #include <sys/time.h> | ||||||
|  | #include <chrono> | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Grid headers | ||||||
|  | /////////////////// | ||||||
|  | #include "Config.h" | ||||||
|  |  | ||||||
| #include <Grid/perfmon/Timer.h> | #include <Grid/perfmon/Timer.h> | ||||||
| #include <Grid/perfmon/PerfCount.h> | #include <Grid/perfmon/PerfCount.h> | ||||||
|   | |||||||
| @@ -1,29 +0,0 @@ | |||||||
| #ifndef GRID_STD_H |  | ||||||
| #define GRID_STD_H |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Std C++ dependencies |  | ||||||
| /////////////////// |  | ||||||
| #include <cassert> |  | ||||||
| #include <complex> |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <random> |  | ||||||
| #include <functional> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <signal.h> |  | ||||||
| #include <ctime> |  | ||||||
| #include <sys/time.h> |  | ||||||
| #include <chrono> |  | ||||||
| #include <zlib.h> |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Grid config |  | ||||||
| /////////////////// |  | ||||||
| #include "Config.h" |  | ||||||
|  |  | ||||||
| #endif /* GRID_STD_H */ |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| #pragma once |  | ||||||
| #if defined __GNUC__ |  | ||||||
| #pragma GCC diagnostic push |  | ||||||
| #pragma GCC diagnostic ignored "-Wdeprecated-declarations" |  | ||||||
| #endif |  | ||||||
| #include <Grid/Eigen/Dense> |  | ||||||
| #if defined __GNUC__ |  | ||||||
| #pragma GCC diagnostic pop |  | ||||||
| #endif |  | ||||||
| @@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3 | |||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
| if BUILD_COMMS_MPIT | if BUILD_COMMS_MPI3L | ||||||
|   extra_sources+=communicator/Communicator_mpit.cc |   extra_sources+=communicator/Communicator_mpi3_leader.cc | ||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -235,7 +235,7 @@ namespace Grid { | |||||||
| 	Field tmp(in._grid); | 	Field tmp(in._grid); | ||||||
|  |  | ||||||
| 	_Mat.MeooeDag(in,tmp); | 	_Mat.MeooeDag(in,tmp); | ||||||
|         _Mat.MooeeInvDag(tmp,out); | 	_Mat.MooeeInvDag(tmp,out); | ||||||
| 	_Mat.MeooeDag(out,tmp); | 	_Mat.MeooeDag(out,tmp); | ||||||
|  |  | ||||||
| 	_Mat.MooeeDag(in,out); | 	_Mat.MooeeDag(in,out); | ||||||
|   | |||||||
| @@ -197,9 +197,8 @@ namespace Grid { | |||||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |  | ||||||
|       GridBase *grid=in._grid; |       GridBase *grid=in._grid; | ||||||
|  | //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||||
|       // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||||
|       //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; |  | ||||||
|  |  | ||||||
|       int vol=grid->gSites(); |       int vol=grid->gSites(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,7 +16,7 @@ | |||||||
| #define INCLUDED_ALG_REMEZ_H | #define INCLUDED_ALG_REMEZ_H | ||||||
|  |  | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <Grid/GridStd.h> | #include <Config.h> | ||||||
|  |  | ||||||
| #ifdef HAVE_LIBGMP | #ifdef HAVE_LIBGMP | ||||||
| #include "bigfloat.h" | #include "bigfloat.h" | ||||||
|   | |||||||
							
								
								
									
										137
									
								
								lib/algorithms/densematrix/DenseMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								lib/algorithms/densematrix/DenseMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/DenseMatrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_DENSE_MATRIX_H | ||||||
|  | #define GRID_DENSE_MATRIX_H | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Matrix untils | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | template<class T> using DenseVector = std::vector<T>; | ||||||
|  | template<class T> using DenseMatrix = DenseVector<DenseVector<T> >; | ||||||
|  |  | ||||||
|  | template<class T> void Size(DenseVector<T> & vec, int &N)  | ||||||
|  | {  | ||||||
|  |   N= vec.size(); | ||||||
|  | } | ||||||
|  | template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)  | ||||||
|  | {  | ||||||
|  |   N= mat.size(); | ||||||
|  |   M= mat[0].size(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)  | ||||||
|  | {  | ||||||
|  |   int M; Size(mat,N,M); | ||||||
|  |   assert(N==M); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class T> void Resize(DenseVector<T > & mat, int N) {  | ||||||
|  |   mat.resize(N); | ||||||
|  | } | ||||||
|  | template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {  | ||||||
|  |   mat.resize(N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     mat[i].resize(M); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | template<class T> void Fill(DenseMatrix<T> & mat, T&val) {  | ||||||
|  |   int N,M; | ||||||
|  |   Size(mat,N,M); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |   for(int j=0;j<M;j++){ | ||||||
|  |     mat[i][j] = val; | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Transpose of a matrix **/ | ||||||
|  | template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){ | ||||||
|  |   int N,M; | ||||||
|  |   Size(mat,N,M); | ||||||
|  |   DenseMatrix<T> C; Resize(C,M,N); | ||||||
|  |   for(int i=0;i<M;i++){ | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     C[i][j] = mat[j][i]; | ||||||
|  |   }}  | ||||||
|  |   return C; | ||||||
|  | } | ||||||
|  | /** Set DenseMatrix to unit matrix **/ | ||||||
|  | template<class T> void Unity(DenseMatrix<T> &A){ | ||||||
|  |   int N;  SizeSquare(A,N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     for(int j=0;j<N;j++){ | ||||||
|  |       if ( i==j ) A[i][j] = 1; | ||||||
|  |       else        A[i][j] = 0; | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Add C * I to matrix **/ | ||||||
|  | template<class T> | ||||||
|  | void PlusUnit(DenseMatrix<T> & A,T c){ | ||||||
|  |   int dim;  SizeSquare(A,dim); | ||||||
|  |   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** return the Hermitian conjugate of matrix **/ | ||||||
|  | template<class T> | ||||||
|  | DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){ | ||||||
|  |  | ||||||
|  |   int dim; SizeSquare(mat,dim); | ||||||
|  |  | ||||||
|  |   DenseMatrix<T> C; Resize(C,dim,dim); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<dim;i++){ | ||||||
|  |     for(int j=0;j<dim;j++){ | ||||||
|  |       C[i][j] = conj(mat[j][i]); | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  |   return C; | ||||||
|  | } | ||||||
|  | /**Get a square submatrix**/ | ||||||
|  | template <class T> | ||||||
|  | DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end) | ||||||
|  | { | ||||||
|  |   DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st); | ||||||
|  |  | ||||||
|  |   for(int i = row_st; i<row_end; i++){ | ||||||
|  |   for(int j = col_st; j<col_end; j++){ | ||||||
|  |     H[i-row_st][j-col_st]=A[i][j]; | ||||||
|  |   }} | ||||||
|  |   return H; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #include "Householder.h" | ||||||
|  | #include "Francis.h" | ||||||
|  |  | ||||||
|  | #endif | ||||||
|  |  | ||||||
							
								
								
									
										525
									
								
								lib/algorithms/densematrix/Francis.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										525
									
								
								lib/algorithms/densematrix/Francis.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,525 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/Francis.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef FRANCIS_H | ||||||
|  | #define FRANCIS_H | ||||||
|  |  | ||||||
|  | #include <cstdlib> | ||||||
|  | #include <string> | ||||||
|  | #include <cmath> | ||||||
|  | #include <iostream> | ||||||
|  | #include <sstream> | ||||||
|  | #include <stdexcept> | ||||||
|  | #include <fstream> | ||||||
|  | #include <complex> | ||||||
|  | #include <algorithm> | ||||||
|  |  | ||||||
|  | //#include <timer.h> | ||||||
|  | //#include <lapacke.h> | ||||||
|  | //#include <Eigen/Dense> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||||
|  | template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. | ||||||
|  | H = | ||||||
|  |       x  x  x  x  x  x  x  x  x | ||||||
|  |       x  x  x  x  x  x  x  x  x | ||||||
|  |       0  x  x  x  x  x  x  x  x | ||||||
|  |       0  0  x  x  x  x  x  x  x | ||||||
|  |       0  0  0  x  x  x  x  x  x | ||||||
|  |       0  0  0  0  x  x  x  x  x | ||||||
|  |       0  0  0  0  0  x  x  x  x | ||||||
|  |       0  0  0  0  0  0  x  x  x | ||||||
|  |       0  0  0  0  0  0  0  x  x | ||||||
|  | Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. | ||||||
|  | **/ | ||||||
|  | template <class T> | ||||||
|  | int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||||
|  | { | ||||||
|  |   DenseMatrix<T> H = Hin;  | ||||||
|  |  | ||||||
|  |   int N ; SizeSquare(H,N); | ||||||
|  |   int M = N; | ||||||
|  |  | ||||||
|  |   Fill(evals,0); | ||||||
|  |   Fill(evecs,0); | ||||||
|  |  | ||||||
|  |   T s,t,x=0,y=0,z=0; | ||||||
|  |   T u,d; | ||||||
|  |   T apd,amd,bc; | ||||||
|  |   DenseVector<T> p(N,0); | ||||||
|  |   T nrm = Norm(H);    ///DenseMatrix Norm | ||||||
|  |   int n, m; | ||||||
|  |   int e = 0; | ||||||
|  |   int it = 0; | ||||||
|  |   int tot_it = 0; | ||||||
|  |   int l = 0; | ||||||
|  |   int r = 0; | ||||||
|  |   DenseMatrix<T> P; Resize(P,N,N); Unity(P); | ||||||
|  |   DenseVector<int> trows(N,0); | ||||||
|  |  | ||||||
|  |   /// Check if the matrix is really hessenberg, if not abort | ||||||
|  |   RealD sth = 0; | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     for(int i=j+2;i<N;i++){ | ||||||
|  |       sth = abs(H[i][j]); | ||||||
|  |       if(sth > small){ | ||||||
|  | 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; | ||||||
|  | 	exit(1); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   do{ | ||||||
|  |     std::cout << "Francis QR Step N = " << N << std::endl; | ||||||
|  |     /** Check for convergence | ||||||
|  |       x  x  x  x  x | ||||||
|  |       0  x  x  x  x | ||||||
|  |       0  0  x  x  x | ||||||
|  |       0  0  x  x  x | ||||||
|  |       0  0  0  0  x | ||||||
|  |       for this matrix l = 4 | ||||||
|  |      **/ | ||||||
|  |     do{ | ||||||
|  |       l = Chop_subdiag(H,nrm,e,small); | ||||||
|  |       r = 0;    ///May have converged on more than one eval | ||||||
|  |       ///Single eval | ||||||
|  |       if(l == N-1){ | ||||||
|  |         evals[e] = H[l][l]; | ||||||
|  |         N--; e++; r++; it = 0; | ||||||
|  |       } | ||||||
|  |       ///RealD eval | ||||||
|  |       if(l == N-2){ | ||||||
|  |         trows[l+1] = 1;    ///Needed for UTSolve | ||||||
|  |         apd = H[l][l] + H[l+1][l+1]; | ||||||
|  |         amd = H[l][l] - H[l+1][l+1]; | ||||||
|  |         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; | ||||||
|  |         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); | ||||||
|  |         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); | ||||||
|  |         N-=2; e+=2; r++; it = 0; | ||||||
|  |       } | ||||||
|  |     } while(r>0); | ||||||
|  |  | ||||||
|  |     if(N ==0) break; | ||||||
|  |  | ||||||
|  |     DenseVector<T > ck; Resize(ck,3); | ||||||
|  |     DenseVector<T> v;   Resize(v,3); | ||||||
|  |  | ||||||
|  |     for(int m = N-3; m >= l; m--){ | ||||||
|  |       ///Starting vector essentially random shift. | ||||||
|  |       if(it%10 == 0 && N >= 3 && it > 0){ | ||||||
|  |         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||||
|  |         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||||
|  |         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||||
|  |         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||||
|  |         z = H[m+1][m]*H[m+2][m+1]; | ||||||
|  |       } | ||||||
|  |       ///Starting vector implicit Q theorem | ||||||
|  |       else{ | ||||||
|  |         s = (H[N-2][N-2] + H[N-1][N-1]); | ||||||
|  |         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); | ||||||
|  |         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||||
|  |         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||||
|  |         z = H[m+1][m]*H[m+2][m+1]; | ||||||
|  |       } | ||||||
|  |       ck[0] = x; ck[1] = y; ck[2] = z; | ||||||
|  |  | ||||||
|  |       if(m == l) break; | ||||||
|  |  | ||||||
|  |       /** Some stupid thing from numerical recipies, seems to work**/ | ||||||
|  |       // PAB.. for heaven's sake quote page, purpose, evidence it works. | ||||||
|  |       //       what sort of comment is that!?!?!? | ||||||
|  |       u=abs(H[m][m-1])*(abs(y)+abs(z)); | ||||||
|  |       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); | ||||||
|  |       if ((T)abs(u+d) == (T)abs(d) ){ | ||||||
|  | 	l = m; break; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       //if (u < small){l = m; break;} | ||||||
|  |     } | ||||||
|  |     if(it > 100000){ | ||||||
|  |      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; | ||||||
|  |      std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||||
|  |       exit(1); | ||||||
|  |     } | ||||||
|  |     normalize(ck);    ///Normalization cancels in PHP anyway | ||||||
|  |     T beta; | ||||||
|  |     Householder_vector<T >(ck, 0, 2, v, beta); | ||||||
|  |     Householder_mult<T >(H,v,beta,0,l,l+2,0); | ||||||
|  |     Householder_mult<T >(H,v,beta,0,l,l+2,1); | ||||||
|  |     ///Accumulate eigenvector | ||||||
|  |     Householder_mult<T >(P,v,beta,0,l,l+2,1); | ||||||
|  |     int sw = 0;      ///Are we on the last row? | ||||||
|  |     for(int k=l;k<N-2;k++){ | ||||||
|  |       x = H[k+1][k]; | ||||||
|  |       y = H[k+2][k]; | ||||||
|  |       z = (T)0.0; | ||||||
|  |       if(k+3 <= N-1){ | ||||||
|  | 	z = H[k+3][k]; | ||||||
|  |       } else{ | ||||||
|  | 	sw = 1;  | ||||||
|  | 	v[2] = (T)0.0; | ||||||
|  |       } | ||||||
|  |       ck[0] = x; ck[1] = y; ck[2] = z; | ||||||
|  |       normalize(ck); | ||||||
|  |       Householder_vector<T >(ck, 0, 2-sw, v, beta); | ||||||
|  |       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); | ||||||
|  |       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); | ||||||
|  |       ///Accumulate eigenvector | ||||||
|  |       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); | ||||||
|  |     } | ||||||
|  |     it++; | ||||||
|  |     tot_it++; | ||||||
|  |   }while(N > 1); | ||||||
|  |   N = evals.size(); | ||||||
|  |   ///Annoying - UT solves in reverse order; | ||||||
|  |   DenseVector<T> tmp; Resize(tmp,N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     tmp[i] = evals[N-i-1]; | ||||||
|  |   }  | ||||||
|  |   evals = tmp; | ||||||
|  |   UTeigenvectors(H, trows, evals, evecs); | ||||||
|  |   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} | ||||||
|  |   return tot_it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||||
|  | { | ||||||
|  |   /** | ||||||
|  |   Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. | ||||||
|  |   H = | ||||||
|  |   x  x  0  0  0  0 | ||||||
|  |   x  x  x  0  0  0 | ||||||
|  |   0  x  x  x  0  0 | ||||||
|  |   0  0  x  x  x  0 | ||||||
|  |   0  0  0  x  x  x | ||||||
|  |   0  0  0  0  x  x | ||||||
|  |   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.  **/ | ||||||
|  |   return my_Wilkinson(Hin, evals, evecs, small, small); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) | ||||||
|  | { | ||||||
|  |   int N; SizeSquare(Hin,N); | ||||||
|  |   int M = N; | ||||||
|  |  | ||||||
|  |   ///I don't want to modify the input but matricies must be passed by reference | ||||||
|  |   //Scale a matrix by its "norm" | ||||||
|  |   //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm); | ||||||
|  |   DenseMatrix<T> H;  H = Hin; | ||||||
|  |    | ||||||
|  |   RealD Hnorm = abs(Norm(Hin)); | ||||||
|  |   H = H * (1.0 / Hnorm); | ||||||
|  |  | ||||||
|  |   // TODO use openmp and memset | ||||||
|  |   Fill(evals,0); | ||||||
|  |   Fill(evecs,0); | ||||||
|  |  | ||||||
|  |   T s, t, x = 0, y = 0, z = 0; | ||||||
|  |   T u, d; | ||||||
|  |   T apd, amd, bc; | ||||||
|  |   DenseVector<T> p; Resize(p,N); Fill(p,0); | ||||||
|  |  | ||||||
|  |   T nrm = Norm(H);    ///DenseMatrix Norm | ||||||
|  |   int n, m; | ||||||
|  |   int e = 0; | ||||||
|  |   int it = 0; | ||||||
|  |   int tot_it = 0; | ||||||
|  |   int l = 0; | ||||||
|  |   int r = 0; | ||||||
|  |   DenseMatrix<T> P; Resize(P,N,N); | ||||||
|  |   Unity(P); | ||||||
|  |   DenseVector<int> trows(N, 0); | ||||||
|  |   /// Check if the matrix is really symm tridiag | ||||||
|  |   RealD sth = 0; | ||||||
|  |   for(int j = 0; j < N; ++j) | ||||||
|  |   { | ||||||
|  |     for(int i = j + 2; i < N; ++i) | ||||||
|  |     { | ||||||
|  |       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) | ||||||
|  |       { | ||||||
|  | 	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; | ||||||
|  | 	std::cout << "Warning tridiagonalize and call again" << std::endl; | ||||||
|  |         // exit(1); // see what is going on | ||||||
|  |         //return; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   do{ | ||||||
|  |     do{ | ||||||
|  |       //Jasper | ||||||
|  |       //Check if the subdiagonal term is small enough (<small) | ||||||
|  |       //if true then it is converged. | ||||||
|  |       //check start from H.dim - e - 1 | ||||||
|  |       //How to deal with more than 2 are converged? | ||||||
|  |       //What if Chop_symm_subdiag return something int the middle? | ||||||
|  |       //-------------- | ||||||
|  |       l = Chop_symm_subdiag(H,nrm, e, small); | ||||||
|  |       r = 0;    ///May have converged on more than one eval | ||||||
|  |       //Jasper | ||||||
|  |       //In this case | ||||||
|  |       // x  x  0  0  0  0 | ||||||
|  |       // x  x  x  0  0  0 | ||||||
|  |       // 0  x  x  x  0  0 | ||||||
|  |       // 0  0  x  x  x  0 | ||||||
|  |       // 0  0  0  x  x  0 | ||||||
|  |       // 0  0  0  0  0  x  <- l | ||||||
|  |       //-------------- | ||||||
|  |       ///Single eval | ||||||
|  |       if(l == N - 1) | ||||||
|  |       { | ||||||
|  |         evals[e] = H[l][l]; | ||||||
|  |         N--; | ||||||
|  |         e++; | ||||||
|  |         r++; | ||||||
|  |         it = 0; | ||||||
|  |       } | ||||||
|  |       //Jasper | ||||||
|  |       // x  x  0  0  0  0 | ||||||
|  |       // x  x  x  0  0  0 | ||||||
|  |       // 0  x  x  x  0  0 | ||||||
|  |       // 0  0  x  x  0  0 | ||||||
|  |       // 0  0  0  0  x  x  <- l | ||||||
|  |       // 0  0  0  0  x  x | ||||||
|  |       //-------------- | ||||||
|  |       ///RealD eval | ||||||
|  |       if(l == N - 2) | ||||||
|  |       { | ||||||
|  |         trows[l + 1] = 1;    ///Needed for UTSolve | ||||||
|  |         apd = H[l][l] + H[l + 1][ l + 1]; | ||||||
|  |         amd = H[l][l] - H[l + 1][l + 1]; | ||||||
|  |         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; | ||||||
|  |         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); | ||||||
|  |         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); | ||||||
|  |         N -= 2; | ||||||
|  |         e += 2; | ||||||
|  |         r++; | ||||||
|  |         it = 0; | ||||||
|  |       } | ||||||
|  |     }while(r > 0); | ||||||
|  |     //Jasper | ||||||
|  |     //Already converged | ||||||
|  |     //-------------- | ||||||
|  |     if(N == 0) break; | ||||||
|  |  | ||||||
|  |     DenseVector<T> ck,v; Resize(ck,2); Resize(v,2); | ||||||
|  |  | ||||||
|  |     for(int m = N - 3; m >= l; m--) | ||||||
|  |     { | ||||||
|  |       ///Starting vector essentially random shift. | ||||||
|  |       if(it%10 == 0 && N >= 3 && it > 0) | ||||||
|  |       { | ||||||
|  |         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); | ||||||
|  |         x = H[m][m] - t; | ||||||
|  |         z = H[m + 1][m]; | ||||||
|  |       } else { | ||||||
|  |       ///Starting vector implicit Q theorem | ||||||
|  |         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; | ||||||
|  |         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]  | ||||||
|  | 	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); | ||||||
|  |         x = H[m][m] - t; | ||||||
|  |         z = H[m + 1][m]; | ||||||
|  |       } | ||||||
|  |       //Jasper | ||||||
|  |       //why it is here???? | ||||||
|  |       //----------------------- | ||||||
|  |       if(m == l) | ||||||
|  |         break; | ||||||
|  |  | ||||||
|  |       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); | ||||||
|  |       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); | ||||||
|  |       if ((T)abs(u + d) == (T)abs(d)) | ||||||
|  |       { | ||||||
|  |         l = m; | ||||||
|  |         break; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     //Jasper | ||||||
|  |     if(it > 1000000) | ||||||
|  |     { | ||||||
|  |       std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; | ||||||
|  |       std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||||
|  |       exit(1); | ||||||
|  |     } | ||||||
|  |     // | ||||||
|  |     T s, c; | ||||||
|  |     Givens_calc<T>(x, z, c, s); | ||||||
|  |     Givens_mult<T>(H, l, l + 1, c, -s, 0); | ||||||
|  |     Givens_mult<T>(H, l, l + 1, c,  s, 1); | ||||||
|  |     Givens_mult<T>(P, l, l + 1, c,  s, 1); | ||||||
|  |     // | ||||||
|  |     for(int k = l; k < N - 2; ++k) | ||||||
|  |     { | ||||||
|  |       x = H.A[k + 1][k]; | ||||||
|  |       z = H.A[k + 2][k]; | ||||||
|  |       Givens_calc<T>(x, z, c, s); | ||||||
|  |       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); | ||||||
|  |       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); | ||||||
|  |       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); | ||||||
|  |     } | ||||||
|  |     it++; | ||||||
|  |     tot_it++; | ||||||
|  |   }while(N > 1); | ||||||
|  |  | ||||||
|  |   N = evals.size(); | ||||||
|  |   ///Annoying - UT solves in reverse order; | ||||||
|  |   DenseVector<T> tmp(N); | ||||||
|  |   for(int i = 0; i < N; ++i) | ||||||
|  |     tmp[i] = evals[N-i-1]; | ||||||
|  |   evals = tmp; | ||||||
|  |   // | ||||||
|  |   UTeigenvectors(H, trows, evals, evecs); | ||||||
|  |   //UTSymmEigenvectors(H, trows, evals, evecs); | ||||||
|  |   for(int i = 0; i < evals.size(); ++i) | ||||||
|  |   { | ||||||
|  |     evecs[i] = P * evecs[i]; | ||||||
|  |     normalize(evecs[i]); | ||||||
|  |     evals[i] = evals[i] * Hnorm; | ||||||
|  |   } | ||||||
|  |   // // FIXME this is to test | ||||||
|  |   // Hin.write("evecs3", evecs); | ||||||
|  |   // Hin.write("evals3", evals); | ||||||
|  |   // // check rsd | ||||||
|  |   // for(int i = 0; i < M; i++) { | ||||||
|  |   //   vector<T> Aevec = Hin * evecs[i]; | ||||||
|  |   //   RealD norm2(0.); | ||||||
|  |   //   for(int j = 0; j < M; j++) { | ||||||
|  |   //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); | ||||||
|  |   //   } | ||||||
|  |   // } | ||||||
|  |   return tot_it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |   turn a matrix A = | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   into | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   0  x  x  x  x | ||||||
|  |   0  0  x  x  x | ||||||
|  |   0  0  0  x  x | ||||||
|  |   with householder rotations | ||||||
|  |   Slow. | ||||||
|  |   */ | ||||||
|  |   int N ; SizeSquare(A,N); | ||||||
|  |   DenseVector<T > p; Resize(p,N); Fill(p,0); | ||||||
|  |  | ||||||
|  |   for(int k=start;k<N-2;k++){ | ||||||
|  |     //cerr << "hess" << k << std::endl; | ||||||
|  |     DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); | ||||||
|  |     for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column | ||||||
|  |     normalize(ck);    ///Normalization cancels in PHP anyway | ||||||
|  |     T beta; | ||||||
|  |     Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector | ||||||
|  |     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA | ||||||
|  |     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H | ||||||
|  |     ///Accumulate eigenvector | ||||||
|  |     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H | ||||||
|  |   } | ||||||
|  |   /*for(int l=0;l<N-2;l++){ | ||||||
|  |     for(int k=l+2;k<N;k++){ | ||||||
|  |     A(0,k,l); | ||||||
|  |     } | ||||||
|  |     }*/ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||||
|  | ///Tridiagonalize a matrix | ||||||
|  |   int N; SizeSquare(A,N); | ||||||
|  |   Hess(A,Q,start); | ||||||
|  |   /*for(int l=0;l<N-2;l++){ | ||||||
|  |     for(int k=l+2;k<N;k++){ | ||||||
|  |     A(0,l,k); | ||||||
|  |     } | ||||||
|  |     }*/ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | void ForceTridiagonal(DenseMatrix<T> &A){ | ||||||
|  | ///Tridiagonalize a matrix | ||||||
|  |   int N ; SizeSquare(A,N); | ||||||
|  |   for(int l=0;l<N-2;l++){ | ||||||
|  |     for(int k=l+2;k<N;k++){ | ||||||
|  |       A[l][k]=0; | ||||||
|  |       A[k][l]=0; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  |   ///Solve a symmetric eigensystem, not necessarily in tridiagonal form | ||||||
|  |   int N; SizeSquare(Ain,N); | ||||||
|  |   DenseMatrix<T > A; A = Ain; | ||||||
|  |   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q); | ||||||
|  |   Tri(A,Q,0); | ||||||
|  |   int it = my_Wilkinson<T>(A, evals, evecs, small); | ||||||
|  |   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||||
|  |   return it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  |   return my_Wilkinson(Ain, evals, evecs, small); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  |   return my_SymmEigensystem(Ain, evals, evecs, small); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  | ///Solve a general eigensystem, not necessarily in tridiagonal form | ||||||
|  |   int N = Ain.dim; | ||||||
|  |   DenseMatrix<T > A(N); A = Ain; | ||||||
|  |   DenseMatrix<T > Q(N);Q.Unity(); | ||||||
|  |   Hess(A,Q,0); | ||||||
|  |   int it = QReigensystem<T>(A, evals, evecs, small); | ||||||
|  |   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||||
|  |   return it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
							
								
								
									
										242
									
								
								lib/algorithms/densematrix/Householder.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								lib/algorithms/densematrix/Householder.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,242 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/Householder.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef HOUSEHOLDER_H | ||||||
|  | #define HOUSEHOLDER_H | ||||||
|  |  | ||||||
|  | #define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||||
|  | #define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||||
|  | #define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||||
|  |  | ||||||
|  | #include <cstdlib> | ||||||
|  | #include <string> | ||||||
|  | #include <cmath> | ||||||
|  | #include <iostream> | ||||||
|  | #include <sstream> | ||||||
|  | #include <stdexcept> | ||||||
|  | #include <fstream> | ||||||
|  | #include <complex> | ||||||
|  | #include <algorithm> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  | /** Comparison function for finding the max element in a vector **/ | ||||||
|  | template <class T> bool cf(T i, T j) {  | ||||||
|  |   return abs(i) < abs(j);  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**  | ||||||
|  | 	Calculate a real Givens angle  | ||||||
|  |  **/ | ||||||
|  | template <class T> inline void Givens_calc(T y, T z, T &c, T &s){ | ||||||
|  |  | ||||||
|  |   RealD mz = (RealD)abs(z); | ||||||
|  |    | ||||||
|  |   if(mz==0.0){ | ||||||
|  |     c = 1; s = 0; | ||||||
|  |   } | ||||||
|  |   if(mz >= (RealD)abs(y)){ | ||||||
|  |     T t = -y/z; | ||||||
|  |     s = (T)1.0 / sqrt ((T)1.0 + t * t); | ||||||
|  |     c = s * t; | ||||||
|  |   } else { | ||||||
|  |     T t = -z/y; | ||||||
|  |     c = (T)1.0 / sqrt ((T)1.0 + t * t); | ||||||
|  |     s = c * t; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) | ||||||
|  | { | ||||||
|  |   int q ; SizeSquare(A,q); | ||||||
|  |  | ||||||
|  |   if(dir == 0){ | ||||||
|  |     for(int j=0;j<q;j++){ | ||||||
|  |       T nu = A[i][j]; | ||||||
|  |       T w  = A[k][j]; | ||||||
|  |       A[i][j] = (c*nu + s*w); | ||||||
|  |       A[k][j] = (-s*nu + c*w); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if(dir == 1){ | ||||||
|  |     for(int j=0;j<q;j++){ | ||||||
|  |       T nu = A[j][i]; | ||||||
|  |       T w  = A[j][k]; | ||||||
|  |       A[j][i] = (c*nu - s*w); | ||||||
|  |       A[j][k] = (s*nu + c*w); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	from input = x; | ||||||
|  | 	Compute the complex Householder vector, v, such that | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  |  | ||||||
|  | 	P | x |    | x | k = 0 | ||||||
|  | 	| x |    | 0 |  | ||||||
|  | 	| x | =  | 0 | | ||||||
|  | 	| x |    | 0 | j = 3 | ||||||
|  | 	| x |	   | x | | ||||||
|  |  | ||||||
|  | 	These are the "Unreduced" Householder vectors. | ||||||
|  |  | ||||||
|  |  **/ | ||||||
|  | template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) | ||||||
|  | { | ||||||
|  |   int N ; Size(input,N); | ||||||
|  |   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); | ||||||
|  |  | ||||||
|  |   if(abs(m) > 0.0){ | ||||||
|  |     T alpha = 0; | ||||||
|  |  | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = input[i]/m; | ||||||
|  |       alpha = alpha + v[i]*conj(v[i]); | ||||||
|  |     } | ||||||
|  |     alpha = sqrt(alpha); | ||||||
|  |     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); | ||||||
|  |  | ||||||
|  |     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; | ||||||
|  |     else                 v[k] = -alpha; | ||||||
|  |   } else{ | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = 0.0; | ||||||
|  |     }  | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	from input = x; | ||||||
|  | 	Compute the complex Householder vector, v, such that | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  |  | ||||||
|  | 	Px = alpha*e_dir | ||||||
|  |  | ||||||
|  | 	These are the "Unreduced" Householder vectors. | ||||||
|  |  | ||||||
|  |  **/ | ||||||
|  |  | ||||||
|  | template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) | ||||||
|  | { | ||||||
|  |   int N = input.size(); | ||||||
|  |   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); | ||||||
|  |    | ||||||
|  |   if(abs(m) > 0.0){ | ||||||
|  |     T alpha = 0; | ||||||
|  |  | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = input[i]/m; | ||||||
|  |       alpha = alpha + v[i]*conj(v[i]); | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     alpha = sqrt(alpha); | ||||||
|  |     beta = 1.0/(alpha*(alpha + abs(v[dir]) )); | ||||||
|  | 	 | ||||||
|  |     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; | ||||||
|  |     else                  v[dir] = -alpha; | ||||||
|  |   }else{ | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = 0.0; | ||||||
|  |     }  | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	Compute the product PA if trans = 0 | ||||||
|  | 	AP if trans = 1 | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  | 	start at element l of matrix A | ||||||
|  | 	v is of length j - k + 1 of v are nonzero | ||||||
|  |  **/ | ||||||
|  |  | ||||||
|  | template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) | ||||||
|  | { | ||||||
|  |   int N ; SizeSquare(A,N); | ||||||
|  |  | ||||||
|  |   if(abs(beta) > 0.0){ | ||||||
|  |     for(int p=l; p<N; p++){ | ||||||
|  |       T s = 0; | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p]; | ||||||
|  | 	s *= beta; | ||||||
|  | 	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);} | ||||||
|  |       } else { | ||||||
|  | 	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];} | ||||||
|  | 	s *= beta; | ||||||
|  | 	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);} | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	Compute the product PA if trans = 0 | ||||||
|  | 	AP if trans = 1 | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  | 	start at element l of matrix A | ||||||
|  | 	v is of length j - k + 1 of v are nonzero | ||||||
|  | 	A is tridiagonal | ||||||
|  |  **/ | ||||||
|  | template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) | ||||||
|  | { | ||||||
|  |   if(abs(beta) > 0.0){ | ||||||
|  |  | ||||||
|  |     int N ; SizeSquare(A,N); | ||||||
|  |  | ||||||
|  |     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);  | ||||||
|  |  | ||||||
|  |     T s; | ||||||
|  |     for(int p=l; p<M; p++){ | ||||||
|  |       s = 0; | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; | ||||||
|  |       }else{ | ||||||
|  | 	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; | ||||||
|  |       } | ||||||
|  |       s = beta*s; | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k]; | ||||||
|  |       }else{ | ||||||
|  | 	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     for(int p=l; p<M; p++){ | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; | ||||||
|  |       }else{ | ||||||
|  | 	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | } | ||||||
|  | #endif | ||||||
| @@ -33,8 +33,6 @@ directory | |||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| // Block conjugate gradient. Dimension zero should be the block direction | // Block conjugate gradient. Dimension zero should be the block direction | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -42,280 +40,25 @@ template <class Field> | |||||||
| class BlockConjugateGradient : public OperatorFunction<Field> { | class BlockConjugateGradient : public OperatorFunction<Field> { | ||||||
|  public: |  public: | ||||||
|  |  | ||||||
|  |  | ||||||
|   typedef typename Field::scalar_type scomplex; |   typedef typename Field::scalar_type scomplex; | ||||||
|  |  | ||||||
|   int blockDim ; |   const int blockDim = 0; | ||||||
|   int Nblock; |  | ||||||
|  |  | ||||||
|   BlockCGtype CGtype; |   int Nblock; | ||||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. |   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||||
|                            // Defaults true. |                            // Defaults true. | ||||||
|   RealD Tolerance; |   RealD Tolerance; | ||||||
|   Integer MaxIterations; |   Integer MaxIterations; | ||||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|    |    | ||||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) |   BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||||
|     : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) |     : Tolerance(tol), | ||||||
|   {}; |     MaxIterations(maxit), | ||||||
|  |     ErrorOnNoConverge(err_on_no_conv){}; | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Thin QR factorisation (google it) |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, |  | ||||||
| 		 Eigen::MatrixXcd &C, |  | ||||||
| 		 Eigen::MatrixXcd &Cinv, |  | ||||||
| 		 Field & Q, |  | ||||||
| 		 const Field & R) |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   //Dimensions |  | ||||||
|   // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock |  | ||||||
|   // |  | ||||||
|   // Rdag R = m_rr = Herm = L L^dag        <-- Cholesky decomposition (LLT routine in Eigen) |  | ||||||
|   // |  | ||||||
|   //   Q  C = R => Q = R C^{-1} |  | ||||||
|   // |  | ||||||
|   // Want  Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}  |  | ||||||
|   // |  | ||||||
|   // Set C = L^{dag}, and then Q^dag Q = ident  |  | ||||||
|   // |  | ||||||
|   // Checks: |  | ||||||
|   // Cdag C = Rdag R ; passes. |  | ||||||
|   // QdagQ  = 1      ; passes |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Cholesky from Eigen |  | ||||||
|   // There exists a ldlt that is documented as more stable |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  |  | ||||||
|  |  | ||||||
|   C    = L.adjoint(); |  | ||||||
|   Cinv = C.inverse(); |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Q = R C^{-1} |  | ||||||
|   // |  | ||||||
|   // Q_j  = R_i Cinv(i,j)  |  | ||||||
|   // |  | ||||||
|   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // FIXME:: make a sliceMulMatrix to avoid zero vector |  | ||||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); |  | ||||||
| } |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Call one of several implementations |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
| { | { | ||||||
|   if ( CGtype == BlockCGrQ ) { |   int Orthog = 0; // First dimension is block dim | ||||||
|     BlockCGrQsolve(Linop,Src,Psi); |  | ||||||
|   } else if (CGtype == BlockCG ) { |  | ||||||
|     BlockCGsolve(Linop,Src,Psi); |  | ||||||
|   } else if (CGtype == CGmultiRHS ) { |  | ||||||
|     CGmultiRHSsolve(Linop,Src,Psi); |  | ||||||
|   } else { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////// |  | ||||||
| // BlockCGrQ implementation: |  | ||||||
| //-------------------------- |  | ||||||
| // X is guess/Solution |  | ||||||
| // B is RHS |  | ||||||
| // Solve A X_i = B_i    ;        i refers to Nblock index |  | ||||||
| //////////////////////////////////////////////////////////////////////////// |  | ||||||
| void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   Nblock = B._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   X.checkerboard = B.checkerboard; |  | ||||||
|   conformable(X, B); |  | ||||||
|  |  | ||||||
|   Field tmp(B); |  | ||||||
|   Field Q(B); |  | ||||||
|   Field D(B); |  | ||||||
|   Field Z(B); |  | ||||||
|   Field AD(B); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,B,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,B,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,X,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   /************************************************************************ |  | ||||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) |  | ||||||
|    ************************************************************************ |  | ||||||
|    * Dimensions: |  | ||||||
|    * |  | ||||||
|    *   X,B==(Nferm x Nblock) |  | ||||||
|    *   A==(Nferm x Nferm) |  | ||||||
|    *   |  | ||||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site |  | ||||||
|    *  |  | ||||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) |  | ||||||
|    * for k:  |  | ||||||
|    *   Z  = AD |  | ||||||
|    *   M  = [D^dag Z]^{-1} |  | ||||||
|    *   X  = X + D MC |  | ||||||
|    *   QS = Q - ZM |  | ||||||
|    *   D  = Q + D S^dag |  | ||||||
|    *   C  = S C |  | ||||||
|    */ |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   // Initial block: initial search dir is guess |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; |  | ||||||
|  |  | ||||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) |  | ||||||
|  |  | ||||||
|   Linop.HermOp(X, AD); |  | ||||||
|   tmp = B - AD;   |  | ||||||
|   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; |  | ||||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); |  | ||||||
|   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; |  | ||||||
|   D=Q; |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   // Timers |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch QRTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|  |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     //3. Z  = AD |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(D, Z);       |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; |  | ||||||
|  |  | ||||||
|     //4. M  = [D^dag Z]^{-1} |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_M       = m_DZ.inverse(); |  | ||||||
|     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; |  | ||||||
|      |  | ||||||
|     //5. X  = X + D MC |  | ||||||
|     m_tmp     = m_M * m_C; |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     //6. QS = Q - ZM |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|     QRTimer.Start(); |  | ||||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); |  | ||||||
|     QRTimer.Stop(); |  | ||||||
|      |  | ||||||
|     //7. D  = Q + D S^dag |  | ||||||
|     m_tmp = m_S.adjoint(); |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     //8. C  = S C |  | ||||||
|     m_C = m_S*m_C; |  | ||||||
|      |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      */ |  | ||||||
|     m_rr = m_C.adjoint() * m_C; |  | ||||||
|  |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     RealD rr; |  | ||||||
|  |  | ||||||
|     for(int b=0;b<Nblock;b++) { |  | ||||||
|       rrsum+=real(m_rr(b,b)); |  | ||||||
|       rr = real(m_rr(b,b))/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; |  | ||||||
|  |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; |  | ||||||
|  |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " |  | ||||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(X, AD); |  | ||||||
|       AD = AD-B; |  | ||||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; |  | ||||||
| 	     |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
| @@ -419,9 +162,8 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi | |||||||
|      ********************* |      ********************* | ||||||
|      */ |      */ | ||||||
|     RealD max_resid=0; |     RealD max_resid=0; | ||||||
|     RealD rr; |  | ||||||
|     for(int b=0;b<Nblock;b++){ |     for(int b=0;b<Nblock;b++){ | ||||||
|       rr = real(m_rr(b,b))/ssq[b]; |       RealD rr = real(m_rr(b,b))/ssq[b]; | ||||||
|       if ( rr > max_resid ) max_resid = rr; |       if ( rr > max_resid ) max_resid = rr; | ||||||
|     } |     } | ||||||
|      |      | ||||||
| @@ -431,14 +173,13 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi | |||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; |       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||||
|       for(int b=0;b<Nblock;b++){ |       for(int b=0;b<Nblock;b++){ | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |       } | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|       Linop.HermOp(Psi, AP); |       Linop.HermOp(Psi, AP); | ||||||
|       AP = AP-Src; |       AP = AP-Src; | ||||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; |       std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
| @@ -456,13 +197,35 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi | |||||||
|   if (ErrorOnNoConverge) assert(0); |   if (ErrorOnNoConverge) assert(0); | ||||||
|   IterationsToComplete = k; |   IterationsToComplete = k; | ||||||
| } | } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction | // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||||
| // Use this for spread out across nodes |  | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | template <class Field> | ||||||
|  | class MultiRHSConjugateGradient : public OperatorFunction<Field> { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   typedef typename Field::scalar_type scomplex; | ||||||
|  |  | ||||||
|  |   const int blockDim = 0; | ||||||
|  |  | ||||||
|  |   int Nblock; | ||||||
|  |   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||||
|  |                            // Defaults true. | ||||||
|  |   RealD Tolerance; | ||||||
|  |   Integer MaxIterations; | ||||||
|  |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|  |    | ||||||
|  |    MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||||
|  |     : Tolerance(tol), | ||||||
|  |     MaxIterations(maxit), | ||||||
|  |     ErrorOnNoConverge(err_on_no_conv){}; | ||||||
|  |  | ||||||
|  | void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
| { | { | ||||||
|   int Orthog = blockDim; // First dimension is block dim |   int Orthog = 0; // First dimension is block dim | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
| @@ -522,10 +285,12 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | |||||||
|     MatrixTimer.Stop(); |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|     // Alpha |     // Alpha | ||||||
|  |     //    sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); | ||||||
|     sliceInnerTimer.Start(); |     sliceInnerTimer.Start(); | ||||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); |     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||||
|     sliceInnerTimer.Stop(); |     sliceInnerTimer.Stop(); | ||||||
|     for(int b=0;b<Nblock;b++){ |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       //      std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl; | ||||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); |       v_alpha[b] = v_rr[b]/real(v_pAp[b]); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -567,7 +332,7 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | |||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; |       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||||
|       for(int b=0;b<Nblock;b++){ |       for(int b=0;b<Nblock;b++){ | ||||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||||
|       } |       } | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
| @@ -593,8 +358,9 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | |||||||
|   if (ErrorOnNoConverge) assert(0); |   if (ErrorOnNoConverge) assert(0); | ||||||
|   IterationsToComplete = k; |   IterationsToComplete = k; | ||||||
| } | } | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -123,11 +123,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|       p = p * b + r; |       p = p * b + r; | ||||||
|  |  | ||||||
|       LinalgTimer.Stop(); |       LinalgTimer.Stop(); | ||||||
|  |  | ||||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k |       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||||
|                 << " residual " << cp << " target " << rsq << std::endl; |                 << " residual " << cp << " target " << rsq << std::endl; | ||||||
|       std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; |  | ||||||
|       std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; |  | ||||||
|  |  | ||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if (cp <= rsq) { |       if (cp <= rsq) { | ||||||
| @@ -135,6 +132,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|         Linop.HermOpAndNorm(psi, mmp, d, qq); |         Linop.HermOpAndNorm(psi, mmp, d, qq); | ||||||
|         p = mmp - src; |         p = mmp - src; | ||||||
|  |  | ||||||
|  |         RealD mmpnorm = sqrt(norm2(mmp)); | ||||||
|  |         RealD psinorm = sqrt(norm2(psi)); | ||||||
|         RealD srcnorm = sqrt(norm2(src)); |         RealD srcnorm = sqrt(norm2(src)); | ||||||
|         RealD resnorm = sqrt(norm2(p)); |         RealD resnorm = sqrt(norm2(p)); | ||||||
|         RealD true_residual = resnorm / srcnorm; |         RealD true_residual = resnorm / srcnorm; | ||||||
| @@ -158,10 +157,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     } |     } | ||||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" |     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||||
|               << std::endl; |               << std::endl; | ||||||
|  |  | ||||||
|     if (ErrorOnNoConverge) assert(0); |     if (ErrorOnNoConverge) assert(0); | ||||||
|     IterationsToComplete = k; |     IterationsToComplete = k; | ||||||
|  |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										81
									
								
								lib/algorithms/iterative/EigenSort.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								lib/algorithms/iterative/EigenSort.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/EigenSort.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_EIGENSORT_H | ||||||
|  | #define GRID_EIGENSORT_H | ||||||
|  |  | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Eigen sorter to begin with | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | template<class Field> | ||||||
|  | class SortEigen { | ||||||
|  |  private: | ||||||
|  |    | ||||||
|  | //hacking for testing for now | ||||||
|  |  private: | ||||||
|  |   static bool less_lmd(RealD left,RealD right){ | ||||||
|  |     return left > right; | ||||||
|  |   }   | ||||||
|  |   static bool less_pair(std::pair<RealD,Field const*>& left, | ||||||
|  |                         std::pair<RealD,Field const*>& right){ | ||||||
|  |     return left.first > (right.first); | ||||||
|  |   }   | ||||||
|  |    | ||||||
|  |    | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   void push(DenseVector<RealD>& lmd, | ||||||
|  |             DenseVector<Field>& evec,int N) { | ||||||
|  |     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); | ||||||
|  |     for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; | ||||||
|  |      | ||||||
|  |     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());     | ||||||
|  |     for(int i=0;i<lmd.size();++i) | ||||||
|  |       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); | ||||||
|  |  | ||||||
|  |     partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); | ||||||
|  |  | ||||||
|  |     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); | ||||||
|  |     for(int i=0;i<N;++i){ | ||||||
|  |       lmd[i]=it->first; | ||||||
|  |       evec[i]=*(it->second); | ||||||
|  |       ++it; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   void push(DenseVector<RealD>& lmd,int N) { | ||||||
|  |     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); | ||||||
|  |   } | ||||||
|  |   bool saturated(RealD lmd, RealD thrs) { | ||||||
|  |     return fabs(lmd) > fabs(thrs); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -11,7 +11,7 @@ int PointerCache::victim; | |||||||
|  |  | ||||||
| void *PointerCache::Insert(void *ptr,size_t bytes) { | void *PointerCache::Insert(void *ptr,size_t bytes) { | ||||||
|  |  | ||||||
|   if (bytes < 4096 ) return ptr; |   if (bytes < 4096 ) return NULL; | ||||||
|  |  | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
|   assert(omp_in_parallel()==0); |   assert(omp_in_parallel()==0); | ||||||
|   | |||||||
| @@ -92,34 +92,18 @@ public: | |||||||
|     size_type bytes = __n*sizeof(_Tp); |     size_type bytes = __n*sizeof(_Tp); | ||||||
|  |  | ||||||
|     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); |     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); | ||||||
|     //    if ( ptr != NULL )  |      | ||||||
|     //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl; |  | ||||||
|  |  | ||||||
|     ////////////////// |  | ||||||
|     // Hack 2MB align; could make option probably doesn't need configurability |  | ||||||
|     ////////////////// |  | ||||||
| //define GRID_ALLOC_ALIGN (128) |  | ||||||
| #define GRID_ALLOC_ALIGN (2*1024*1024) |  | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); | ||||||
| #else | #else | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); | ||||||
| #endif | #endif | ||||||
|     //    std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl; |  | ||||||
|     // First touch optimise in threaded loop |  | ||||||
|     uint8_t *cp = (uint8_t *)ptr; |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp parallel for |  | ||||||
| #endif |  | ||||||
|     for(size_type n=0;n<bytes;n+=4096){ |  | ||||||
|       cp[n]=0; |  | ||||||
|     } |  | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void deallocate(pointer __p, size_type __n) {  |   void deallocate(pointer __p, size_type __n) {  | ||||||
|     size_type bytes = __n * sizeof(_Tp); |     size_type bytes = __n * sizeof(_Tp); | ||||||
|  |  | ||||||
|     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); |     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); | ||||||
|  |  | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
| @@ -198,19 +182,10 @@ public: | |||||||
|   pointer allocate(size_type __n, const void* _p= 0)  |   pointer allocate(size_type __n, const void* _p= 0)  | ||||||
|   { |   { | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); |     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); | ||||||
| #else | #else | ||||||
|     _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); |     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); | ||||||
| #endif | #endif | ||||||
|     size_type bytes = __n*sizeof(_Tp); |  | ||||||
|     uint8_t *cp = (uint8_t *)ptr; |  | ||||||
|     if ( ptr ) {  |  | ||||||
|     // One touch per 4k page, static OMP loop to catch same loop order |  | ||||||
| #pragma omp parallel for schedule(static) |  | ||||||
|       for(size_type n=0;n<bytes;n+=4096){ |  | ||||||
| 	cp[n]=0; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|   void deallocate(pointer __p, size_type) {  |   void deallocate(pointer __p, size_type) {  | ||||||
|   | |||||||
| @@ -6,9 +6,8 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: Guido Cossu <guido.cossu@ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -50,6 +49,7 @@ public: | |||||||
|  |  | ||||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; |     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Physics Grid information. |     // Physics Grid information. | ||||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. |     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||||
|     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal |     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal | ||||||
| @@ -62,12 +62,13 @@ public: | |||||||
|     int _isites; |     int _isites; | ||||||
|     int _fsites;                  // _isites*_osites = product(dimensions). |     int _fsites;                  // _isites*_osites = product(dimensions). | ||||||
|     int _gsites; |     int _gsites; | ||||||
|     std::vector<int> _slice_block;// subslice information |     std::vector<int> _slice_block;   // subslice information | ||||||
|     std::vector<int> _slice_stride; |     std::vector<int> _slice_stride; | ||||||
|     std::vector<int> _slice_nblock; |     std::vector<int> _slice_nblock; | ||||||
|  |  | ||||||
|     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] |     // Might need these at some point | ||||||
|     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 |     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] | ||||||
|  |     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
| @@ -98,7 +99,7 @@ public: | |||||||
|     virtual int oIndex(std::vector<int> &coor) |     virtual int oIndex(std::vector<int> &coor) | ||||||
|     { |     { | ||||||
|         int idx=0; |         int idx=0; | ||||||
|         // Works with either global or local coordinates | 	// Works with either global or local coordinates | ||||||
|         for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); |         for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||||
|         return idx; |         return idx; | ||||||
|     } |     } | ||||||
| @@ -120,12 +121,6 @@ public: | |||||||
|       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); |       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) { |  | ||||||
|       lcoor.resize(_ndimension); |  | ||||||
|       for (int d = 0; d < _ndimension; d++) |  | ||||||
|         lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
|     // SIMD lane addressing |     // SIMD lane addressing | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
| @@ -133,7 +128,6 @@ public: | |||||||
|     { |     { | ||||||
|       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); |       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline int PermuteDim(int dimension){ |     inline int PermuteDim(int dimension){ | ||||||
|       return _simd_layout[dimension]>1; |       return _simd_layout[dimension]>1; | ||||||
|     } |     } | ||||||
| @@ -151,15 +145,15 @@ public: | |||||||
|       // Distance should be either 0,1,2.. |       // Distance should be either 0,1,2.. | ||||||
|       // |       // | ||||||
|       if ( _simd_layout[dimension] > 2 ) {  |       if ( _simd_layout[dimension] > 2 ) {  | ||||||
|         for(int d=0;d<_ndimension;d++){ | 	for(int d=0;d<_ndimension;d++){ | ||||||
|           if ( d != dimension ) assert ( (_simd_layout[d]==1)  ); | 	  if ( d != dimension ) assert ( (_simd_layout[d]==1)  ); | ||||||
|         } | 	} | ||||||
|         permute_type = RotateBit; // How to specify distance; this is not just direction. | 	permute_type = RotateBit; // How to specify distance; this is not just direction. | ||||||
|         return permute_type; | 	return permute_type; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       for(int d=_ndimension-1;d>dimension;d--){ |       for(int d=_ndimension-1;d>dimension;d--){ | ||||||
|         if (_simd_layout[d]>1 ) permute_type++; | 	if (_simd_layout[d]>1 ) permute_type++; | ||||||
|       } |       } | ||||||
|       return permute_type; |       return permute_type; | ||||||
|     } |     } | ||||||
| @@ -174,31 +168,11 @@ public: | |||||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  |     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||||
|     inline int Nd    (void) const { return _ndimension;}; |     inline int Nd    (void) const { return _ndimension;}; | ||||||
|  |  | ||||||
|     inline const std::vector<int> LocalStarts(void)             { return _lstart;    }; |  | ||||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; |     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; |     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; |     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||||
|     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; |     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////// |  | ||||||
|     // Utility to print the full decomposition details  |  | ||||||
|     //////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     void show_decomposition(){ |  | ||||||
|       std::cout << GridLogMessage << "\tFull Dimensions    : " << _fdimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tSIMD layout        : " << _simd_layout << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tGlobal Dimensions  : " << _gdimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tLocal Dimensions   : " << _ldimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tOuter strides      : " << _ostride << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInner strides      : " << _istride << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tiSites             : " << _isites << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\toSites             : " << _osites << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tlSites             : " << lSites() << std::endl;         |  | ||||||
|       std::cout << GridLogMessage << "\tgSites             : " << gSites() << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tNd                 : " << _ndimension << std::endl;              |  | ||||||
|     }  |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
|     // Global addressing |     // Global addressing | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
| @@ -210,15 +184,12 @@ public: | |||||||
|       assert(lidx<lSites()); |       assert(lidx<lSites()); | ||||||
|       Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); |       Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ |     void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ | ||||||
|       gidx=0; |       gidx=0; | ||||||
|       int mult=1; |       int mult=1; | ||||||
|       for(int mu=0;mu<_ndimension;mu++) { |       for(int mu=0;mu<_ndimension;mu++) { | ||||||
|         gidx+=mult*gcoor[mu]; | 	gidx+=mult*gcoor[mu]; | ||||||
|         mult*=_gdimensions[mu]; | 	mult*=_gdimensions[mu]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor) |     void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor) | ||||||
| @@ -226,9 +197,9 @@ public: | |||||||
|       pcoor.resize(_ndimension); |       pcoor.resize(_ndimension); | ||||||
|       lcoor.resize(_ndimension); |       lcoor.resize(_ndimension); | ||||||
|       for(int mu=0;mu<_ndimension;mu++){ |       for(int mu=0;mu<_ndimension;mu++){ | ||||||
|         int _fld  = _fdimensions[mu]/_processors[mu]; | 	int _fld  = _fdimensions[mu]/_processors[mu]; | ||||||
|         pcoor[mu] = gcoor[mu]/_fld; | 	pcoor[mu] = gcoor[mu]/_fld; | ||||||
|         lcoor[mu] = gcoor[mu]%_fld; | 	lcoor[mu] = gcoor[mu]%_fld; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor) |     void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor) | ||||||
| @@ -240,9 +211,9 @@ public: | |||||||
|       /* |       /* | ||||||
|       std::vector<int> cblcoor(lcoor); |       std::vector<int> cblcoor(lcoor); | ||||||
|       for(int d=0;d<cblcoor.size();d++){ |       for(int d=0;d<cblcoor.size();d++){ | ||||||
|         if( this->CheckerBoarded(d) ) { | 	if( this->CheckerBoarded(d) ) { | ||||||
|           cblcoor[d] = lcoor[d]/2; | 	  cblcoor[d] = lcoor[d]/2; | ||||||
|         } | 	} | ||||||
|       } |       } | ||||||
|       */ |       */ | ||||||
|       i_idx= iIndex(lcoor); |       i_idx= iIndex(lcoor); | ||||||
| @@ -268,7 +239,7 @@ public: | |||||||
|     { |     { | ||||||
|       RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); |       RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); | ||||||
|       if(CheckerBoarded(0)){ |       if(CheckerBoarded(0)){ | ||||||
|         fcoor[0] = fcoor[0]*2+cb; | 	fcoor[0] = fcoor[0]*2+cb; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor) |     void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor) | ||||||
|   | |||||||
| @@ -62,81 +62,73 @@ public: | |||||||
|       return shift; |       return shift; | ||||||
|     } |     } | ||||||
|     GridCartesian(const std::vector<int> &dimensions, |     GridCartesian(const std::vector<int> &dimensions, | ||||||
|                   const std::vector<int> &simd_layout, | 		  const std::vector<int> &simd_layout, | ||||||
|                   const std::vector<int> &processor_grid) : GridBase(processor_grid) | 		  const std::vector<int> &processor_grid | ||||||
|  | 		  ) : GridBase(processor_grid) | ||||||
|     { |     { | ||||||
|       /////////////////////// |         /////////////////////// | ||||||
|       // Grid information |         // Grid information | ||||||
|       /////////////////////// |         /////////////////////// | ||||||
|       _ndimension = dimensions.size(); |         _ndimension = dimensions.size(); | ||||||
|  |              | ||||||
|  |         _fdimensions.resize(_ndimension); | ||||||
|  |         _gdimensions.resize(_ndimension); | ||||||
|  |         _ldimensions.resize(_ndimension); | ||||||
|  |         _rdimensions.resize(_ndimension); | ||||||
|  |         _simd_layout.resize(_ndimension); | ||||||
|  |              | ||||||
|  |         _ostride.resize(_ndimension); | ||||||
|  |         _istride.resize(_ndimension); | ||||||
|  |              | ||||||
|  |         _fsites = _gsites = _osites = _isites = 1; | ||||||
|  |  | ||||||
|       _fdimensions.resize(_ndimension); |         for(int d=0;d<_ndimension;d++){ | ||||||
|       _gdimensions.resize(_ndimension); | 	  _fdimensions[d] = dimensions[d]; // Global dimensions | ||||||
|       _ldimensions.resize(_ndimension); | 	  _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||||
|       _rdimensions.resize(_ndimension); | 	  _simd_layout[d] = simd_layout[d]; | ||||||
|       _simd_layout.resize(_ndimension); | 	  _fsites = _fsites * _fdimensions[d]; | ||||||
|       _lstart.resize(_ndimension); | 	  _gsites = _gsites * _gdimensions[d]; | ||||||
|       _lend.resize(_ndimension); |  | ||||||
|  |  | ||||||
|       _ostride.resize(_ndimension); | 	  //FIXME check for exact division | ||||||
|       _istride.resize(_ndimension); |  | ||||||
|  |  | ||||||
|       _fsites = _gsites = _osites = _isites = 1; | 	  // Use a reduced simd grid | ||||||
|  | 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | ||||||
|       for (int d = 0; d < _ndimension; d++) | 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | ||||||
|       { | 	  _osites *= _rdimensions[d]; | ||||||
|         _fdimensions[d] = dimensions[d];   // Global dimensions | 	  _isites *= _simd_layout[d]; | ||||||
|         _gdimensions[d] = _fdimensions[d]; // Global dimensions |                  | ||||||
|         _simd_layout[d] = simd_layout[d]; | 	  // Addressing support | ||||||
|         _fsites = _fsites * _fdimensions[d]; | 	  if ( d==0 ) { | ||||||
|         _gsites = _gsites * _gdimensions[d]; | 	    _ostride[d] = 1; | ||||||
|  | 	    _istride[d] = 1; | ||||||
|         // Use a reduced simd grid | 	  } else { | ||||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions | 	    _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | 	    _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||||
|  | 	  } | ||||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition |  | ||||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); |  | ||||||
|  |  | ||||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; |  | ||||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; |  | ||||||
|         _osites *= _rdimensions[d]; |  | ||||||
|         _isites *= _simd_layout[d]; |  | ||||||
|  |  | ||||||
|         // Addressing support |  | ||||||
|         if (d == 0) |  | ||||||
|         { |  | ||||||
|           _ostride[d] = 1; |  | ||||||
|           _istride[d] = 1; |  | ||||||
|         } |         } | ||||||
|         else |          | ||||||
|         { |         /////////////////////// | ||||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; |         // subplane information | ||||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; |         /////////////////////// | ||||||
|  |         _slice_block.resize(_ndimension); | ||||||
|  |         _slice_stride.resize(_ndimension); | ||||||
|  |         _slice_nblock.resize(_ndimension); | ||||||
|  |              | ||||||
|  |         int block =1; | ||||||
|  |         int nblock=1; | ||||||
|  |         for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||||
|  |              | ||||||
|  |         for(int d=0;d<_ndimension;d++){ | ||||||
|  |             nblock/=_rdimensions[d]; | ||||||
|  |             _slice_block[d] =block; | ||||||
|  |             _slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||||
|  |             _slice_nblock[d]=nblock; | ||||||
|  |             block = block*_rdimensions[d]; | ||||||
|         } |         } | ||||||
|       } |  | ||||||
|  |  | ||||||
|       /////////////////////// |  | ||||||
|       // subplane information |  | ||||||
|       /////////////////////// |  | ||||||
|       _slice_block.resize(_ndimension); |  | ||||||
|       _slice_stride.resize(_ndimension); |  | ||||||
|       _slice_nblock.resize(_ndimension); |  | ||||||
|  |  | ||||||
|       int block = 1; |  | ||||||
|       int nblock = 1; |  | ||||||
|       for (int d = 0; d < _ndimension; d++) |  | ||||||
|         nblock *= _rdimensions[d]; |  | ||||||
|  |  | ||||||
|       for (int d = 0; d < _ndimension; d++) |  | ||||||
|       { |  | ||||||
|         nblock /= _rdimensions[d]; |  | ||||||
|         _slice_block[d] = block; |  | ||||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; |  | ||||||
|         _slice_nblock[d] = nblock; |  | ||||||
|         block = block * _rdimensions[d]; |  | ||||||
|       } |  | ||||||
|     }; |     }; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -131,155 +131,132 @@ public: | |||||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); |       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); | ||||||
|     } |     } | ||||||
|     void Init(const std::vector<int> &dimensions, |     void Init(const std::vector<int> &dimensions, | ||||||
|               const std::vector<int> &simd_layout, | 	      const std::vector<int> &simd_layout, | ||||||
|               const std::vector<int> &processor_grid, | 	      const std::vector<int> &processor_grid, | ||||||
|               const std::vector<int> &checker_dim_mask, | 	      const std::vector<int> &checker_dim_mask, | ||||||
|               int checker_dim) | 	      int checker_dim) | ||||||
|     { |     { | ||||||
|       /////////////////////// |     /////////////////////// | ||||||
|       // Grid information |     // Grid information | ||||||
|       /////////////////////// |     /////////////////////// | ||||||
|       _checker_dim = checker_dim; |       _checker_dim = checker_dim; | ||||||
|       assert(checker_dim_mask[checker_dim] == 1); |       assert(checker_dim_mask[checker_dim]==1); | ||||||
|       _ndimension = dimensions.size(); |       _ndimension = dimensions.size(); | ||||||
|       assert(checker_dim_mask.size() == _ndimension); |       assert(checker_dim_mask.size()==_ndimension); | ||||||
|       assert(processor_grid.size() == _ndimension); |       assert(processor_grid.size()==_ndimension); | ||||||
|       assert(simd_layout.size() == _ndimension); |       assert(simd_layout.size()==_ndimension); | ||||||
|  |        | ||||||
|       _fdimensions.resize(_ndimension); |       _fdimensions.resize(_ndimension); | ||||||
|       _gdimensions.resize(_ndimension); |       _gdimensions.resize(_ndimension); | ||||||
|       _ldimensions.resize(_ndimension); |       _ldimensions.resize(_ndimension); | ||||||
|       _rdimensions.resize(_ndimension); |       _rdimensions.resize(_ndimension); | ||||||
|       _simd_layout.resize(_ndimension); |       _simd_layout.resize(_ndimension); | ||||||
|       _lstart.resize(_ndimension); |        | ||||||
|       _lend.resize(_ndimension); |  | ||||||
|  |  | ||||||
|       _ostride.resize(_ndimension); |       _ostride.resize(_ndimension); | ||||||
|       _istride.resize(_ndimension); |       _istride.resize(_ndimension); | ||||||
|  |        | ||||||
|       _fsites = _gsites = _osites = _isites = 1; |       _fsites = _gsites = _osites = _isites = 1; | ||||||
|  | 	 | ||||||
|  |       _checker_dim_mask=checker_dim_mask; | ||||||
|  |  | ||||||
|       _checker_dim_mask = checker_dim_mask; |       for(int d=0;d<_ndimension;d++){ | ||||||
|  | 	_fdimensions[d] = dimensions[d]; | ||||||
|  | 	_gdimensions[d] = _fdimensions[d]; | ||||||
|  | 	_fsites = _fsites * _fdimensions[d]; | ||||||
|  | 	_gsites = _gsites * _gdimensions[d]; | ||||||
|  |          | ||||||
|  | 	if (d==_checker_dim) { | ||||||
|  | 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | ||||||
|  | 	} | ||||||
|  | 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | ||||||
|  |  | ||||||
|       for (int d = 0; d < _ndimension; d++) | 	// Use a reduced simd grid | ||||||
|       { | 	_simd_layout[d] = simd_layout[d]; | ||||||
|         _fdimensions[d] = dimensions[d]; | 	_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; | ||||||
|         _gdimensions[d] = _fdimensions[d]; | 	assert(_rdimensions[d]>0); | ||||||
|         _fsites = _fsites * _fdimensions[d]; |  | ||||||
|         _gsites = _gsites * _gdimensions[d]; |  | ||||||
|  |  | ||||||
|         if (d == _checker_dim) | 	// all elements of a simd vector must have same checkerboard. | ||||||
|         { | 	// If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||||
|           assert((_gdimensions[d] & 0x1) == 0); | 	if ( _simd_layout[d]>1 ) { | ||||||
|           _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard | 	  if ( checker_dim_mask[d] ) {  | ||||||
|         } | 	    assert( (_rdimensions[d]&0x1) == 0 ); | ||||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; | 	  } | ||||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | 	} | ||||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; |  | ||||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; |  | ||||||
|  |  | ||||||
|         // Use a reduced simd grid | 	_osites *= _rdimensions[d]; | ||||||
|         _simd_layout[d] = simd_layout[d]; | 	_isites *= _simd_layout[d]; | ||||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer |          | ||||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | 	// Addressing support | ||||||
|         assert(_rdimensions[d] > 0); | 	if ( d==0 ) { | ||||||
|  | 	  _ostride[d] = 1; | ||||||
|  | 	  _istride[d] = 1; | ||||||
|  | 	} else { | ||||||
|  | 	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||||
|  | 	  _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|         // all elements of a simd vector must have same checkerboard. |  | ||||||
|         // If Ls vectorised, this must still be the case; e.g. dwf rb5d |  | ||||||
|         if (_simd_layout[d] > 1) |  | ||||||
|         { |  | ||||||
|           if (checker_dim_mask[d]) |  | ||||||
|           { |  | ||||||
|             assert((_rdimensions[d] & 0x1) == 0); |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         _osites *= _rdimensions[d]; |  | ||||||
|         _isites *= _simd_layout[d]; |  | ||||||
|  |  | ||||||
|         // Addressing support |  | ||||||
|         if (d == 0) |  | ||||||
|         { |  | ||||||
|           _ostride[d] = 1; |  | ||||||
|           _istride[d] = 1; |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; |  | ||||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; |  | ||||||
|         } |  | ||||||
|       } |       } | ||||||
|  |              | ||||||
|       //////////////////////////////////////////////////////////////////////////////////////////// |       //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|       // subplane information |       // subplane information | ||||||
|       //////////////////////////////////////////////////////////////////////////////////////////// |       //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|       _slice_block.resize(_ndimension); |       _slice_block.resize(_ndimension); | ||||||
|       _slice_stride.resize(_ndimension); |       _slice_stride.resize(_ndimension); | ||||||
|       _slice_nblock.resize(_ndimension); |       _slice_nblock.resize(_ndimension); | ||||||
|  |          | ||||||
|       int block = 1; |       int block =1; | ||||||
|       int nblock = 1; |       int nblock=1; | ||||||
|       for (int d = 0; d < _ndimension; d++) |       for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||||
|         nblock *= _rdimensions[d]; |        | ||||||
|  |       for(int d=0;d<_ndimension;d++){ | ||||||
|       for (int d = 0; d < _ndimension; d++) | 	nblock/=_rdimensions[d]; | ||||||
|       { | 	_slice_block[d] =block; | ||||||
|         nblock /= _rdimensions[d]; | 	_slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||||
|         _slice_block[d] = block; | 	_slice_nblock[d]=nblock; | ||||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | 	block = block*_rdimensions[d]; | ||||||
|         _slice_nblock[d] = nblock; |  | ||||||
|         block = block * _rdimensions[d]; |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Create a checkerboard lookup table |       // Create a checkerboard lookup table | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       int rvol = 1; |       int rvol = 1; | ||||||
|       for (int d = 0; d < _ndimension; d++) |       for(int d=0;d<_ndimension;d++){ | ||||||
|       { | 	rvol=rvol * _rdimensions[d]; | ||||||
|         rvol = rvol * _rdimensions[d]; |  | ||||||
|       } |       } | ||||||
|       _checker_board.resize(rvol); |       _checker_board.resize(rvol); | ||||||
|       for (int osite = 0; osite < _osites; osite++) |       for(int osite=0;osite<_osites;osite++){ | ||||||
|       { | 	_checker_board[osite] = CheckerBoardFromOindex (osite); | ||||||
|         _checker_board[osite] = CheckerBoardFromOindex(osite); |  | ||||||
|       } |       } | ||||||
|  |        | ||||||
|     }; |     }; | ||||||
|  | protected: | ||||||
|   protected: |  | ||||||
|     virtual int oIndex(std::vector<int> &coor) |     virtual int oIndex(std::vector<int> &coor) | ||||||
|     { |     { | ||||||
|       int idx = 0; |       int idx=0; | ||||||
|       for (int d = 0; d < _ndimension; d++) |       for(int d=0;d<_ndimension;d++) { | ||||||
|       { | 	if( d==_checker_dim ) { | ||||||
|         if (d == _checker_dim) | 	  idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]); | ||||||
|         { | 	} else { | ||||||
|           idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); | 	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||||
|         } | 	} | ||||||
|         else |  | ||||||
|         { |  | ||||||
|           idx += _ostride[d] * (coor[d] % _rdimensions[d]); |  | ||||||
|         } |  | ||||||
|       } |       } | ||||||
|       return idx; |       return idx; | ||||||
|     }; |     }; | ||||||
|  |          | ||||||
|     virtual int iIndex(std::vector<int> &lcoor) |     virtual int iIndex(std::vector<int> &lcoor) | ||||||
|     { |     { | ||||||
|       int idx = 0; |         int idx=0; | ||||||
|       for (int d = 0; d < _ndimension; d++) |         for(int d=0;d<_ndimension;d++) { | ||||||
|       { | 	  if( d==_checker_dim ) { | ||||||
|         if (d == _checker_dim) | 	    idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); | ||||||
|         { | 	  } else {  | ||||||
|           idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); | 	    idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); | ||||||
|         } | 	  } | ||||||
|         else | 	} | ||||||
|         { |         return idx; | ||||||
|           idx += _istride[d] * (lcoor[d] / _rdimensions[d]); |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|       return idx; |  | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -26,10 +26,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <fcntl.h> |  | ||||||
| #include <unistd.h> |  | ||||||
| #include <limits.h> |  | ||||||
| #include <sys/mman.h> |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -37,11 +33,8 @@ namespace Grid { | |||||||
| // Info that is setup once and indept of cartesian layout | // Info that is setup once and indept of cartesian layout | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
| void *              CartesianCommunicator::ShmCommBuf; | void *              CartesianCommunicator::ShmCommBuf; | ||||||
| uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 1024LL*1024LL*1024LL;  | uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;  | ||||||
| CartesianCommunicator::CommunicatorPolicy_t   | CartesianCommunicator::CommunicatorPolicy_t  CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; | ||||||
| CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; |  | ||||||
| int CartesianCommunicator::nCommThreads = -1; |  | ||||||
| int CartesianCommunicator::Hugepages = 0; |  | ||||||
|  |  | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Alloc, free shmem region | // Alloc, free shmem region | ||||||
| @@ -67,7 +60,6 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { | |||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Grid information queries | // Grid information queries | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| int                      CartesianCommunicator::Dimensions(void)         { return _ndimension; }; |  | ||||||
| int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | ||||||
| int                      CartesianCommunicator::BossRank(void)          { return 0; }; | int                      CartesianCommunicator::BossRank(void)          { return 0; }; | ||||||
| int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | ||||||
| @@ -96,43 +88,24 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | |||||||
|   GlobalSumVector((double *)c,2*N); |   GlobalSumVector((double *)c,2*N); | ||||||
| } | } | ||||||
|  |  | ||||||
| #if !defined( GRID_COMMS_MPI3)  | #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||||
|  |  | ||||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||||
| int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; |  | ||||||
| #endif |  | ||||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, |  | ||||||
| 						     int xmit_to_rank, |  | ||||||
| 						     void *recv, |  | ||||||
| 						     int recv_from_rank, |  | ||||||
| 						     int bytes, int dir) |  | ||||||
| { |  | ||||||
|   std::vector<CommsRequest_t> list; |  | ||||||
|   // Discard the "dir" |  | ||||||
|   SendToRecvFromBegin   (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); |  | ||||||
|   SendToRecvFromComplete(list); |  | ||||||
|   return 2.0*bytes; |  | ||||||
| } |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 							 void *xmit, | 						       void *xmit, | ||||||
| 							 int xmit_to_rank, | 						       int xmit_to_rank, | ||||||
| 							 void *recv, | 						       void *recv, | ||||||
| 							 int recv_from_rank, | 						       int recv_from_rank, | ||||||
| 							 int bytes, int dir) | 						       int bytes) | ||||||
| { | { | ||||||
|   // Discard the "dir" |  | ||||||
|   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); |   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||||
|   return 2.0*bytes; |   return 2.0*bytes; | ||||||
| } | } | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | ||||||
| { | { | ||||||
|   SendToRecvFromComplete(waitall); |   SendToRecvFromComplete(waitall); | ||||||
| } | } | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #if !defined( GRID_COMMS_MPI3)  |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::StencilBarrier(void){}; | void CartesianCommunicator::StencilBarrier(void){}; | ||||||
|  |  | ||||||
| commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | ||||||
| @@ -146,25 +119,8 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { | |||||||
|   return NULL; |   return NULL; | ||||||
| } | } | ||||||
| void CartesianCommunicator::ShmInitGeneric(void){ | void CartesianCommunicator::ShmInitGeneric(void){ | ||||||
| #if 1 |  | ||||||
|  |  | ||||||
|   int mmap_flag = MAP_SHARED | MAP_ANONYMOUS; |  | ||||||
| #ifdef MAP_HUGETLB |  | ||||||
|   if ( Hugepages ) mmap_flag |= MAP_HUGETLB; |  | ||||||
| #endif |  | ||||||
|   ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);  |  | ||||||
|   if (ShmCommBuf == (void *)MAP_FAILED) { |  | ||||||
|     perror("mmap failed "); |  | ||||||
|     exit(EXIT_FAILURE);   |  | ||||||
|   } |  | ||||||
| #ifdef MADV_HUGEPAGE |  | ||||||
|   if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE); |  | ||||||
| #endif |  | ||||||
| #else  |  | ||||||
|   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); |   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); | ||||||
|   ShmCommBuf=(void *)&ShmBufStorageVector[0]; |   ShmCommBuf=(void *)&ShmBufStorageVector[0]; | ||||||
| #endif |  | ||||||
|   bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifdef GRID_COMMS_MPI3 | #ifdef GRID_COMMS_MPI3 | ||||||
| #include <mpi.h> | #include <mpi.h> | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_MPIT | #ifdef GRID_COMMS_MPI3L | ||||||
| #include <mpi.h> | #include <mpi.h> | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_SHMEM | #ifdef GRID_COMMS_SHMEM | ||||||
| @@ -50,24 +50,12 @@ namespace Grid { | |||||||
| class CartesianCommunicator { | class CartesianCommunicator { | ||||||
|   public:     |   public:     | ||||||
|  |  | ||||||
|  |   // 65536 ranks per node adequate for now | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   // Isend/Irecv/Wait, or Sendrecv blocking |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; |  | ||||||
|   static CommunicatorPolicy_t CommunicatorPolicy; |  | ||||||
|   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////// |  | ||||||
|   // Up to 65536 ranks per node adequate for now |  | ||||||
|   // 128MB shared memory for comms enought for 48^4 local vol comms |   // 128MB shared memory for comms enought for 48^4 local vol comms | ||||||
|   // Give external control (command line override?) of this |   // Give external control (command line override?) of this | ||||||
|   /////////////////////////////////////////// |  | ||||||
|   static const int MAXLOG2RANKSPERNODE = 16;             |   static const int      MAXLOG2RANKSPERNODE = 16;             | ||||||
|   static uint64_t  MAX_MPI_SHM_BYTES; |   static uint64_t MAX_MPI_SHM_BYTES; | ||||||
|   static int       nCommThreads; |  | ||||||
|   // use explicit huge pages |  | ||||||
|   static int       Hugepages; |  | ||||||
|  |  | ||||||
|   // Communicator should know nothing of the physics grid, only processor grid. |   // Communicator should know nothing of the physics grid, only processor grid. | ||||||
|   int              _Nprocessors;     // How many in all |   int              _Nprocessors;     // How many in all | ||||||
| @@ -76,18 +64,14 @@ class CartesianCommunicator { | |||||||
|   std::vector<int> _processor_coor;  // linear processor coordinate |   std::vector<int> _processor_coor;  // linear processor coordinate | ||||||
|   unsigned long _ndimension; |   unsigned long _ndimension; | ||||||
|  |  | ||||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) | #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L) | ||||||
|   static MPI_Comm communicator_world; |   static MPI_Comm communicator_world; | ||||||
|  |          MPI_Comm communicator; | ||||||
|   MPI_Comm              communicator; |  | ||||||
|   std::vector<MPI_Comm> communicator_halo; |  | ||||||
|  |  | ||||||
|   typedef MPI_Request CommsRequest_t; |   typedef MPI_Request CommsRequest_t; | ||||||
| #else  | #else  | ||||||
|   typedef int CommsRequest_t; |   typedef int CommsRequest_t; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////// | ||||||
|   // Helper functionality for SHM Windows common to all other impls |   // Helper functionality for SHM Windows common to all other impls | ||||||
|   //////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////// | ||||||
| @@ -133,7 +117,11 @@ class CartesianCommunicator { | |||||||
|   ///////////////////////////////// |   ///////////////////////////////// | ||||||
|   static void * ShmCommBuf; |   static void * ShmCommBuf; | ||||||
|  |  | ||||||
|    |   // Isend/Irecv/Wait, or Sendrecv blocking | ||||||
|  |   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; | ||||||
|  |   static CommunicatorPolicy_t CommunicatorPolicy; | ||||||
|  |   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } | ||||||
|  |  | ||||||
|   size_t heap_top; |   size_t heap_top; | ||||||
|   size_t heap_bytes; |   size_t heap_bytes; | ||||||
|  |  | ||||||
| @@ -160,7 +148,6 @@ class CartesianCommunicator { | |||||||
|   int  RankFromProcessorCoor(std::vector<int> &coor); |   int  RankFromProcessorCoor(std::vector<int> &coor); | ||||||
|   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); |   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); | ||||||
|    |    | ||||||
|   int                      Dimensions(void)        ; |  | ||||||
|   int                      IsBoss(void)            ; |   int                      IsBoss(void)            ; | ||||||
|   int                      BossRank(void)          ; |   int                      BossRank(void)          ; | ||||||
|   int                      ThisRank(void)          ; |   int                      ThisRank(void)          ; | ||||||
| @@ -168,7 +155,6 @@ class CartesianCommunicator { | |||||||
|   const std::vector<int> & ProcessorGrid(void)     ; |   const std::vector<int> & ProcessorGrid(void)     ; | ||||||
|   int                      ProcessorCount(void)    ; |   int                      ProcessorCount(void)    ; | ||||||
|   int                      NodeCount(void)    ; |   int                      NodeCount(void)    ; | ||||||
|   int                      RankCount(void)    ; |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid |   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||||
| @@ -189,8 +175,6 @@ class CartesianCommunicator { | |||||||
|   void GlobalSumVector(ComplexF *c,int N); |   void GlobalSumVector(ComplexF *c,int N); | ||||||
|   void GlobalSum(ComplexD &c); |   void GlobalSum(ComplexD &c); | ||||||
|   void GlobalSumVector(ComplexD *c,int N); |   void GlobalSumVector(ComplexD *c,int N); | ||||||
|   void GlobalXOR(uint32_t &); |  | ||||||
|   void GlobalXOR(uint64_t &); |  | ||||||
|    |    | ||||||
|   template<class obj> void GlobalSum(obj &o){ |   template<class obj> void GlobalSum(obj &o){ | ||||||
|     typedef typename obj::scalar_type scalar_type; |     typedef typename obj::scalar_type scalar_type; | ||||||
| @@ -223,21 +207,14 @@ class CartesianCommunicator { | |||||||
|    |    | ||||||
|   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); |   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||||
|  |  | ||||||
|   double StencilSendToRecvFrom(void *xmit, |  | ||||||
| 			       int xmit_to_rank, |  | ||||||
| 			       void *recv, |  | ||||||
| 			       int recv_from_rank, |  | ||||||
| 			       int bytes,int dir); |  | ||||||
|  |  | ||||||
|   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, |   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 				    void *xmit, | 				  void *xmit, | ||||||
| 				    int xmit_to_rank, | 				  int xmit_to_rank, | ||||||
| 				    void *recv, | 				  void *recv, | ||||||
| 				    int recv_from_rank, | 				  int recv_from_rank, | ||||||
| 				    int bytes,int dir); | 				  int bytes); | ||||||
|    |    | ||||||
|    |   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||||
|   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i); |  | ||||||
|   void StencilBarrier(void); |   void StencilBarrier(void); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -83,14 +83,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
|   | |||||||
| @@ -37,12 +37,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <sys/ipc.h> | #include <sys/ipc.h> | ||||||
| #include <sys/shm.h> | #include <sys/shm.h> | ||||||
| #include <sys/mman.h> | #include <sys/mman.h> | ||||||
| #include <zlib.h> | //#include <zlib.h> | ||||||
| #ifdef HAVE_NUMAIF_H | #ifndef SHM_HUGETLB | ||||||
| #include <numaif.h> | #define SHM_HUGETLB 04000 | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -66,7 +65,6 @@ std::vector<int> CartesianCommunicator::MyGroup; | |||||||
| std::vector<void *> CartesianCommunicator::ShmCommBufs; | std::vector<void *> CartesianCommunicator::ShmCommBufs; | ||||||
|  |  | ||||||
| int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | ||||||
| int CartesianCommunicator::RankCount(void)    { return WorldSize;}; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef FORCE_COMMS | #undef FORCE_COMMS | ||||||
| @@ -198,46 +196,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|   ShmCommBuf = 0; |   ShmCommBuf = 0; | ||||||
|   ShmCommBufs.resize(ShmSize); |   ShmCommBufs.resize(ShmSize); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | #if 1 | ||||||
|   // Hugetlbf and others map filesystems as mappable huge pages |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| #ifdef GRID_MPI3_SHMMMAP |  | ||||||
|   char shm_name [NAME_MAX]; |  | ||||||
|   for(int r=0;r<ShmSize;r++){ |  | ||||||
|      |  | ||||||
|     size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; |  | ||||||
|     sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r); |  | ||||||
|     //sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r); |  | ||||||
|     //    printf("Opening file %s \n",shm_name); |  | ||||||
|     int fd=open(shm_name,O_RDWR|O_CREAT,0666); |  | ||||||
|     if ( fd == -1) {  |  | ||||||
|       printf("open %s failed\n",shm_name); |  | ||||||
|       perror("open hugetlbfs"); |  | ||||||
|       exit(0); |  | ||||||
|     } |  | ||||||
|     int mmap_flag = MAP_SHARED ; |  | ||||||
| #ifdef MAP_POPULATE     |  | ||||||
|     mmap_flag|=MAP_POPULATE; |  | ||||||
| #endif |  | ||||||
| #ifdef MAP_HUGETLB |  | ||||||
|     if ( Hugepages ) mmap_flag |= MAP_HUGETLB; |  | ||||||
| #endif |  | ||||||
|     void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);  |  | ||||||
|     if ( ptr == (void *)MAP_FAILED ) {     |  | ||||||
|       printf("mmap %s failed\n",shm_name); |  | ||||||
|       perror("failed mmap");      assert(0);     |  | ||||||
|     } |  | ||||||
|     assert(((uint64_t)ptr&0x3F)==0); |  | ||||||
|     ShmCommBufs[r] =ptr; |  | ||||||
|      |  | ||||||
|   } |  | ||||||
| #endif |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case |  | ||||||
|   // tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for  |  | ||||||
|   // the posix shm virtual file system |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| #ifdef GRID_MPI3_SHMOPEN |  | ||||||
|   char shm_name [NAME_MAX]; |   char shm_name [NAME_MAX]; | ||||||
|   if ( ShmRank == 0 ) { |   if ( ShmRank == 0 ) { | ||||||
|     for(int r=0;r<ShmSize;r++){ |     for(int r=0;r<ShmSize;r++){ | ||||||
| @@ -250,39 +209,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); |       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); | ||||||
|       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } |       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } | ||||||
|       ftruncate(fd, size); |       ftruncate(fd, size); | ||||||
|        |  | ||||||
|       int mmap_flag = MAP_SHARED; |  | ||||||
| #ifdef MAP_POPULATE  |  | ||||||
|       mmap_flag |= MAP_POPULATE; |  | ||||||
| #endif |  | ||||||
| #ifdef MAP_HUGETLB |  | ||||||
|       if (Hugepages) mmap_flag |= MAP_HUGETLB; |  | ||||||
| #endif |  | ||||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); |  | ||||||
|  |  | ||||||
|       if ( ptr == (void * )MAP_FAILED ) {       perror("failed mmap");      assert(0);    } |       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||||||
|  |       if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    } | ||||||
|       assert(((uint64_t)ptr&0x3F)==0); |       assert(((uint64_t)ptr&0x3F)==0); | ||||||
|  |       ShmCommBufs[r] =ptr; | ||||||
| // Experiments; Experiments; Try to force numa domain on the shm segment if we have numaif.h |  | ||||||
| #if 0 |  | ||||||
| //#ifdef HAVE_NUMAIF_H |  | ||||||
| 	int status; |  | ||||||
| 	int flags=MPOL_MF_MOVE; |  | ||||||
| #ifdef KNL |  | ||||||
| 	int nodes=1; // numa domain == MCDRAM |  | ||||||
| 	// Find out if in SNC2,SNC4 mode ? |  | ||||||
| #else |  | ||||||
| 	int nodes=r; // numa domain == MPI ID |  | ||||||
| #endif |  | ||||||
| 	unsigned long count=1; |  | ||||||
| 	for(uint64_t page=0;page<size;page+=4096){ |  | ||||||
| 	  void *pages = (void *) ( page + (uint64_t)ptr ); |  | ||||||
| 	  uint64_t *cow_it = (uint64_t *)pages;	*cow_it = 1; |  | ||||||
| 	  ierr= move_pages(0,count, &pages,&nodes,&status,flags); |  | ||||||
| 	  if (ierr && (page==0)) perror("numa relocate command failed"); |  | ||||||
| 	} |  | ||||||
| #endif |  | ||||||
| 	ShmCommBufs[r] =ptr; |  | ||||||
|        |        | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -304,32 +235,21 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|       ShmCommBufs[r] =ptr; |       ShmCommBufs[r] =ptr; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| #endif |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | #else | ||||||
|   // SHMGET SHMAT and SHM_HUGETLB flag |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| #ifdef GRID_MPI3_SHMGET |  | ||||||
|   std::vector<int> shmids(ShmSize); |   std::vector<int> shmids(ShmSize); | ||||||
|  |  | ||||||
|   if ( ShmRank == 0 ) { |   if ( ShmRank == 0 ) { | ||||||
|     for(int r=0;r<ShmSize;r++){ |     for(int r=0;r<ShmSize;r++){ | ||||||
|       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; |       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; | ||||||
|       key_t key   = IPC_PRIVATE; |       key_t key   = 0x4545 + r; | ||||||
|       int flags = IPC_CREAT | SHM_R | SHM_W; |       if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { | ||||||
| #ifdef SHM_HUGETLB |  | ||||||
|       if (Hugepages) flags|=SHM_HUGETLB; |  | ||||||
| #endif |  | ||||||
|       if ((shmids[r]= shmget(key,size, flags)) ==-1) { |  | ||||||
| 	int errsv = errno; | 	int errsv = errno; | ||||||
| 	printf("Errno %d\n",errsv); | 	printf("Errno %d\n",errsv); | ||||||
| 	printf("key   %d\n",key); |  | ||||||
| 	printf("size  %lld\n",size); |  | ||||||
| 	printf("flags %d\n",flags); |  | ||||||
| 	perror("shmget"); | 	perror("shmget"); | ||||||
| 	exit(1); | 	exit(1); | ||||||
|       } else {  |  | ||||||
| 	printf("shmid: 0x%x\n", shmids[r]); |  | ||||||
|       } |       } | ||||||
|  |       printf("shmid: 0x%x\n", shmids[r]); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   MPI_Barrier(ShmComm); |   MPI_Barrier(ShmComm); | ||||||
| @@ -454,14 +374,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | |||||||
| {  | {  | ||||||
|   int ierr; |   int ierr; | ||||||
|   communicator=communicator_world; |   communicator=communicator_world; | ||||||
|  |  | ||||||
|   _ndimension = processors.size(); |   _ndimension = processors.size(); | ||||||
|  |  | ||||||
|   communicator_halo.resize (2*_ndimension); |  | ||||||
|   for(int i=0;i<_ndimension*2;i++){ |  | ||||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   // Assert power of two shm_size. |   // Assert power of two shm_size. | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
| @@ -595,14 +509,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| @@ -684,27 +590,13 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, |  | ||||||
| 						     int dest, |  | ||||||
| 						     void *recv, |  | ||||||
| 						     int from, |  | ||||||
| 						     int bytes,int dir) |  | ||||||
| { |  | ||||||
|   std::vector<CommsRequest_t> list; |  | ||||||
|   double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir); |  | ||||||
|   StencilSendToRecvFromComplete(list,dir); |  | ||||||
|   return offbytes; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 							 void *xmit, | 						       void *xmit, | ||||||
| 							 int dest, | 						       int dest, | ||||||
| 							 void *recv, | 						       void *recv, | ||||||
| 							 int from, | 						       int from, | ||||||
| 							 int bytes,int dir) | 						       int bytes) | ||||||
| { | { | ||||||
|   assert(dir < communicator_halo.size()); |  | ||||||
|  |  | ||||||
|   MPI_Request xrq; |   MPI_Request xrq; | ||||||
|   MPI_Request rrq; |   MPI_Request rrq; | ||||||
|  |  | ||||||
| @@ -723,26 +615,26 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|   gfrom = MPI_UNDEFINED; |   gfrom = MPI_UNDEFINED; | ||||||
| #endif | #endif | ||||||
|   if ( gfrom ==MPI_UNDEFINED) { |   if ( gfrom ==MPI_UNDEFINED) { | ||||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq); |     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(rrq); |     list.push_back(rrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( gdest == MPI_UNDEFINED ) { |   if ( gdest == MPI_UNDEFINED ) { | ||||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq); |     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(xrq); |     list.push_back(xrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  |   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  | ||||||
|     this->StencilSendToRecvFromComplete(list,dir); |     this->StencilSendToRecvFromComplete(list); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return off_node_bytes; |   return off_node_bytes; | ||||||
| } | } | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | ||||||
| { | { | ||||||
|   SendToRecvFromComplete(waitall); |   SendToRecvFromComplete(waitall); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,286 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/communicator/Communicator_mpi.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/GridCore.h> |  | ||||||
| #include <Grid/GridQCDcore.h> |  | ||||||
| #include <Grid/qcd/action/ActionCore.h> |  | ||||||
| #include <mpi.h> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Info that is setup once and indept of cartesian layout |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| MPI_Comm CartesianCommunicator::communicator_world; |  | ||||||
|  |  | ||||||
| // Should error check all MPI calls. |  | ||||||
| void CartesianCommunicator::Init(int *argc, char ***argv) { |  | ||||||
|   int flag; |  | ||||||
|   int provided; |  | ||||||
|   MPI_Initialized(&flag); // needed to coexist with other libs apparently |  | ||||||
|   if ( !flag ) { |  | ||||||
|     MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); |  | ||||||
|     if ( provided != MPI_THREAD_MULTIPLE ) { |  | ||||||
|       QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); |  | ||||||
|   ShmInitGeneric(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) |  | ||||||
| { |  | ||||||
|   _ndimension = processors.size(); |  | ||||||
|   std::vector<int> periodic(_ndimension,1); |  | ||||||
|  |  | ||||||
|   _Nprocessors=1; |  | ||||||
|   _processors = processors; |  | ||||||
|   _processor_coor.resize(_ndimension); |  | ||||||
|    |  | ||||||
|   MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); |  | ||||||
|   MPI_Comm_rank(communicator,&_processor); |  | ||||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<_ndimension;i++){ |  | ||||||
|     _Nprocessors*=_processors[i]; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   communicator_halo.resize (2*_ndimension); |  | ||||||
|   for(int i=0;i<_ndimension*2;i++){ |  | ||||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   int Size;  |  | ||||||
|   MPI_Comm_size(communicator,&Size); |  | ||||||
|    |  | ||||||
|   assert(Size==_Nprocessors); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSumVector(float *f,int N) |  | ||||||
| { |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(double &d) |  | ||||||
| { |  | ||||||
|   int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSumVector(double *d,int N) |  | ||||||
| { |  | ||||||
|   int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) |  | ||||||
| { |  | ||||||
|   int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) |  | ||||||
| { |  | ||||||
|   int rank; |  | ||||||
|   int ierr=MPI_Cart_rank  (communicator, &coor[0], &rank); |  | ||||||
|   assert(ierr==0); |  | ||||||
|   return rank; |  | ||||||
| } |  | ||||||
| void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor) |  | ||||||
| { |  | ||||||
|   coor.resize(_ndimension); |  | ||||||
|   int ierr=MPI_Cart_coords  (communicator, rank, _ndimension,&coor[0]); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Basic Halo comms primitive |  | ||||||
| void CartesianCommunicator::SendToRecvFrom(void *xmit, |  | ||||||
| 					   int dest, |  | ||||||
| 					   void *recv, |  | ||||||
| 					   int from, |  | ||||||
| 					   int bytes) |  | ||||||
| { |  | ||||||
|   std::vector<CommsRequest_t> reqs(0); |  | ||||||
|   SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); |  | ||||||
|   SendToRecvFromComplete(reqs); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, |  | ||||||
| 					   void *recv, |  | ||||||
| 					   int sender, |  | ||||||
| 					   int receiver, |  | ||||||
| 					   int bytes) |  | ||||||
| { |  | ||||||
|   MPI_Status stat; |  | ||||||
|   assert(sender != receiver); |  | ||||||
|   int tag = sender; |  | ||||||
|   if ( _processor == sender ) { |  | ||||||
|     MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); |  | ||||||
|   } |  | ||||||
|   if ( _processor == receiver ) {  |  | ||||||
|     MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Basic Halo comms primitive |  | ||||||
| void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, |  | ||||||
| 						void *xmit, |  | ||||||
| 						int dest, |  | ||||||
| 						void *recv, |  | ||||||
| 						int from, |  | ||||||
| 						int bytes) |  | ||||||
| { |  | ||||||
|   int myrank = _processor; |  | ||||||
|   int ierr; |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {  |  | ||||||
|     MPI_Request xrq; |  | ||||||
|     MPI_Request rrq; |  | ||||||
|  |  | ||||||
|     ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); |  | ||||||
|     ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); |  | ||||||
|      |  | ||||||
|     assert(ierr==0); |  | ||||||
|     list.push_back(xrq); |  | ||||||
|     list.push_back(rrq); |  | ||||||
|   } else {  |  | ||||||
|     // Give the CPU to MPI immediately; can use threads to overlap optionally |  | ||||||
|     ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, |  | ||||||
| 		      recv,bytes,MPI_CHAR,from, from, |  | ||||||
| 		      communicator,MPI_STATUS_IGNORE); |  | ||||||
|     assert(ierr==0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) |  | ||||||
| { |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {  |  | ||||||
|     int nreq=list.size(); |  | ||||||
|     std::vector<MPI_Status> status(nreq); |  | ||||||
|     int ierr = MPI_Waitall(nreq,&list[0],&status[0]); |  | ||||||
|     assert(ierr==0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::Barrier(void) |  | ||||||
| { |  | ||||||
|   int ierr = MPI_Barrier(communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::Broadcast(int root,void* data, int bytes) |  | ||||||
| { |  | ||||||
|   int ierr=MPI_Bcast(data, |  | ||||||
| 		     bytes, |  | ||||||
| 		     MPI_BYTE, |  | ||||||
| 		     root, |  | ||||||
| 		     communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   // Should only be used prior to Grid Init finished. |  | ||||||
|   // Check for this? |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
| int CartesianCommunicator::RankWorld(void){  |  | ||||||
|   int r;  |  | ||||||
|   MPI_Comm_rank(communicator_world,&r); |  | ||||||
|   return r; |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) |  | ||||||
| { |  | ||||||
|   int ierr= MPI_Bcast(data, |  | ||||||
| 		      bytes, |  | ||||||
| 		      MPI_BYTE, |  | ||||||
| 		      root, |  | ||||||
| 		      communicator_world); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, |  | ||||||
| 							 void *xmit, |  | ||||||
| 							 int xmit_to_rank, |  | ||||||
| 							 void *recv, |  | ||||||
| 							 int recv_from_rank, |  | ||||||
| 							 int bytes,int dir) |  | ||||||
| { |  | ||||||
|   int myrank = _processor; |  | ||||||
|   int ierr; |  | ||||||
|   assert(dir < communicator_halo.size()); |  | ||||||
|    |  | ||||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; |  | ||||||
|   // Give the CPU to MPI immediately; can use threads to overlap optionally |  | ||||||
|   MPI_Request req[2]; |  | ||||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); |  | ||||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); |  | ||||||
|  |  | ||||||
|   list.push_back(req[0]); |  | ||||||
|   list.push_back(req[1]); |  | ||||||
|   return 2.0*bytes; |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) |  | ||||||
| {  |  | ||||||
|   int nreq=waitall.size(); |  | ||||||
|   MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); |  | ||||||
| }; |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, |  | ||||||
| 						    int xmit_to_rank, |  | ||||||
| 						    void *recv, |  | ||||||
| 						    int recv_from_rank, |  | ||||||
| 						    int bytes,int dir) |  | ||||||
| { |  | ||||||
|   int myrank = _processor; |  | ||||||
|   int ierr; |  | ||||||
|   assert(dir < communicator_halo.size()); |  | ||||||
|    |  | ||||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; |  | ||||||
|   // Give the CPU to MPI immediately; can use threads to overlap optionally |  | ||||||
|   MPI_Request req[2]; |  | ||||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); |  | ||||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); |  | ||||||
|   MPI_Waitall(2, req, MPI_STATUSES_IGNORE); |  | ||||||
|   return 2.0*bytes; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| @@ -59,8 +59,6 @@ void CartesianCommunicator::GlobalSum(double &){} | |||||||
| void CartesianCommunicator::GlobalSum(uint32_t &){} | void CartesianCommunicator::GlobalSum(uint32_t &){} | ||||||
| void CartesianCommunicator::GlobalSum(uint64_t &){} | void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &){} |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &){} |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||||
| 					   void *recv, | 					   void *recv, | ||||||
|   | |||||||
| @@ -42,7 +42,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/cshift/Cshift_mpi.h> | #include <Grid/cshift/Cshift_mpi.h> | ||||||
| #endif  | #endif  | ||||||
|  |  | ||||||
| #ifdef GRID_COMMS_MPIT | #ifdef GRID_COMMS_MPI3L | ||||||
| #include <Grid/cshift/Cshift_mpi.h> | #include <Grid/cshift/Cshift_mpi.h> | ||||||
| #endif  | #endif  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										12276
									
								
								lib/json/json.hpp
									
									
									
									
									
								
							
							
						
						
									
										12276
									
								
								lib/json/json.hpp
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -235,74 +235,64 @@ public: | |||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////////////// | ||||||
|   // Constructor requires "grid" passed. |     // Constructor requires "grid" passed. | ||||||
|   // what about a default grid? |     // what about a default grid? | ||||||
|   ////////////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////////////// | ||||||
|   Lattice(GridBase *grid) : _odata(grid->oSites()) { |     Lattice(GridBase *grid) : _odata(grid->oSites()) { | ||||||
|     _grid = grid; |         _grid = grid; | ||||||
|     //        _odata.reserve(_grid->oSites()); |     //        _odata.reserve(_grid->oSites()); | ||||||
|     //        _odata.resize(_grid->oSites()); |     //        _odata.resize(_grid->oSites()); | ||||||
|     //      std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl; |     //      std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl; | ||||||
|     assert((((uint64_t)&_odata[0])&0xF) ==0); |         assert((((uint64_t)&_odata[0])&0xF) ==0); | ||||||
|     checkerboard=0; |         checkerboard=0; | ||||||
|   } |  | ||||||
|    |  | ||||||
|   Lattice(const Lattice& r){ // copy constructor |  | ||||||
|     _grid = r._grid; |  | ||||||
|     checkerboard = r.checkerboard; |  | ||||||
|     _odata.resize(_grid->oSites());// essential |  | ||||||
|     parallel_for(int ss=0;ss<_grid->oSites();ss++){ |  | ||||||
|       _odata[ss]=r._odata[ss]; |  | ||||||
|     }  	 |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|    |  | ||||||
|    |  | ||||||
|   virtual ~Lattice(void) = default; |  | ||||||
|      |  | ||||||
|   void reset(GridBase* grid) { |  | ||||||
|     if (_grid != grid) { |  | ||||||
|       _grid = grid; |  | ||||||
|       _odata.resize(grid->oSites()); |  | ||||||
|       checkerboard = 0; |  | ||||||
|     } |     } | ||||||
|   } |  | ||||||
|    |  | ||||||
|  |  | ||||||
|   template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ |     Lattice(const Lattice& r){ // copy constructor | ||||||
|     parallel_for(int ss=0;ss<_grid->oSites();ss++){ |     	_grid = r._grid; | ||||||
|       this->_odata[ss]=r; |     	checkerboard = r.checkerboard; | ||||||
|  |     	_odata.resize(_grid->oSites());// essential | ||||||
|  | 	parallel_for(int ss=0;ss<_grid->oSites();ss++){ | ||||||
|  |             _odata[ss]=r._odata[ss]; | ||||||
|  |         }  	 | ||||||
|     } |     } | ||||||
|     return *this; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){ |     virtual ~Lattice(void) = default; | ||||||
|     this->checkerboard = r.checkerboard; |  | ||||||
|     conformable(*this,r); |  | ||||||
|      |      | ||||||
|     parallel_for(int ss=0;ss<_grid->oSites();ss++){ |     template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ | ||||||
|       this->_odata[ss]=r._odata[ss]; |       parallel_for(int ss=0;ss<_grid->oSites();ss++){ | ||||||
|  |             this->_odata[ss]=r; | ||||||
|  |         } | ||||||
|  |         return *this; | ||||||
|     } |     } | ||||||
|     return *this; |     template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){ | ||||||
|   } |       this->checkerboard = r.checkerboard; | ||||||
|    |       conformable(*this,r); | ||||||
|   // *=,+=,-= operators inherit behvour from correspond */+/- operation |        | ||||||
|   template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) { |       parallel_for(int ss=0;ss<_grid->oSites();ss++){ | ||||||
|     *this = (*this)*r; |             this->_odata[ss]=r._odata[ss]; | ||||||
|     return *this; |         } | ||||||
|   } |         return *this; | ||||||
|    |     } | ||||||
|   template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) { |  | ||||||
|     *this = (*this)-r; |     // *=,+=,-= operators inherit behvour from correspond */+/- operation | ||||||
|     return *this; |     template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) { | ||||||
|   } |         *this = (*this)*r; | ||||||
|   template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) { |         return *this; | ||||||
|     *this = (*this)+r; |     } | ||||||
|     return *this; |  | ||||||
|   } |     template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) { | ||||||
| }; // class Lattice |         *this = (*this)-r; | ||||||
|    |         return *this; | ||||||
|  |     } | ||||||
|  |     template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) { | ||||||
|  |         *this = (*this)+r; | ||||||
|  |         return *this; | ||||||
|  |     } | ||||||
|  |  }; // class Lattice | ||||||
|  |  | ||||||
|   template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ |   template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | ||||||
|     std::vector<int> gcoor; |     std::vector<int> gcoor; | ||||||
|     typedef typename vobj::scalar_object sobj; |     typedef typename vobj::scalar_object sobj; | ||||||
| @@ -320,7 +310,7 @@ public: | |||||||
|     } |     } | ||||||
|     return stream; |     return stream; | ||||||
|   } |   } | ||||||
|    |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,37 +1,45 @@ | |||||||
| /************************************************************************************* |     /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./lib/lattice/Lattice_reduction.h |     Source file: ./lib/lattice/Lattice_reduction.h | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #ifndef GRID_LATTICE_REDUCTION_H | #ifndef GRID_LATTICE_REDUCTION_H | ||||||
| #define GRID_LATTICE_REDUCTION_H | #define GRID_LATTICE_REDUCTION_H | ||||||
|  |  | ||||||
| #include <Grid/Grid_Eigen_Dense.h> | #include <Grid/Eigen/Dense> | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| #ifdef GRID_WARN_SUBOPTIMAL | #ifdef GRID_WARN_SUBOPTIMAL | ||||||
| #warning "Optimisation alert all these reduction loops are NOT threaded " | #warning "Optimisation alert all these reduction loops are NOT threaded " | ||||||
| #endif      | #endif      | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Deterministic Reduction operations |     // Deterministic Reduction operations | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | ||||||
|   ComplexD nrm = innerProduct(arg,arg); |   ComplexD nrm = innerProduct(arg,arg); | ||||||
|   return std::real(nrm);  |   return std::real(nrm);  | ||||||
| @@ -328,8 +336,6 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | |||||||
|   typedef typename vobj::vector_type vector_type; |   typedef typename vobj::vector_type vector_type; | ||||||
|   typedef typename vobj::tensor_reduced tensor_reduced; |   typedef typename vobj::tensor_reduced tensor_reduced; | ||||||
|    |    | ||||||
|   scalar_type zscale(scale); |  | ||||||
|  |  | ||||||
|   GridBase *grid  = X._grid; |   GridBase *grid  = X._grid; | ||||||
|  |  | ||||||
|   int Nsimd  =grid->Nsimd(); |   int Nsimd  =grid->Nsimd(); | ||||||
| @@ -355,7 +361,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | |||||||
|       grid->iCoorFromIindex(icoor,l); |       grid->iCoorFromIindex(icoor,l); | ||||||
|       int ldx =r+icoor[orthogdim]*rd; |       int ldx =r+icoor[orthogdim]*rd; | ||||||
|       scalar_type *as =(scalar_type *)&av; |       scalar_type *as =(scalar_type *)&av; | ||||||
|       as[l] = scalar_type(a[ldx])*zscale; |       as[l] = scalar_type(a[ldx])*scale; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     tensor_reduced at; at=av; |     tensor_reduced at; at=av; | ||||||
| @@ -369,7 +375,74 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | |||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y, | ||||||
|  | 			     int Orthog,RealD scale=1.0)  | ||||||
|  | {     | ||||||
|  |   // FIXME: Implementation is slow | ||||||
|  |   // Best base the linear combination by constructing a  | ||||||
|  |   // set of vectors of size grid->_rdimensions[Orthog]. | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |    | ||||||
|  |   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||||
|  |    | ||||||
|  |   GridBase *FullGrid  = X._grid; | ||||||
|  |   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||||
|  |    | ||||||
|  |   Lattice<vobj> Xslice(SliceGrid); | ||||||
|  |   Lattice<vobj> Rslice(SliceGrid); | ||||||
|  |   // If we based this on Cshift it would work for spread out | ||||||
|  |   // but it would be even slower | ||||||
|  |   for(int i=0;i<Nblock;i++){ | ||||||
|  |     ExtractSlice(Rslice,Y,i,Orthog); | ||||||
|  |     ExtractSlice(Xslice,X,i,Orthog); | ||||||
|  |     Rslice = Rslice + Xslice*(scale*a[i]); | ||||||
|  |     InsertSlice(Rslice,R,i,Orthog); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||||
|  |   { | ||||||
|  |     // FIXME: Implementation is slow | ||||||
|  |     // Look at localInnerProduct implementation, | ||||||
|  |     // and do inside a site loop with block strided iterators | ||||||
|  |     typedef typename vobj::scalar_object sobj; | ||||||
|  |     typedef typename vobj::scalar_type scalar_type; | ||||||
|  |     typedef typename vobj::vector_type vector_type; | ||||||
|  |     typedef typename vobj::tensor_reduced scalar; | ||||||
|  |     typedef typename scalar::scalar_object  scomplex; | ||||||
|  |    | ||||||
|  |     int Nblock = lhs._grid->GlobalDimensions()[Orthog]; | ||||||
|  |  | ||||||
|  |     vec.resize(Nblock); | ||||||
|  |     std::vector<scomplex> sip(Nblock); | ||||||
|  |     Lattice<scalar> IP(lhs._grid);  | ||||||
|  |  | ||||||
|  |     IP=localInnerProduct(lhs,rhs); | ||||||
|  |     sliceSum(IP,sip,Orthog); | ||||||
|  |    | ||||||
|  |     for(int ss=0;ss<Nblock;ss++){ | ||||||
|  |       vec[ss] = TensorRemove(sip[ss]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | ////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // FIXME: Implementation is slow | ||||||
|  | // If we based this on Cshift it would work for spread out | ||||||
|  | // but it would be even slower | ||||||
|  | // | ||||||
|  | // Repeated extract slice is inefficient | ||||||
|  | // | ||||||
|  | // Best base the linear combination by constructing a  | ||||||
|  | // set of vectors of size grid->_rdimensions[Orthog]. | ||||||
|  | ////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
| inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) | inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) | ||||||
| { | { | ||||||
|   int NN    = BlockSolverGrid->_ndimension; |   int NN    = BlockSolverGrid->_ndimension; | ||||||
| @@ -388,7 +461,7 @@ inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or | |||||||
|   } |   } | ||||||
|   return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);  |   return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);  | ||||||
| } | } | ||||||
| */ |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)  | static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)  | ||||||
| @@ -398,175 +471,60 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice | |||||||
|   typedef typename vobj::vector_type vector_type; |   typedef typename vobj::vector_type vector_type; | ||||||
|  |  | ||||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; |   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||||
|  |    | ||||||
|   GridBase *FullGrid  = X._grid; |   GridBase *FullGrid  = X._grid; | ||||||
|   //  GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); |   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||||
|  |    | ||||||
|   //  Lattice<vobj> Xslice(SliceGrid); |   Lattice<vobj> Xslice(SliceGrid); | ||||||
|   //  Lattice<vobj> Rslice(SliceGrid); |   Lattice<vobj> Rslice(SliceGrid); | ||||||
|  |    | ||||||
|   assert( FullGrid->_simd_layout[Orthog]==1); |   for(int i=0;i<Nblock;i++){ | ||||||
|   int nh =  FullGrid->_ndimension; |     ExtractSlice(Rslice,Y,i,Orthog); | ||||||
|   //  int nl = SliceGrid->_ndimension; |     for(int j=0;j<Nblock;j++){ | ||||||
|   int nl = nh-1; |       ExtractSlice(Xslice,X,j,Orthog); | ||||||
|  |       Rslice = Rslice + Xslice*(scale*aa(j,i)); | ||||||
|   //FIXME package in a convenient iterator |     } | ||||||
|   //Should loop over a plane orthogonal to direction "Orthog" |     InsertSlice(Rslice,R,i,Orthog); | ||||||
|   int stride=FullGrid->_slice_stride[Orthog]; |  | ||||||
|   int block =FullGrid->_slice_block [Orthog]; |  | ||||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; |  | ||||||
|   int ostride=FullGrid->_ostride[Orthog]; |  | ||||||
| #pragma omp parallel  |  | ||||||
|   { |  | ||||||
|     std::vector<vobj> s_x(Nblock); |  | ||||||
|  |  | ||||||
| #pragma omp for collapse(2) |  | ||||||
|     for(int n=0;n<nblock;n++){ |  | ||||||
|     for(int b=0;b<block;b++){ |  | ||||||
|       int o  = n*stride + b; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nblock;i++){ |  | ||||||
| 	s_x[i] = X[o+i*ostride]; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       vobj dot; |  | ||||||
|       for(int i=0;i<Nblock;i++){ |  | ||||||
| 	dot = Y[o+i*ostride]; |  | ||||||
| 	for(int j=0;j<Nblock;j++){ |  | ||||||
| 	  dot = dot + s_x[j]*(scale*aa(j,i)); |  | ||||||
| 	} |  | ||||||
| 	R[o+i*ostride]=dot; |  | ||||||
|       } |  | ||||||
|     }} |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)  |  | ||||||
| {     |  | ||||||
|   typedef typename vobj::scalar_object sobj; |  | ||||||
|   typedef typename vobj::scalar_type scalar_type; |  | ||||||
|   typedef typename vobj::vector_type vector_type; |  | ||||||
|  |  | ||||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; |  | ||||||
|  |  | ||||||
|   GridBase *FullGrid  = X._grid; |  | ||||||
|   //  GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); |  | ||||||
|   //  Lattice<vobj> Xslice(SliceGrid); |  | ||||||
|   //  Lattice<vobj> Rslice(SliceGrid); |  | ||||||
|  |  | ||||||
|   assert( FullGrid->_simd_layout[Orthog]==1); |  | ||||||
|   int nh =  FullGrid->_ndimension; |  | ||||||
|   //  int nl = SliceGrid->_ndimension; |  | ||||||
|   int nl=1; |  | ||||||
|  |  | ||||||
|   //FIXME package in a convenient iterator |  | ||||||
|   //Should loop over a plane orthogonal to direction "Orthog" |  | ||||||
|   int stride=FullGrid->_slice_stride[Orthog]; |  | ||||||
|   int block =FullGrid->_slice_block [Orthog]; |  | ||||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; |  | ||||||
|   int ostride=FullGrid->_ostride[Orthog]; |  | ||||||
| #pragma omp parallel  |  | ||||||
|   { |  | ||||||
|     std::vector<vobj> s_x(Nblock); |  | ||||||
|  |  | ||||||
| #pragma omp for collapse(2) |  | ||||||
|     for(int n=0;n<nblock;n++){ |  | ||||||
|     for(int b=0;b<block;b++){ |  | ||||||
|       int o  = n*stride + b; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nblock;i++){ |  | ||||||
| 	s_x[i] = X[o+i*ostride]; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       vobj dot; |  | ||||||
|       for(int i=0;i<Nblock;i++){ |  | ||||||
| 	dot = s_x[0]*(scale*aa(0,i)); |  | ||||||
| 	for(int j=1;j<Nblock;j++){ |  | ||||||
| 	  dot = dot + s_x[j]*(scale*aa(j,i)); |  | ||||||
| 	} |  | ||||||
| 	R[o+i*ostride]=dot; |  | ||||||
|       } |  | ||||||
|     }} |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||||
| { | { | ||||||
|  |   // FIXME: Implementation is slow | ||||||
|  |   // Not sure of best solution.. think about it | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|   typedef typename vobj::scalar_type scalar_type; |   typedef typename vobj::scalar_type scalar_type; | ||||||
|   typedef typename vobj::vector_type vector_type; |   typedef typename vobj::vector_type vector_type; | ||||||
|    |    | ||||||
|   GridBase *FullGrid  = lhs._grid; |   GridBase *FullGrid  = lhs._grid; | ||||||
|   //  GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); |   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||||
|    |    | ||||||
|   int Nblock = FullGrid->GlobalDimensions()[Orthog]; |   int Nblock = FullGrid->GlobalDimensions()[Orthog]; | ||||||
|    |    | ||||||
|   //  Lattice<vobj> Lslice(SliceGrid); |   Lattice<vobj> Lslice(SliceGrid); | ||||||
|   //  Lattice<vobj> Rslice(SliceGrid); |   Lattice<vobj> Rslice(SliceGrid); | ||||||
|    |    | ||||||
|   mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); |   mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |    | ||||||
|   assert( FullGrid->_simd_layout[Orthog]==1); |  | ||||||
|   int nh =  FullGrid->_ndimension; |  | ||||||
|   //  int nl = SliceGrid->_ndimension; |  | ||||||
|   int nl = nh-1; |  | ||||||
|  |  | ||||||
|   //FIXME package in a convenient iterator |  | ||||||
|   //Should loop over a plane orthogonal to direction "Orthog" |  | ||||||
|   int stride=FullGrid->_slice_stride[Orthog]; |  | ||||||
|   int block =FullGrid->_slice_block [Orthog]; |  | ||||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; |  | ||||||
|   int ostride=FullGrid->_ostride[Orthog]; |  | ||||||
|  |  | ||||||
|   typedef typename vobj::vector_typeD vector_typeD; |  | ||||||
|  |  | ||||||
| #pragma omp parallel  |  | ||||||
|   { |  | ||||||
|     std::vector<vobj> Left(Nblock); |  | ||||||
|     std::vector<vobj> Right(Nblock); |  | ||||||
|     Eigen::MatrixXcd  mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
| #pragma omp for collapse(2) |  | ||||||
|     for(int n=0;n<nblock;n++){ |  | ||||||
|     for(int b=0;b<block;b++){ |  | ||||||
|  |  | ||||||
|       int o  = n*stride + b; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nblock;i++){ |  | ||||||
| 	Left [i] = lhs[o+i*ostride]; |  | ||||||
| 	Right[i] = rhs[o+i*ostride]; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nblock;i++){ |  | ||||||
|       for(int j=0;j<Nblock;j++){ |  | ||||||
| 	auto tmp = innerProduct(Left[i],Right[j]); |  | ||||||
| 	//	vector_typeD rtmp = TensorRemove(tmp); |  | ||||||
| 	auto rtmp = TensorRemove(tmp); |  | ||||||
| 	mat_thread(i,j) += Reduce(rtmp); |  | ||||||
|       }} |  | ||||||
|     }} |  | ||||||
| #pragma omp critical |  | ||||||
|     { |  | ||||||
|       mat += mat_thread; |  | ||||||
|     }   |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   for(int i=0;i<Nblock;i++){ |   for(int i=0;i<Nblock;i++){ | ||||||
|   for(int j=0;j<Nblock;j++){ |     ExtractSlice(Lslice,lhs,i,Orthog); | ||||||
|     ComplexD sum = mat(i,j); |     for(int j=0;j<Nblock;j++){ | ||||||
|     FullGrid->GlobalSum(sum); |       ExtractSlice(Rslice,rhs,j,Orthog); | ||||||
|     mat(i,j)=sum; |       mat(i,j) = innerProduct(Lslice,Rslice); | ||||||
|   }} |     } | ||||||
|  |   } | ||||||
|  | #undef FORCE_DIAG | ||||||
|  | #ifdef FORCE_DIAG | ||||||
|  |   for(int i=0;i<Nblock;i++){ | ||||||
|  |     for(int j=0;j<Nblock;j++){ | ||||||
|  |       if ( i != j ) mat(i,j)=0.0; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | #endif | ||||||
|   return; |   return; | ||||||
| } | } | ||||||
|  |  | ||||||
| } /*END NAMESPACE GRID*/ | } /*END NAMESPACE GRID*/ | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -6,8 +6,8 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: Guido Cossu <guido.cossu@ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -75,55 +75,6 @@ namespace Grid { | |||||||
|     return multiplicity; |     return multiplicity; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|    |  | ||||||
| // merge of April 11 2017 |  | ||||||
| //<<<<<<< HEAD |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   // this function is necessary for the LS vectorised field |  | ||||||
|   inline int RNGfillable_general(GridBase *coarse,GridBase *fine) |  | ||||||
|   { |  | ||||||
|     int rngdims = coarse->_ndimension; |  | ||||||
|      |  | ||||||
|     // trivially extended in higher dims, with locality guaranteeing RNG state is local to node |  | ||||||
|     int lowerdims   = fine->_ndimension - coarse->_ndimension;  assert(lowerdims >= 0); |  | ||||||
|     // assumes that the higher dimensions are not using more processors |  | ||||||
|     // all further divisions are local |  | ||||||
|     for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1); |  | ||||||
|     for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]); |  | ||||||
|      |  | ||||||
|  |  | ||||||
|     // then divide the number of local sites |  | ||||||
|     // check that the total number of sims agree, meanse the iSites are the same |  | ||||||
|     assert(fine->Nsimd() == coarse->Nsimd()); |  | ||||||
|  |  | ||||||
|     // check that the two grids divide cleanly |  | ||||||
|     assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() ); |  | ||||||
|  |  | ||||||
|     return fine->lSites() / coarse->lSites(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /* |  | ||||||
|   // Wrap seed_seq to give common interface with random_device |  | ||||||
|   class fixedSeed { |  | ||||||
|   public: |  | ||||||
|     typedef std::seed_seq::result_type result_type; |  | ||||||
|     std::seed_seq src; |  | ||||||
|      |  | ||||||
|     fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {}; |  | ||||||
|  |  | ||||||
|     result_type operator () (void){ |  | ||||||
|       std::vector<result_type> list(1); |  | ||||||
|       src.generate(list.begin(),list.end()); |  | ||||||
|       return list[0]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| ======= |  | ||||||
| >>>>>>> develop |  | ||||||
|   */ |  | ||||||
|    |  | ||||||
|   // real scalars are one component |   // real scalars are one component | ||||||
|   template<class scalar,class distribution,class generator>  |   template<class scalar,class distribution,class generator>  | ||||||
|   void fillScalar(scalar &s,distribution &dist,generator & gen) |   void fillScalar(scalar &s,distribution &dist,generator & gen) | ||||||
| @@ -158,7 +109,7 @@ namespace Grid { | |||||||
| #ifdef RNG_SITMO | #ifdef RNG_SITMO | ||||||
|     typedef sitmo::prng_engine 	RngEngine; |     typedef sitmo::prng_engine 	RngEngine; | ||||||
|     typedef uint64_t    	RngStateType; |     typedef uint64_t    	RngStateType; | ||||||
|     static const int    	RngStateCount = 13; |     static const int    	RngStateCount = 4; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     std::vector<RngEngine>                             _generators; |     std::vector<RngEngine>                             _generators; | ||||||
| @@ -213,7 +164,7 @@ namespace Grid { | |||||||
|       ss<<eng; |       ss<<eng; | ||||||
|       ss.seekg(0,ss.beg); |       ss.seekg(0,ss.beg); | ||||||
|       for(int i=0;i<RngStateCount;i++){ |       for(int i=0;i<RngStateCount;i++){ | ||||||
|         ss>>saved[i]; | 	ss>>saved[i]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void GetState(std::vector<RngStateType> & saved,int gen) { |     void GetState(std::vector<RngStateType> & saved,int gen) { | ||||||
| @@ -223,7 +174,7 @@ namespace Grid { | |||||||
|       assert(saved.size()==RngStateCount); |       assert(saved.size()==RngStateCount); | ||||||
|       std::stringstream ss; |       std::stringstream ss; | ||||||
|       for(int i=0;i<RngStateCount;i++){ |       for(int i=0;i<RngStateCount;i++){ | ||||||
|         ss<< saved[i]<<" "; | 	ss<< saved[i]<<" "; | ||||||
|       } |       } | ||||||
|       ss.seekg(0,ss.beg); |       ss.seekg(0,ss.beg); | ||||||
|       ss>>eng; |       ss>>eng; | ||||||
| @@ -264,7 +215,7 @@ namespace Grid { | |||||||
|  |  | ||||||
|       dist[0].reset(); |       dist[0].reset(); | ||||||
|       for(int idx=0;idx<words;idx++){ |       for(int idx=0;idx<words;idx++){ | ||||||
|   fillScalar(buf[idx],dist[0],_generators[0]); | 	fillScalar(buf[idx],dist[0],_generators[0]); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); |       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); | ||||||
| @@ -296,7 +247,7 @@ namespace Grid { | |||||||
|       RealF *pointer=(RealF *)&l; |       RealF *pointer=(RealF *)&l; | ||||||
|       dist[0].reset(); |       dist[0].reset(); | ||||||
|       for(int i=0;i<2*vComplexF::Nsimd();i++){ |       for(int i=0;i<2*vComplexF::Nsimd();i++){ | ||||||
|   fillScalar(pointer[i],dist[0],_generators[0]); | 	fillScalar(pointer[i],dist[0],_generators[0]); | ||||||
|       } |       } | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); |       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); | ||||||
|     } |     } | ||||||
| @@ -304,7 +255,7 @@ namespace Grid { | |||||||
|       RealD *pointer=(RealD *)&l; |       RealD *pointer=(RealD *)&l; | ||||||
|       dist[0].reset(); |       dist[0].reset(); | ||||||
|       for(int i=0;i<2*vComplexD::Nsimd();i++){ |       for(int i=0;i<2*vComplexD::Nsimd();i++){ | ||||||
|   fillScalar(pointer[i],dist[0],_generators[0]); | 	fillScalar(pointer[i],dist[0],_generators[0]); | ||||||
|       } |       } | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); |       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); | ||||||
|     } |     } | ||||||
| @@ -312,7 +263,7 @@ namespace Grid { | |||||||
|       RealF *pointer=(RealF *)&l; |       RealF *pointer=(RealF *)&l; | ||||||
|       dist[0].reset(); |       dist[0].reset(); | ||||||
|       for(int i=0;i<vRealF::Nsimd();i++){ |       for(int i=0;i<vRealF::Nsimd();i++){ | ||||||
|   fillScalar(pointer[i],dist[0],_generators[0]); | 	fillScalar(pointer[i],dist[0],_generators[0]); | ||||||
|       } |       } | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); |       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); | ||||||
|     } |     } | ||||||
| @@ -324,7 +275,7 @@ namespace Grid { | |||||||
|       } |       } | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); |       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); | ||||||
|     } |     } | ||||||
|      |  | ||||||
|     void SeedFixedIntegers(const std::vector<int> &seeds){ |     void SeedFixedIntegers(const std::vector<int> &seeds){ | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); |       CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); | ||||||
|       std::seed_seq src(seeds.begin(),seeds.end()); |       std::seed_seq src(seeds.begin(),seeds.end()); | ||||||
| @@ -333,20 +284,18 @@ namespace Grid { | |||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   class GridParallelRNG : public GridRNGbase { |   class GridParallelRNG : public GridRNGbase { | ||||||
|  |  | ||||||
|     double _time_counter; |  | ||||||
|  |  | ||||||
|   public: |   public: | ||||||
|     GridBase *_grid; |     GridBase *_grid; | ||||||
|     unsigned int _vol; |     int _vol; | ||||||
|  |   public: | ||||||
|  |  | ||||||
|     int generator_idx(int os,int is) { |     int generator_idx(int os,int is){ | ||||||
|       return is*_grid->oSites()+os; |       return is*_grid->oSites()+os; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     GridParallelRNG(GridBase *grid) : GridRNGbase() { |     GridParallelRNG(GridBase *grid) : GridRNGbase() { | ||||||
|       _grid = grid; |       _grid=grid; | ||||||
|       _vol  =_grid->iSites()*_grid->oSites(); |       _vol =_grid->iSites()*_grid->oSites(); | ||||||
|  |  | ||||||
|       _generators.resize(_vol); |       _generators.resize(_vol); | ||||||
|       _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); |       _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); | ||||||
| @@ -360,34 +309,33 @@ namespace Grid { | |||||||
|       typedef typename vobj::scalar_object scalar_object; |       typedef typename vobj::scalar_object scalar_object; | ||||||
|       typedef typename vobj::scalar_type scalar_type; |       typedef typename vobj::scalar_type scalar_type; | ||||||
|       typedef typename vobj::vector_type vector_type; |       typedef typename vobj::vector_type vector_type; | ||||||
|  |        | ||||||
|  |       int multiplicity = RNGfillable(_grid,l._grid); | ||||||
|  |  | ||||||
|       double inner_time_counter = usecond(); |       int     Nsimd =_grid->Nsimd(); | ||||||
|  |       int     osites=_grid->oSites(); | ||||||
|       int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid |       int words=sizeof(scalar_object)/sizeof(scalar_type); | ||||||
|       int Nsimd  = _grid->Nsimd();  // guaranteed to be the same for l._grid too |  | ||||||
|       int osites = _grid->oSites();  // guaranteed to be <= l._grid->oSites() by a factor multiplicity |  | ||||||
|       int words  = sizeof(scalar_object) / sizeof(scalar_type); |  | ||||||
|  |  | ||||||
|       parallel_for(int ss=0;ss<osites;ss++){ |       parallel_for(int ss=0;ss<osites;ss++){ | ||||||
|         std::vector<scalar_object> buf(Nsimd); |  | ||||||
|         for (int m = 0; m < multiplicity; m++) {  // Draw from same generator multiplicity times |  | ||||||
|  |  | ||||||
|           int sm = multiplicity * ss + m;  // Maps the generator site to the fine site | 	std::vector<scalar_object> buf(Nsimd); | ||||||
|  | 	for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times | ||||||
|  |  | ||||||
|           for (int si = 0; si < Nsimd; si++) { | 	  int sm=multiplicity*ss+m;      // Maps the generator site to the fine site | ||||||
|              |  | ||||||
|             int gdx = generator_idx(ss, si);  // index of generator state | 	  for(int si=0;si<Nsimd;si++){ | ||||||
|             scalar_type *pointer = (scalar_type *)&buf[si]; | 	    int gdx = generator_idx(ss,si); // index of generator state | ||||||
|             dist[gdx].reset(); | 	    scalar_type *pointer = (scalar_type *)&buf[si]; | ||||||
|             for (int idx = 0; idx < words; idx++)  | 	    dist[gdx].reset(); | ||||||
|               fillScalar(pointer[idx], dist[gdx], _generators[gdx]); | 	    for(int idx=0;idx<words;idx++){ | ||||||
|           } | 	      fillScalar(pointer[idx],dist[gdx],_generators[gdx]); | ||||||
|           // merge into SIMD lanes, FIXME suboptimal implementation | 	    } | ||||||
|           merge(l._odata[sm], buf); | 	  } | ||||||
|         } |  | ||||||
|  | 	  // merge into SIMD lanes | ||||||
|  | 	  merge(l._odata[sm],buf); | ||||||
|  | 	} | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       _time_counter += usecond()- inner_time_counter; |  | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     void SeedFixedIntegers(const std::vector<int> &seeds){ |     void SeedFixedIntegers(const std::vector<int> &seeds){ | ||||||
| @@ -464,12 +412,6 @@ namespace Grid { | |||||||
|       } |       } | ||||||
| #endif | #endif | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void Report(){ |  | ||||||
|       std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////// | ||||||
|     // Support for rigorous test of RNG's |     // Support for rigorous test of RNG's | ||||||
|     // Return uniform random uint32_t from requested site generator |     // Return uniform random uint32_t from requested site generator | ||||||
| @@ -477,6 +419,7 @@ namespace Grid { | |||||||
|     uint32_t GlobalU01(int gsite){ |     uint32_t GlobalU01(int gsite){ | ||||||
|  |  | ||||||
|       uint32_t the_number; |       uint32_t the_number; | ||||||
|  |  | ||||||
|       // who |       // who | ||||||
|       std::vector<int> gcoor; |       std::vector<int> gcoor; | ||||||
|       int rank,o_idx,i_idx; |       int rank,o_idx,i_idx; | ||||||
|   | |||||||
| @@ -551,10 +551,7 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | |||||||
|  |  | ||||||
| //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | ||||||
| template<typename vobj, typename sobj> | template<typename vobj, typename sobj> | ||||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type  | typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){ | ||||||
| unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in) |  | ||||||
| { |  | ||||||
|  |  | ||||||
|   typedef typename vobj::vector_type vtype; |   typedef typename vobj::vector_type vtype; | ||||||
|    |    | ||||||
|   GridBase* in_grid = in._grid; |   GridBase* in_grid = in._grid; | ||||||
| @@ -593,54 +590,6 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in) | |||||||
|     extract1(in_vobj, out_ptrs, 0); |     extract1(in_vobj, out_ptrs, 0); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order |  | ||||||
| template<typename vobj, typename sobj> |  | ||||||
| typename std::enable_if<isSIMDvectorized<vobj>::value  |  | ||||||
|                     && !isSIMDvectorized<sobj>::value, void>::type  |  | ||||||
| vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) |  | ||||||
| { |  | ||||||
|  |  | ||||||
|   typedef typename vobj::vector_type vtype; |  | ||||||
|    |  | ||||||
|   GridBase* grid = out._grid; |  | ||||||
|   assert(in.size()==grid->lSites()); |  | ||||||
|    |  | ||||||
|   int ndim     = grid->Nd(); |  | ||||||
|   int nsimd    = vtype::Nsimd(); |  | ||||||
|  |  | ||||||
|   std::vector<std::vector<int> > icoor(nsimd); |  | ||||||
|        |  | ||||||
|   for(int lane=0; lane < nsimd; lane++){ |  | ||||||
|     icoor[lane].resize(ndim); |  | ||||||
|     grid->iCoorFromIindex(icoor[lane],lane); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index |  | ||||||
|     //Assemble vector of pointers to output elements |  | ||||||
|     std::vector<sobj*> ptrs(nsimd); |  | ||||||
|  |  | ||||||
|     std::vector<int> ocoor(ndim); |  | ||||||
|     grid->oCoorFromOindex(ocoor, oidx); |  | ||||||
|  |  | ||||||
|     std::vector<int> lcoor(grid->Nd()); |  | ||||||
|        |  | ||||||
|     for(int lane=0; lane < nsimd; lane++){ |  | ||||||
|  |  | ||||||
|       for(int mu=0;mu<ndim;mu++){ |  | ||||||
| 	lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu]; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int lex; |  | ||||||
|       Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); |  | ||||||
|       ptrs[lane] = &in[lex]; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     //pack from those ptrs |  | ||||||
|     vobj vecobj; |  | ||||||
|     merge1(vecobj, ptrs, 0); |  | ||||||
|     out._odata[oidx] = vecobj;  |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //Convert a Lattice from one precision to another | //Convert a Lattice from one precision to another | ||||||
| template<class VobjOut, class VobjIn> | template<class VobjOut, class VobjIn> | ||||||
| @@ -666,7 +615,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | |||||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); |   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); |   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||||
|      |      | ||||||
|   parallel_for(uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ |   parallel_for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||||
|     std::vector<int> out_ocoor(ndim); |     std::vector<int> out_ocoor(ndim); | ||||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); |     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -62,20 +62,14 @@ namespace Grid { | |||||||
|     return ret; |     return ret; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ |   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ | ||||||
|     Lattice<obj> ret(rhs._grid); |     Lattice<obj> ret(rhs._grid); | ||||||
|     ret.checkerboard = rhs.checkerboard; |     ret.checkerboard = rhs.checkerboard; | ||||||
|     conformable(ret,rhs); |     conformable(ret,rhs); | ||||||
|     parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ |     parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ | ||||||
|       ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); |       ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return ret; |     return ret; | ||||||
|  |  | ||||||
|      |  | ||||||
|      |  | ||||||
|  |  | ||||||
|      |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -30,7 +30,6 @@ directory | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/util/CompilerCompatible.h> |  | ||||||
|  |  | ||||||
| #include <cxxabi.h> | #include <cxxabi.h> | ||||||
| #include <memory> | #include <memory> | ||||||
| @@ -95,7 +94,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | |||||||
| //////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////// | ||||||
| void Grid_quiesce_nodes(void) { | void Grid_quiesce_nodes(void) { | ||||||
|   int me = 0; |   int me = 0; | ||||||
| #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) | #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L) | ||||||
|   MPI_Comm_rank(MPI_COMM_WORLD, &me); |   MPI_Comm_rank(MPI_COMM_WORLD, &me); | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_SHMEM | #ifdef GRID_COMMS_SHMEM | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,716 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/parallelIO/IldgIO.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef GRID_ILDG_IO_H |  | ||||||
| #define GRID_ILDG_IO_H |  | ||||||
|  |  | ||||||
| #ifdef HAVE_LIME |  | ||||||
| #include <algorithm> |  | ||||||
| #include <fstream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <iostream> |  | ||||||
| #include <map> |  | ||||||
|  |  | ||||||
| #include <pwd.h> |  | ||||||
| #include <sys/utsname.h> |  | ||||||
| #include <unistd.h> |  | ||||||
|  |  | ||||||
| //C-Lime is a must have for this functionality |  | ||||||
| extern "C" {   |  | ||||||
| #include "lime.h" |  | ||||||
| } |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
| namespace QCD { |  | ||||||
|  |  | ||||||
|   ///////////////////////////////// |  | ||||||
|   // Encode word types as strings |  | ||||||
|   ///////////////////////////////// |  | ||||||
|  template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } |  | ||||||
|  template<> inline std::string ScidacWordMnemonic<double>  (void){ return std::string("D"); } |  | ||||||
|  template<> inline std::string ScidacWordMnemonic<float>   (void){ return std::string("F"); } |  | ||||||
|  template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); } |  | ||||||
|  template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); } |  | ||||||
|  template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } |  | ||||||
|  template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); } |  | ||||||
|  |  | ||||||
|   ///////////////////////////////////////// |  | ||||||
|   // Encode a generic tensor as a string |  | ||||||
|   ///////////////////////////////////////// |  | ||||||
|  template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {  |  | ||||||
|  |  | ||||||
|    typedef typename getPrecision<vobj>::real_scalar_type stype; |  | ||||||
|  |  | ||||||
|    int _ColourN       = indexRank<ColourIndex,vobj>(); |  | ||||||
|    int _ColourScalar  =  isScalar<ColourIndex,vobj>(); |  | ||||||
|    int _ColourVector  =  isVector<ColourIndex,vobj>(); |  | ||||||
|    int _ColourMatrix  =  isMatrix<ColourIndex,vobj>(); |  | ||||||
|  |  | ||||||
|    int _SpinN       = indexRank<SpinIndex,vobj>(); |  | ||||||
|    int _SpinScalar  =  isScalar<SpinIndex,vobj>(); |  | ||||||
|    int _SpinVector  =  isVector<SpinIndex,vobj>(); |  | ||||||
|    int _SpinMatrix  =  isMatrix<SpinIndex,vobj>(); |  | ||||||
|  |  | ||||||
|    int _LorentzN       = indexRank<LorentzIndex,vobj>(); |  | ||||||
|    int _LorentzScalar  =  isScalar<LorentzIndex,vobj>(); |  | ||||||
|    int _LorentzVector  =  isVector<LorentzIndex,vobj>(); |  | ||||||
|    int _LorentzMatrix  =  isMatrix<LorentzIndex,vobj>(); |  | ||||||
|  |  | ||||||
|    std::stringstream stream; |  | ||||||
|  |  | ||||||
|    stream << "GRID_"; |  | ||||||
|    stream << ScidacWordMnemonic<stype>(); |  | ||||||
|  |  | ||||||
|    //   std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl; |  | ||||||
|    //   std::cout << " Spin    N/S/V/M : " << _SpinN   <<" "<<_SpinScalar   <<"/"<<_SpinVector   <<"/"<<_SpinMatrix<<std::endl; |  | ||||||
|    //   std::cout << " Colour  N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl; |  | ||||||
|  |  | ||||||
|    if ( _LorentzVector )   stream << "_LorentzVector"<<_LorentzN; |  | ||||||
|    if ( _LorentzMatrix )   stream << "_LorentzMatrix"<<_LorentzN; |  | ||||||
|  |  | ||||||
|    if ( _SpinVector )   stream << "_SpinVector"<<_SpinN; |  | ||||||
|    if ( _SpinMatrix )   stream << "_SpinMatrix"<<_SpinN; |  | ||||||
|  |  | ||||||
|    if ( _ColourVector )   stream << "_ColourVector"<<_ColourN; |  | ||||||
|    if ( _ColourMatrix )   stream << "_ColourMatrix"<<_ColourN; |  | ||||||
|  |  | ||||||
|    if ( _ColourScalar && _LorentzScalar && _SpinScalar )   stream << "_Complex"; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|    typesize = sizeof(typename vobj::scalar_type); |  | ||||||
|  |  | ||||||
|    if ( _ColourMatrix ) typesize*= _ColourN*_ColourN; |  | ||||||
|    else                 typesize*= _ColourN; |  | ||||||
|  |  | ||||||
|    if ( _SpinMatrix )   typesize*= _SpinN*_SpinN; |  | ||||||
|    else                 typesize*= _SpinN; |  | ||||||
|  |  | ||||||
|    colors    = _ColourN; |  | ||||||
|    spins     = _SpinN; |  | ||||||
|    datacount = _LorentzN; |  | ||||||
|  |  | ||||||
|    return stream.str(); |  | ||||||
|  } |  | ||||||
|   |  | ||||||
|  template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {  |  | ||||||
|    return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount); |  | ||||||
|  }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  //////////////////////////////////////////////////////////// |  | ||||||
|  // Helper to fill out metadata |  | ||||||
|  //////////////////////////////////////////////////////////// |  | ||||||
|  template<class vobj> void ScidacMetaData(Lattice<vobj> & field, |  | ||||||
| 					  FieldMetaData &header, |  | ||||||
| 					  scidacRecord & _scidacRecord, |  | ||||||
| 					  scidacFile   & _scidacFile)  |  | ||||||
|  { |  | ||||||
|    typedef typename getPrecision<vobj>::real_scalar_type stype; |  | ||||||
|  |  | ||||||
|    ///////////////////////////////////// |  | ||||||
|    // Pull Grid's metadata |  | ||||||
|    ///////////////////////////////////// |  | ||||||
|    PrepareMetaData(field,header); |  | ||||||
|  |  | ||||||
|    ///////////////////////////////////// |  | ||||||
|    // Scidac Private File structure |  | ||||||
|    ///////////////////////////////////// |  | ||||||
|    _scidacFile              = scidacFile(field._grid); |  | ||||||
|  |  | ||||||
|    ///////////////////////////////////// |  | ||||||
|    // Scidac Private Record structure |  | ||||||
|    ///////////////////////////////////// |  | ||||||
|    scidacRecord sr; |  | ||||||
|    sr.datatype   = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount); |  | ||||||
|    sr.date       = header.creation_date; |  | ||||||
|    sr.precision  = ScidacWordMnemonic<stype>(); |  | ||||||
|    sr.recordtype = GRID_IO_FIELD; |  | ||||||
|  |  | ||||||
|    _scidacRecord = sr; |  | ||||||
|  |  | ||||||
|    std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
|  /////////////////////////////////////////////////////// |  | ||||||
|  // Scidac checksum |  | ||||||
|  /////////////////////////////////////////////////////// |  | ||||||
|  static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb) |  | ||||||
|  { |  | ||||||
|    uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16); |  | ||||||
|    uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); |  | ||||||
|    if ( scidac_csuma !=scidac_checksuma) return 0; |  | ||||||
|    if ( scidac_csumb !=scidac_checksumb) return 0; |  | ||||||
|     return 1; |  | ||||||
|  } |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Lime, ILDG and Scidac I/O classes |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| class GridLimeReader : public BinaryIO { |  | ||||||
|  public: |  | ||||||
|    /////////////////////////////////////////////////// |  | ||||||
|    // FIXME: format for RNG? Now just binary out instead |  | ||||||
|    /////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|    FILE       *File; |  | ||||||
|    LimeReader *LimeR; |  | ||||||
|    std::string filename; |  | ||||||
|  |  | ||||||
|    ///////////////////////////////////////////// |  | ||||||
|    // Open the file |  | ||||||
|    ///////////////////////////////////////////// |  | ||||||
|    void open(std::string &_filename)  |  | ||||||
|    { |  | ||||||
|      filename= _filename; |  | ||||||
|      File = fopen(filename.c_str(), "r"); |  | ||||||
|      LimeR = limeCreateReader(File); |  | ||||||
|    } |  | ||||||
|    ///////////////////////////////////////////// |  | ||||||
|    // Close the file |  | ||||||
|    ///////////////////////////////////////////// |  | ||||||
|    void close(void){ |  | ||||||
|      fclose(File); |  | ||||||
|      //     limeDestroyReader(LimeR); |  | ||||||
|    } |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   // Read a generic lattice field and verify checksum |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   template<class vobj> |  | ||||||
|   void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) |  | ||||||
|   { |  | ||||||
|     typedef typename vobj::scalar_object sobj; |  | ||||||
|     scidacChecksum scidacChecksum_; |  | ||||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; |  | ||||||
|  |  | ||||||
|     std::string format = getFormatString<vobj>(); |  | ||||||
|  |  | ||||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl; |  | ||||||
| 	 |  | ||||||
|       if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	off_t offset= ftell(File); |  | ||||||
| 	BinarySimpleMunger<sobj,sobj> munge; |  | ||||||
| 	BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); |  | ||||||
|  |  | ||||||
| 	///////////////////////////////////////////// |  | ||||||
| 	// Insist checksum is next record |  | ||||||
| 	///////////////////////////////////////////// |  | ||||||
| 	readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); |  | ||||||
|  |  | ||||||
| 	///////////////////////////////////////////// |  | ||||||
| 	// Verify checksums |  | ||||||
| 	///////////////////////////////////////////// |  | ||||||
| 	scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); |  | ||||||
| 	return; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   // Read a generic serialisable object |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   template<class serialisable_object> |  | ||||||
|   void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) |  | ||||||
|   { |  | ||||||
|     std::string xmlstring; |  | ||||||
|     // should this be a do while; can we miss a first record?? |  | ||||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  |  | ||||||
|  |  | ||||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) |  | ||||||
|  |  | ||||||
|       if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { |  | ||||||
| 	std::vector<char> xmlc(nbytes+1,'\0'); |  | ||||||
| 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     |  | ||||||
| 	XmlReader RD(&xmlc[0],""); |  | ||||||
| 	read(RD,object_name,object); |  | ||||||
| 	return; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|     }   |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class GridLimeWriter : public BinaryIO { |  | ||||||
|  public: |  | ||||||
|    /////////////////////////////////////////////////// |  | ||||||
|    // FIXME: format for RNG? Now just binary out instead |  | ||||||
|    /////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|    FILE       *File; |  | ||||||
|    LimeWriter *LimeW; |  | ||||||
|    std::string filename; |  | ||||||
|  |  | ||||||
|    void open(std::string &_filename) {  |  | ||||||
|      filename= _filename; |  | ||||||
|      File = fopen(filename.c_str(), "w"); |  | ||||||
|      LimeW = limeCreateWriter(File); assert(LimeW != NULL ); |  | ||||||
|    } |  | ||||||
|    ///////////////////////////////////////////// |  | ||||||
|    // Close the file |  | ||||||
|    ///////////////////////////////////////////// |  | ||||||
|    void close(void) { |  | ||||||
|      fclose(File); |  | ||||||
|      //  limeDestroyWriter(LimeW); |  | ||||||
|    } |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   // Lime utility functions |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) |  | ||||||
|   { |  | ||||||
|     LimeRecordHeader *h; |  | ||||||
|     h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize); |  | ||||||
|     assert(limeWriteRecordHeader(h, LimeW) >= 0); |  | ||||||
|     limeDestroyHeader(h); |  | ||||||
|     return LIME_SUCCESS; |  | ||||||
|   } |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   // Write a generic serialisable object |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   template<class serialisable_object> |  | ||||||
|   void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) |  | ||||||
|   { |  | ||||||
|     std::string xmlstring; |  | ||||||
|     { |  | ||||||
|       XmlWriter WR("",""); |  | ||||||
|       write(WR,object_name,object); |  | ||||||
|       xmlstring = WR.XmlString(); |  | ||||||
|     } |  | ||||||
|     uint64_t nbytes = xmlstring.size(); |  | ||||||
|     int err; |  | ||||||
|     LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL); |  | ||||||
|  |  | ||||||
|     err=limeWriteRecordHeader(h, LimeW);                    assert(err>=0); |  | ||||||
|     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); |  | ||||||
|     err=limeWriterCloseRecord(LimeW);                       assert(err>=0); |  | ||||||
|     limeDestroyHeader(h); |  | ||||||
|   } |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   // Write a generic lattice field and csum |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   template<class vobj> |  | ||||||
|   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) |  | ||||||
|   { |  | ||||||
|     //////////////////////////////////////////// |  | ||||||
|     // Create record header |  | ||||||
|     //////////////////////////////////////////// |  | ||||||
|     typedef typename vobj::scalar_object sobj; |  | ||||||
|     int err; |  | ||||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; |  | ||||||
|     uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; |  | ||||||
|     createLimeRecordHeader(record_name, 0, 0, PayloadSize); |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////////// |  | ||||||
|     // NB: FILE and iostream are jointly writing disjoint sequences in the |  | ||||||
|     // the same file through different file handles (integer units). |  | ||||||
|     //  |  | ||||||
|     // These are both buffered, so why I think this code is right is as follows. |  | ||||||
|     // |  | ||||||
|     // i)  write record header to FILE *File, telegraphing the size.  |  | ||||||
|     // ii) ftell reads the offset from FILE *File . |  | ||||||
|     // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. |  | ||||||
|     //      Closes iostream and flushes. |  | ||||||
|     // iv) fseek on FILE * to end of this disjoint section. |  | ||||||
|     //  v) Continue writing scidac record. |  | ||||||
|     //////////////////////////////////////////////////////////////////// |  | ||||||
|     off_t offset = ftell(File); |  | ||||||
|     std::string format = getFormatString<vobj>(); |  | ||||||
|     BinarySimpleMunger<sobj,sobj> munge; |  | ||||||
|     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); |  | ||||||
|     err=limeWriterCloseRecord(LimeW);  assert(err>=0); |  | ||||||
|     //////////////////////////////////////// |  | ||||||
|     // Write checksum element, propagaing forward from the BinaryIO |  | ||||||
|     // Always pair a checksum with a binary object, and close message |  | ||||||
|     //////////////////////////////////////// |  | ||||||
|     scidacChecksum checksum; |  | ||||||
|     std::stringstream streama; streama << std::hex << scidac_csuma; |  | ||||||
|     std::stringstream streamb; streamb << std::hex << scidac_csumb; |  | ||||||
|     checksum.suma= streama.str(); |  | ||||||
|     checksum.sumb= streamb.str(); |  | ||||||
|     std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; |  | ||||||
|     writeLimeObject(0,1,checksum,std::string("scidacChecksum"    ),std::string(SCIDAC_CHECKSUM)); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class ScidacWriter : public GridLimeWriter { |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|    template<class SerialisableUserFile> |  | ||||||
|    void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) |  | ||||||
|    { |  | ||||||
|      scidacFile    _scidacFile(grid); |  | ||||||
|      writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); |  | ||||||
|      writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); |  | ||||||
|    } |  | ||||||
|   //////////////////////////////////////////////// |  | ||||||
|   // Write generic lattice field in scidac format |  | ||||||
|   //////////////////////////////////////////////// |  | ||||||
|    template <class vobj, class userRecord> |  | ||||||
|   void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)  |  | ||||||
|   { |  | ||||||
|     typedef typename vobj::scalar_object sobj; |  | ||||||
|     uint64_t nbytes; |  | ||||||
|     GridBase * grid = field._grid; |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////// |  | ||||||
|     // fill the Grid header |  | ||||||
|     //////////////////////////////////////// |  | ||||||
|     FieldMetaData header; |  | ||||||
|     scidacRecord  _scidacRecord; |  | ||||||
|     scidacFile    _scidacFile; |  | ||||||
|  |  | ||||||
|     ScidacMetaData(field,header,_scidacRecord,_scidacFile); |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////// |  | ||||||
|     // Fill the Lime file record by record |  | ||||||
|     ////////////////////////////////////////////// |  | ||||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  |  | ||||||
|     writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); |  | ||||||
|     writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); |  | ||||||
|     writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));      // Closes message with checksum |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class IldgWriter : public ScidacWriter { |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|   /////////////////////////////////// |  | ||||||
|   // A little helper |  | ||||||
|   /////////////////////////////////// |  | ||||||
|   void writeLimeIldgLFN(std::string &LFN) |  | ||||||
|   { |  | ||||||
|     uint64_t PayloadSize = LFN.size(); |  | ||||||
|     int err; |  | ||||||
|     createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize); |  | ||||||
|     err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0); |  | ||||||
|     err=limeWriterCloseRecord(LimeW); assert(err>=0); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////// |  | ||||||
|   // Special ILDG operations ; gauge configs only. |  | ||||||
|   // Don't require scidac records EXCEPT checksum |  | ||||||
|   // Use Grid MetaData object if present. |  | ||||||
|   //////////////////////////////////////////////////////////////// |  | ||||||
|   template <class vsimd> |  | ||||||
|   void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)  |  | ||||||
|   { |  | ||||||
|     GridBase * grid = Umu._grid; |  | ||||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; |  | ||||||
|     typedef iLorentzColourMatrix<vsimd> vobj; |  | ||||||
|     typedef typename vobj::scalar_object sobj; |  | ||||||
|  |  | ||||||
|     uint64_t nbytes; |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////// |  | ||||||
|     // fill the Grid header |  | ||||||
|     //////////////////////////////////////// |  | ||||||
|     FieldMetaData header; |  | ||||||
|     scidacRecord  _scidacRecord; |  | ||||||
|     scidacFile    _scidacFile; |  | ||||||
|  |  | ||||||
|     ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); |  | ||||||
|  |  | ||||||
|     std::string format = header.floating_point; |  | ||||||
|     header.ensemble_id    = description; |  | ||||||
|     header.ensemble_label = description; |  | ||||||
|     header.sequence_number = sequence; |  | ||||||
|     header.ildg_lfn = LFN; |  | ||||||
|  |  | ||||||
|     assert ( (format == std::string("IEEE32BIG"))   |  | ||||||
|            ||(format == std::string("IEEE64BIG")) ); |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////// |  | ||||||
|     // Fill ILDG header data struct |  | ||||||
|     ////////////////////////////////////////////////////// |  | ||||||
|     ildgFormat ildgfmt ; |  | ||||||
|     ildgfmt.field     = std::string("su3gauge"); |  | ||||||
|  |  | ||||||
|     if ( format == std::string("IEEE32BIG") ) {  |  | ||||||
|       ildgfmt.precision = 32; |  | ||||||
|     } else {  |  | ||||||
|       ildgfmt.precision = 64; |  | ||||||
|     } |  | ||||||
|     ildgfmt.version = 1.0; |  | ||||||
|     ildgfmt.lx = header.dimension[0]; |  | ||||||
|     ildgfmt.ly = header.dimension[1]; |  | ||||||
|     ildgfmt.lz = header.dimension[2]; |  | ||||||
|     ildgfmt.lt = header.dimension[3]; |  | ||||||
|     assert(header.nd==4); |  | ||||||
|     assert(header.nd==header.dimension.size()); |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Fill the USQCD info field |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     usqcdInfo info; |  | ||||||
|     info.version=1.0; |  | ||||||
|     info.plaq   = header.plaquette; |  | ||||||
|     info.linktr = header.link_trace; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl; |  | ||||||
|     ////////////////////////////////////////////// |  | ||||||
|     // Fill the Lime file record by record |  | ||||||
|     ////////////////////////////////////////////// |  | ||||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  |  | ||||||
|     writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); |  | ||||||
|     writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); |  | ||||||
|     writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); |  | ||||||
|     writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); |  | ||||||
|     writeLimeObject(0,0,ildgfmt,std::string("ildgFormat")   ,std::string(ILDG_FORMAT)); // rec |  | ||||||
|     writeLimeIldgLFN(header.ildg_lfn);                                                 // rec |  | ||||||
|     writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA));      // Closes message with checksum |  | ||||||
|     //    limeDestroyWriter(LimeW); |  | ||||||
|     fclose(File); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class IldgReader : public GridLimeReader { |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////// |  | ||||||
|   // Read either Grid/SciDAC/ILDG configuration |  | ||||||
|   // Don't require scidac records EXCEPT checksum |  | ||||||
|   // Use Grid MetaData object if present. |  | ||||||
|   // Else use ILDG MetaData object if present. |  | ||||||
|   // Else use SciDAC MetaData object if present. |  | ||||||
|   //////////////////////////////////////////////////////////////// |  | ||||||
|   template <class vsimd> |  | ||||||
|   void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) { |  | ||||||
|  |  | ||||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; |  | ||||||
|     typedef typename GaugeField::vector_object  vobj; |  | ||||||
|     typedef typename vobj::scalar_object sobj; |  | ||||||
|  |  | ||||||
|     typedef LorentzColourMatrixF fobj; |  | ||||||
|     typedef LorentzColourMatrixD dobj; |  | ||||||
|  |  | ||||||
|     GridBase *grid = Umu._grid; |  | ||||||
|  |  | ||||||
|     std::vector<int> dims = Umu._grid->FullDimensions(); |  | ||||||
|  |  | ||||||
|     assert(dims.size()==4); |  | ||||||
|  |  | ||||||
|     // Metadata holders |  | ||||||
|     ildgFormat     ildgFormat_    ; |  | ||||||
|     std::string    ildgLFN_       ; |  | ||||||
|     scidacChecksum scidacChecksum_;  |  | ||||||
|     usqcdInfo      usqcdInfo_     ; |  | ||||||
|  |  | ||||||
|     // track what we read from file |  | ||||||
|     int found_ildgFormat    =0; |  | ||||||
|     int found_ildgLFN       =0; |  | ||||||
|     int found_scidacChecksum=0; |  | ||||||
|     int found_usqcdInfo     =0; |  | ||||||
|     int found_ildgBinary =0; |  | ||||||
|     int found_FieldMetaData =0; |  | ||||||
|  |  | ||||||
|     uint32_t nersc_csum; |  | ||||||
|     uint32_t scidac_csuma; |  | ||||||
|     uint32_t scidac_csumb; |  | ||||||
|  |  | ||||||
|     // Binary format |  | ||||||
|     std::string format; |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Loop over all records |  | ||||||
|     // -- Order is poorly guaranteed except ILDG header preceeds binary section. |  | ||||||
|     // -- Run like an event loop. |  | ||||||
|     // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing |  | ||||||
|     //    that Scidac.  |  | ||||||
|     // -- Insist on Scidac checksum record. |  | ||||||
|     ////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  |  | ||||||
|  |  | ||||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) |  | ||||||
|        |  | ||||||
|       ////////////////////////////////////////////////////////////////// |  | ||||||
|       // If not BINARY_DATA read a string and parse |  | ||||||
|       ////////////////////////////////////////////////////////////////// |  | ||||||
|       if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) )  ) { |  | ||||||
| 	 |  | ||||||
| 	// Copy out the string |  | ||||||
| 	std::vector<char> xmlc(nbytes+1,'\0'); |  | ||||||
| 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     |  | ||||||
| 	std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; |  | ||||||
|  |  | ||||||
| 	////////////////////////////////// |  | ||||||
| 	// ILDG format record |  | ||||||
| 	if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {  |  | ||||||
|  |  | ||||||
| 	  XmlReader RD(&xmlc[0],""); |  | ||||||
| 	  read(RD,"ildgFormat",ildgFormat_); |  | ||||||
|  |  | ||||||
| 	  if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG"); |  | ||||||
| 	  if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG"); |  | ||||||
|  |  | ||||||
| 	  assert( ildgFormat_.lx == dims[0]); |  | ||||||
| 	  assert( ildgFormat_.ly == dims[1]); |  | ||||||
| 	  assert( ildgFormat_.lz == dims[2]); |  | ||||||
| 	  assert( ildgFormat_.lt == dims[3]); |  | ||||||
|  |  | ||||||
| 	  found_ildgFormat = 1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) { |  | ||||||
| 	  FieldMetaData_.ildg_lfn = std::string(&xmlc[0]); |  | ||||||
| 	  found_ildgLFN = 1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {  |  | ||||||
|  |  | ||||||
| 	  XmlReader RD(&xmlc[0],""); |  | ||||||
| 	  read(RD,"FieldMetaData",FieldMetaData_); |  | ||||||
|  |  | ||||||
| 	  format = FieldMetaData_.floating_point; |  | ||||||
|  |  | ||||||
| 	  assert(FieldMetaData_.dimension[0] == dims[0]); |  | ||||||
| 	  assert(FieldMetaData_.dimension[1] == dims[1]); |  | ||||||
| 	  assert(FieldMetaData_.dimension[2] == dims[2]); |  | ||||||
| 	  assert(FieldMetaData_.dimension[3] == dims[3]); |  | ||||||
|  |  | ||||||
| 	  found_FieldMetaData = 1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {  |  | ||||||
| 	  std::string xmls(&xmlc[0]); |  | ||||||
| 	  // is it a USQCD info field |  | ||||||
| 	  if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {  |  | ||||||
| 	    std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; |  | ||||||
| 	    XmlReader RD(&xmlc[0],""); |  | ||||||
| 	    read(RD,"usqcdInfo",usqcdInfo_); |  | ||||||
| 	    found_usqcdInfo = 1; |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {  |  | ||||||
| 	  XmlReader RD(&xmlc[0],""); |  | ||||||
| 	  read(RD,"scidacChecksum",scidacChecksum_); |  | ||||||
| 	  found_scidacChecksum = 1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
|       } else {   |  | ||||||
| 	///////////////////////////////// |  | ||||||
| 	// Binary data |  | ||||||
| 	///////////////////////////////// |  | ||||||
| 	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; |  | ||||||
| 	off_t offset= ftell(File); |  | ||||||
|  |  | ||||||
| 	if ( format == std::string("IEEE64BIG") ) { |  | ||||||
| 	  GaugeSimpleMunger<dobj, sobj> munge; |  | ||||||
| 	  BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); |  | ||||||
| 	} else {  |  | ||||||
| 	  GaugeSimpleMunger<fobj, sobj> munge; |  | ||||||
| 	  BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	found_ildgBinary = 1; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////// |  | ||||||
|     // Minimally must find binary segment and checksum |  | ||||||
|     // Since this is an ILDG reader require ILDG format |  | ||||||
|     ////////////////////////////////////////////////////// |  | ||||||
|     assert(found_ildgBinary); |  | ||||||
|     assert(found_ildgFormat); |  | ||||||
|     assert(found_scidacChecksum); |  | ||||||
|  |  | ||||||
|     // Must find something with the lattice dimensions |  | ||||||
|     assert(found_FieldMetaData||found_ildgFormat); |  | ||||||
|  |  | ||||||
|     if ( found_FieldMetaData ) { |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl; |  | ||||||
|  |  | ||||||
|     } else {  |  | ||||||
|  |  | ||||||
|       assert(found_ildgFormat); |  | ||||||
|       assert ( ildgFormat_.field == std::string("su3gauge") ); |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|       // Populate our Grid metadata as best we can |  | ||||||
|       /////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|       std::ostringstream vers; vers << ildgFormat_.version; |  | ||||||
|       FieldMetaData_.hdr_version = vers.str(); |  | ||||||
|       FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); |  | ||||||
|  |  | ||||||
|       FieldMetaData_.nd=4; |  | ||||||
|       FieldMetaData_.dimension.resize(4); |  | ||||||
|  |  | ||||||
|       FieldMetaData_.dimension[0] = ildgFormat_.lx ; |  | ||||||
|       FieldMetaData_.dimension[1] = ildgFormat_.ly ; |  | ||||||
|       FieldMetaData_.dimension[2] = ildgFormat_.lz ; |  | ||||||
|       FieldMetaData_.dimension[3] = ildgFormat_.lt ; |  | ||||||
|  |  | ||||||
|       if ( found_usqcdInfo ) {  |  | ||||||
| 	FieldMetaData_.plaquette = usqcdInfo_.plaq; |  | ||||||
| 	FieldMetaData_.link_trace= usqcdInfo_.linktr; |  | ||||||
| 	std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl; |  | ||||||
| 	std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl; |  | ||||||
| 	std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl; |  | ||||||
|       } else {  |  | ||||||
| 	FieldMetaData_.plaquette = 0.0; |  | ||||||
| 	FieldMetaData_.link_trace= 0.0; |  | ||||||
| 	std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     // Really really want to mandate a scidac checksum |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     if ( found_scidacChecksum ) { |  | ||||||
|       FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16); |  | ||||||
|       FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); |  | ||||||
|       scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); |  | ||||||
|       assert( scidac_csuma ==FieldMetaData_.scidac_checksuma); |  | ||||||
|       assert( scidac_csumb ==FieldMetaData_.scidac_checksumb); |  | ||||||
|       std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl; |  | ||||||
|     } else {  |  | ||||||
|       std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl; |  | ||||||
|       assert(0); // Can I insist always checksum ? |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if ( found_FieldMetaData || found_usqcdInfo ) { |  | ||||||
|       FieldMetaData checker; |  | ||||||
|       GaugeStatistics(Umu,checker); |  | ||||||
|       assert(fabs(checker.plaquette  - FieldMetaData_.plaquette )<1.0e-5); |  | ||||||
|       assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5); |  | ||||||
|       std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  }; |  | ||||||
|  |  | ||||||
| }} |  | ||||||
|  |  | ||||||
| //HAVE_LIME |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -1,231 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/parallelIO/IldgIO.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef GRID_ILDGTYPES_IO_H |  | ||||||
| #define GRID_ILDGTYPES_IO_H |  | ||||||
|  |  | ||||||
| #ifdef HAVE_LIME |  | ||||||
| extern "C" { // for linkage |  | ||||||
| #include "lime.h" |  | ||||||
| } |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Data representation of records that enter ILDG and SciDac formats |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| #define GRID_FORMAT      "grid-format" |  | ||||||
| #define ILDG_FORMAT      "ildg-format" |  | ||||||
| #define ILDG_BINARY_DATA "ildg-binary-data" |  | ||||||
| #define ILDG_DATA_LFN    "ildg-data-lfn" |  | ||||||
| #define SCIDAC_CHECKSUM           "scidac-checksum" |  | ||||||
| #define SCIDAC_PRIVATE_FILE_XML   "scidac-private-file-xml" |  | ||||||
| #define SCIDAC_FILE_XML           "scidac-file-xml" |  | ||||||
| #define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml" |  | ||||||
| #define SCIDAC_RECORD_XML         "scidac-record-xml" |  | ||||||
| #define SCIDAC_BINARY_DATA        "scidac-binary-data" |  | ||||||
| // Unused SCIDAC records names; could move to support this functionality |  | ||||||
| #define SCIDAC_SITELIST           "scidac-sitelist" |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat |  | ||||||
|   const int GRID_IO_MULTIFILE  = 1; // hardcode lift from QIO compat |  | ||||||
|   const int GRID_IO_FIELD      = 0; // hardcode lift from QIO compat |  | ||||||
|   const int GRID_IO_GLOBAL     = 1; // hardcode lift from QIO compat |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // QIO uses mandatory "private" records fixed format |  | ||||||
| // Private is in principle "opaque" however it can't be changed now because that would break existing  |  | ||||||
| // file compatability, so should be correct to assume the undocumented but defacto file structure. |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| //////////////////////// |  | ||||||
| // Scidac private file xml |  | ||||||
| // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile> |  | ||||||
| //////////////////////// |  | ||||||
| struct scidacFile : Serializable { |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, |  | ||||||
|                                   double, version, |  | ||||||
|                                   int, spacetime, |  | ||||||
| 				  std::string, dims, // must convert to int |  | ||||||
|                                   int, volfmt); |  | ||||||
|  |  | ||||||
|   std::vector<int> getDimensions(void) {  |  | ||||||
|     std::stringstream stream(dims); |  | ||||||
|     std::vector<int> dimensions; |  | ||||||
|     int n; |  | ||||||
|     while(stream >> n){ |  | ||||||
|       dimensions.push_back(n); |  | ||||||
|     } |  | ||||||
|     return dimensions; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void setDimensions(std::vector<int> dimensions) {  |  | ||||||
|     char delimiter = ' '; |  | ||||||
|     std::stringstream stream; |  | ||||||
|     for(int i=0;i<dimensions.size();i++){  |  | ||||||
|       stream << dimensions[i]; |  | ||||||
|       if ( i != dimensions.size()-1) {  |  | ||||||
| 	stream << delimiter <<std::endl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     dims = stream.str(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Constructor provides Grid |  | ||||||
|   scidacFile() =default; // default constructor |  | ||||||
|   scidacFile(GridBase * grid){ |  | ||||||
|     version      = 1.0; |  | ||||||
|     spacetime    = grid->_ndimension; |  | ||||||
|     setDimensions(grid->FullDimensions());  |  | ||||||
|     volfmt       = GRID_IO_SINGLEFILE; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////// |  | ||||||
| // scidac-private-record-xml : example |  | ||||||
| // <scidacRecord> |  | ||||||
| // <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype> |  | ||||||
| // <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins> |  | ||||||
| // <typesize>144</typesize><datacount>4</datacount> |  | ||||||
| // </scidacRecord> |  | ||||||
| /////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| struct scidacRecord : Serializable { |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord, |  | ||||||
|                                   double, version, |  | ||||||
|                                   std::string, date, |  | ||||||
| 				  int, recordtype, |  | ||||||
| 				  std::string, datatype, |  | ||||||
| 				  std::string, precision, |  | ||||||
| 				  int, colors, |  | ||||||
| 				  int, spins, |  | ||||||
| 				  int, typesize, |  | ||||||
| 				  int, datacount); |  | ||||||
|  |  | ||||||
|   scidacRecord() { version =1.0; } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| //////////////////////// |  | ||||||
| // ILDG format |  | ||||||
| //////////////////////// |  | ||||||
| struct ildgFormat : Serializable { |  | ||||||
| public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, |  | ||||||
| 				  double, version, |  | ||||||
| 				  std::string, field, |  | ||||||
| 				  int, precision, |  | ||||||
| 				  int, lx, |  | ||||||
| 				  int, ly, |  | ||||||
| 				  int, lz, |  | ||||||
| 				  int, lt); |  | ||||||
|   ildgFormat() { version=1.0; }; |  | ||||||
| }; |  | ||||||
| //////////////////////// |  | ||||||
| // USQCD info |  | ||||||
| //////////////////////// |  | ||||||
| struct usqcdInfo : Serializable {  |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, |  | ||||||
| 				  double, version, |  | ||||||
| 				  double, plaq, |  | ||||||
| 				  double, linktr, |  | ||||||
| 				  std::string, info); |  | ||||||
|   usqcdInfo() {  |  | ||||||
|     version=1.0;  |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| //////////////////////// |  | ||||||
| // Scidac Checksum |  | ||||||
| //////////////////////// |  | ||||||
| struct scidacChecksum : Serializable {  |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, |  | ||||||
| 				  double, version, |  | ||||||
| 				  std::string, suma, |  | ||||||
| 				  std::string, sumb); |  | ||||||
|   scidacChecksum() {  |  | ||||||
|     version=1.0;  |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Type:           scidac-file-xml         <title>MILC ILDG archival gauge configuration</title> |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Type:            |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| //////////////////////// |  | ||||||
| // Scidac private file xml  |  | ||||||
| // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>  |  | ||||||
| ////////////////////////                                                                                                                                                                               |  | ||||||
|  |  | ||||||
| #if 0 |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| struct usqcdPropFile : Serializable {  |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, |  | ||||||
| 				  double, version, |  | ||||||
| 				  std::string, type, |  | ||||||
| 				  std::string, info); |  | ||||||
|   usqcdPropFile() {  |  | ||||||
|     version=1.0;  |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| struct usqcdSourceInfo : Serializable {  |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo, |  | ||||||
| 				  double, version, |  | ||||||
| 				  std::string, info); |  | ||||||
|   usqcdSourceInfo() {  |  | ||||||
|     version=1.0;  |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| struct usqcdPropInfo : Serializable {  |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo, |  | ||||||
| 				  double, version, |  | ||||||
| 				  int, spin, |  | ||||||
| 				  int, color, |  | ||||||
| 				  std::string, info); |  | ||||||
|   usqcdPropInfo() {  |  | ||||||
|     version=1.0;  |  | ||||||
|   }; |  | ||||||
| }; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| @@ -1,325 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/parallelIO/NerscIO.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <algorithm> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <fstream> |  | ||||||
| #include <map> |  | ||||||
| #include <unistd.h> |  | ||||||
| #include <sys/utsname.h> |  | ||||||
| #include <pwd.h> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   // Precision mapping |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   template<class vobj> static std::string getFormatString (void) |  | ||||||
|   { |  | ||||||
|     std::string format; |  | ||||||
|     typedef typename getPrecision<vobj>::real_scalar_type stype; |  | ||||||
|     if ( sizeof(stype) == sizeof(float) ) { |  | ||||||
|       format = std::string("IEEE32BIG"); |  | ||||||
|     } |  | ||||||
|     if ( sizeof(stype) == sizeof(double) ) { |  | ||||||
|       format = std::string("IEEE64BIG"); |  | ||||||
|     } |  | ||||||
|     return format; |  | ||||||
|   } |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // header specification/interpretation |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     class FieldMetaData : Serializable { |  | ||||||
|     public: |  | ||||||
|  |  | ||||||
|       GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData, |  | ||||||
| 				      int, nd, |  | ||||||
| 				      std::vector<int>, dimension, |  | ||||||
| 				      std::vector<std::string>, boundary, |  | ||||||
| 				      int, data_start, |  | ||||||
| 				      std::string, hdr_version, |  | ||||||
| 				      std::string, storage_format, |  | ||||||
| 				      double, link_trace, |  | ||||||
| 				      double, plaquette, |  | ||||||
| 				      uint32_t, checksum, |  | ||||||
| 				      uint32_t, scidac_checksuma, |  | ||||||
| 				      uint32_t, scidac_checksumb, |  | ||||||
| 				      unsigned int, sequence_number, |  | ||||||
| 				      std::string, data_type, |  | ||||||
| 				      std::string, ensemble_id, |  | ||||||
| 				      std::string, ensemble_label, |  | ||||||
| 				      std::string, ildg_lfn, |  | ||||||
| 				      std::string, creator, |  | ||||||
| 				      std::string, creator_hardware, |  | ||||||
| 				      std::string, creation_date, |  | ||||||
| 				      std::string, archive_date, |  | ||||||
| 				      std::string, floating_point); |  | ||||||
|       FieldMetaData(void) {  |  | ||||||
| 	nd=4; |  | ||||||
| 	dimension.resize(4); |  | ||||||
| 	boundary.resize(4); |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   namespace QCD { |  | ||||||
|  |  | ||||||
|     using namespace Grid; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Bit and Physical Checksumming and QA of data |  | ||||||
|     ////////////////////////////////////////////////////////////////////// |  | ||||||
|     inline void GridMetaData(GridBase *grid,FieldMetaData &header) |  | ||||||
|     { |  | ||||||
|       int nd = grid->_ndimension; |  | ||||||
|       header.nd = nd; |  | ||||||
|       header.dimension.resize(nd); |  | ||||||
|       header.boundary.resize(nd); |  | ||||||
|       for(int d=0;d<nd;d++) { |  | ||||||
| 	header.dimension[d] = grid->_fdimensions[d]; |  | ||||||
|       } |  | ||||||
|       for(int d=0;d<nd;d++) { |  | ||||||
| 	header.boundary[d] = std::string("PERIODIC"); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     inline void MachineCharacteristics(FieldMetaData &header) |  | ||||||
|     { |  | ||||||
|       // Who |  | ||||||
|       struct passwd *pw = getpwuid (getuid()); |  | ||||||
|       if (pw) header.creator = std::string(pw->pw_name);  |  | ||||||
|  |  | ||||||
|       // When |  | ||||||
|       std::time_t t = std::time(nullptr); |  | ||||||
|       std::tm tm_ = *std::localtime(&t); |  | ||||||
|       std::ostringstream oss;  |  | ||||||
|       //      oss << std::put_time(&tm_, "%c %Z"); |  | ||||||
|       header.creation_date = oss.str(); |  | ||||||
|       header.archive_date  = header.creation_date; |  | ||||||
|  |  | ||||||
|       // What |  | ||||||
|       struct utsname name;  uname(&name); |  | ||||||
|       header.creator_hardware = std::string(name.nodename)+"-"; |  | ||||||
|       header.creator_hardware+= std::string(name.machine)+"-"; |  | ||||||
|       header.creator_hardware+= std::string(name.sysname)+"-"; |  | ||||||
|       header.creator_hardware+= std::string(name.release); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
| #define dump_meta_data(field, s)					\ |  | ||||||
|       s << "BEGIN_HEADER"      << std::endl;				\ |  | ||||||
|       s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\ |  | ||||||
|       s << "DATATYPE = "       << field.data_type      << std::endl;	\ |  | ||||||
|       s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\ |  | ||||||
|       for(int i=0;i<4;i++){						\ |  | ||||||
| 	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ |  | ||||||
|       }									\ |  | ||||||
|       s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ |  | ||||||
|       s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \ |  | ||||||
|       for(int i=0;i<4;i++){						\ |  | ||||||
| 	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\ |  | ||||||
|       }									\ |  | ||||||
| 									\ |  | ||||||
|       s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \ |  | ||||||
|       s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \ |  | ||||||
|       s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \ |  | ||||||
|       s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\ |  | ||||||
|       s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\ |  | ||||||
|       s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\ |  | ||||||
|       s << "CREATOR = "         << field.creator          << std::endl;	\ |  | ||||||
|       s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\ |  | ||||||
|       s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\ |  | ||||||
|       s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\ |  | ||||||
|       s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\ |  | ||||||
|       s << "END_HEADER"         << std::endl; |  | ||||||
|  |  | ||||||
| template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header) |  | ||||||
| { |  | ||||||
|   GridBase *grid = field._grid; |  | ||||||
|   std::string format = getFormatString<vobj>(); |  | ||||||
|    header.floating_point = format; |  | ||||||
|    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac |  | ||||||
|    GridMetaData(grid,header);  |  | ||||||
|    MachineCharacteristics(header); |  | ||||||
|  } |  | ||||||
|  inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header) |  | ||||||
|  { |  | ||||||
|    // How to convert data precision etc... |  | ||||||
|    header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data); |  | ||||||
|    header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data); |  | ||||||
|  } |  | ||||||
|  inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header) |  | ||||||
|  { |  | ||||||
|    // How to convert data precision etc... |  | ||||||
|    header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data); |  | ||||||
|    header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data); |  | ||||||
|  } |  | ||||||
|  template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header) |  | ||||||
|  { |  | ||||||
|     |  | ||||||
|    GridBase *grid = field._grid; |  | ||||||
|    std::string format = getFormatString<vLorentzColourMatrixF>(); |  | ||||||
|    header.floating_point = format; |  | ||||||
|    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac |  | ||||||
|    GridMetaData(grid,header);  |  | ||||||
|    GaugeStatistics(field,header); |  | ||||||
|    MachineCharacteristics(header); |  | ||||||
|  } |  | ||||||
|  template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header) |  | ||||||
|  { |  | ||||||
|    GridBase *grid = field._grid; |  | ||||||
|    std::string format = getFormatString<vLorentzColourMatrixD>(); |  | ||||||
|    header.floating_point = format; |  | ||||||
|    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac |  | ||||||
|    GridMetaData(grid,header);  |  | ||||||
|    GaugeStatistics(field,header); |  | ||||||
|    MachineCharacteristics(header); |  | ||||||
|  } |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Utilities ; these are QCD aware |  | ||||||
|     ////////////////////////////////////////////////////////////////////// |  | ||||||
|     inline void reconstruct3(LorentzColourMatrix & cm) |  | ||||||
|     { |  | ||||||
|       const int x=0; |  | ||||||
|       const int y=1; |  | ||||||
|       const int z=2; |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy |  | ||||||
| 	cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz |  | ||||||
| 	cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Some data types for intermediate storage |  | ||||||
|     //////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; |  | ||||||
|  |  | ||||||
|     typedef iLorentzColour2x3<Complex>  LorentzColour2x3; |  | ||||||
|     typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; |  | ||||||
|     typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D; |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Simple classes for precision conversion |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////// |  | ||||||
| template <class fobj, class sobj> |  | ||||||
| struct BinarySimpleUnmunger { |  | ||||||
|   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; |  | ||||||
|   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; |  | ||||||
|    |  | ||||||
|   void operator()(sobj &in, fobj &out) { |  | ||||||
|     // take word by word and transform accoding to the status |  | ||||||
|     fobj_stype *out_buffer = (fobj_stype *)&out; |  | ||||||
|     sobj_stype *in_buffer = (sobj_stype *)∈ |  | ||||||
|     size_t fobj_words = sizeof(out) / sizeof(fobj_stype); |  | ||||||
|     size_t sobj_words = sizeof(in) / sizeof(sobj_stype); |  | ||||||
|     assert(fobj_words == sobj_words); |  | ||||||
|      |  | ||||||
|     for (unsigned int word = 0; word < sobj_words; word++) |  | ||||||
|       out_buffer[word] = in_buffer[word];  // type conversion on the fly |  | ||||||
|      |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <class fobj, class sobj> |  | ||||||
| struct BinarySimpleMunger { |  | ||||||
|   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; |  | ||||||
|   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; |  | ||||||
|  |  | ||||||
|   void operator()(fobj &in, sobj &out) { |  | ||||||
|     // take word by word and transform accoding to the status |  | ||||||
|     fobj_stype *in_buffer = (fobj_stype *)∈ |  | ||||||
|     sobj_stype *out_buffer = (sobj_stype *)&out; |  | ||||||
|     size_t fobj_words = sizeof(in) / sizeof(fobj_stype); |  | ||||||
|     size_t sobj_words = sizeof(out) / sizeof(sobj_stype); |  | ||||||
|     assert(fobj_words == sobj_words); |  | ||||||
|      |  | ||||||
|     for (unsigned int word = 0; word < sobj_words; word++) |  | ||||||
|       out_buffer[word] = in_buffer[word];  // type conversion on the fly |  | ||||||
|      |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct GaugeSimpleMunger{ |  | ||||||
|       void operator()(fobj &in, sobj &out) { |  | ||||||
|         for (int mu = 0; mu < Nd; mu++) { |  | ||||||
|           for (int i = 0; i < Nc; i++) { |  | ||||||
|           for (int j = 0; j < Nc; j++) { |  | ||||||
| 	    out(mu)()(i, j) = in(mu)()(i, j); |  | ||||||
| 	  }} |  | ||||||
|         } |  | ||||||
|       }; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template <class fobj, class sobj> |  | ||||||
|     struct GaugeSimpleUnmunger { |  | ||||||
|  |  | ||||||
|       void operator()(sobj &in, fobj &out) { |  | ||||||
|         for (int mu = 0; mu < Nd; mu++) { |  | ||||||
|           for (int i = 0; i < Nc; i++) { |  | ||||||
|           for (int j = 0; j < Nc; j++) { |  | ||||||
| 	    out(mu)()(i, j) = in(mu)()(i, j); |  | ||||||
| 	  }} |  | ||||||
|         } |  | ||||||
|       }; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct Gauge3x2munger{ |  | ||||||
|       void operator() (fobj &in,sobj &out){ |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  for(int i=0;i<2;i++){ |  | ||||||
| 	  for(int j=0;j<3;j++){ |  | ||||||
| 	    out(mu)()(i,j) = in(mu)(i)(j); |  | ||||||
| 	  }} |  | ||||||
| 	} |  | ||||||
| 	reconstruct3(out); |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct Gauge3x2unmunger{ |  | ||||||
|       void operator() (sobj &in,fobj &out){ |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	  for(int i=0;i<2;i++){ |  | ||||||
| 	  for(int j=0;j<3;j++){ |  | ||||||
| 	    out(mu)(i)(j) = in(mu)()(i,j); |  | ||||||
| 	  }} |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } |  | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user