mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 18:19:34 +01:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			feature/la
			...
			feature/ha
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | e57eafe388 | 
							
								
								
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -92,7 +92,6 @@ build*/* | |||||||
| ##################### | ##################### | ||||||
| *.xcodeproj/* | *.xcodeproj/* | ||||||
| build.sh | build.sh | ||||||
| .vscode |  | ||||||
|  |  | ||||||
| # Eigen source # | # Eigen source # | ||||||
| ################ | ################ | ||||||
| @@ -107,10 +106,6 @@ lib/fftw/* | |||||||
| m4/lt* | m4/lt* | ||||||
| m4/libtool.m4 | m4/libtool.m4 | ||||||
|  |  | ||||||
| # github pages # |  | ||||||
| ################ |  | ||||||
| gh-pages/ |  | ||||||
|  |  | ||||||
| # Buck files # | # Buck files # | ||||||
| ############## | ############## | ||||||
| .buck* | .buck* | ||||||
| @@ -121,5 +116,4 @@ make-bin-BUCK.sh | |||||||
| # generated sources # | # generated sources # | ||||||
| ##################### | ##################### | ||||||
| lib/qcd/spin/gamma-gen/*.h | lib/qcd/spin/gamma-gen/*.h | ||||||
| lib/qcd/spin/gamma-gen/*.cc | lib/qcd/spin/gamma-gen/*.cc | ||||||
|  |  | ||||||
							
								
								
									
										67
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										67
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -9,6 +9,62 @@ matrix: | |||||||
|     - os:        osx |     - os:        osx | ||||||
|       osx_image: xcode8.3 |       osx_image: xcode8.3 | ||||||
|       compiler: clang |       compiler: clang | ||||||
|  |     - compiler: gcc | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.9 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: VERSION=-4.9 | ||||||
|  |     - compiler: gcc | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-5 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: VERSION=-5 | ||||||
|  |     - compiler: clang | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.8 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|  |     - compiler: clang | ||||||
|  |       addons: | ||||||
|  |         apt: | ||||||
|  |           sources: | ||||||
|  |             - ubuntu-toolchain-r-test | ||||||
|  |           packages: | ||||||
|  |             - g++-4.8 | ||||||
|  |             - libmpfr-dev | ||||||
|  |             - libgmp-dev | ||||||
|  |             - libmpc-dev | ||||||
|  |             - libopenmpi-dev | ||||||
|  |             - openmpi-bin | ||||||
|  |             - binutils-dev | ||||||
|  |       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|        |        | ||||||
| before_install: | before_install: | ||||||
|     - export GRIDDIR=`pwd` |     - export GRIDDIR=`pwd` | ||||||
| @@ -22,10 +78,6 @@ install: | |||||||
|     - export CC=$CC$VERSION |     - export CC=$CC$VERSION | ||||||
|     - export CXX=$CXX$VERSION |     - export CXX=$CXX$VERSION | ||||||
|     - echo $PATH |     - echo $PATH | ||||||
|     - which autoconf |  | ||||||
|     - autoconf  --version |  | ||||||
|     - which automake |  | ||||||
|     - automake  --version |  | ||||||
|     - which $CC |     - which $CC | ||||||
|     - $CC  --version |     - $CC  --version | ||||||
|     - which $CXX |     - which $CXX | ||||||
| @@ -43,4 +95,9 @@ script: | |||||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4 |     - make -j4 | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||||
|     - make check |     - echo make clean | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							| @@ -3,15 +3,10 @@ SUBDIRS = lib benchmarks tests extras | |||||||
|  |  | ||||||
| include $(top_srcdir)/doxygen.inc | include $(top_srcdir)/doxygen.inc | ||||||
|  |  | ||||||
| bin_SCRIPTS=grid-config | tests: all | ||||||
|  | 	$(MAKE) -C tests tests | ||||||
|  |  | ||||||
|  | .PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||||
| .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) |  | ||||||
|  |  | ||||||
| tests-local: all |  | ||||||
| bench-local: all |  | ||||||
| check-local: all |  | ||||||
|  |  | ||||||
| AM_CXXFLAGS += -I$(top_builddir)/include | AM_CXXFLAGS += -I$(top_builddir)/include | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I m4 | ACLOCAL_AMFLAGS = -I m4 | ||||||
|   | |||||||
							
								
								
									
										302
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										302
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,13 +1,41 @@ | |||||||
| # Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid) | # Grid | ||||||
|  | <table> | ||||||
|  | <tr> | ||||||
|  |     <td>Last stable release</td> | ||||||
|  |     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||||
|  |     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> | ||||||
|  |     </td> | ||||||
|  | </tr> | ||||||
|  | <tr> | ||||||
|  |     <td>Development branch</td> | ||||||
|  |     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||||
|  |     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> | ||||||
|  |     </td> | ||||||
|  | </tr> | ||||||
|  | </table> | ||||||
|  |  | ||||||
| **Data parallel C++ mathematical object library.** | **Data parallel C++ mathematical object library.** | ||||||
|  |  | ||||||
| License: GPL v2. | License: GPL v2. | ||||||
|  |  | ||||||
| Last update June 2017. | Last update Nov 2016. | ||||||
|  |  | ||||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||||
|  |  | ||||||
|  | ### Bug report | ||||||
|  |  | ||||||
|  | _To help us track and resolve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ | ||||||
|  |  | ||||||
|  | When you file an issue, please go through the following checklist: | ||||||
|  |  | ||||||
|  | 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  | ||||||
|  | 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. | ||||||
|  | 3. Give the exact `configure` command used. | ||||||
|  | 4. Attach `config.log`. | ||||||
|  | 5. Attach `config.summary`. | ||||||
|  | 6. Attach the output of `make V=1`. | ||||||
|  | 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Description | ### Description | ||||||
| @@ -30,68 +58,13 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi | |||||||
| for most programmers. | for most programmers. | ||||||
|  |  | ||||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||||
| Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. | Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). | ||||||
|  |  | ||||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.  | These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | ||||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||||
|  |  | ||||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | MPI, OpenMP, and SIMD parallelism are present in the library. | ||||||
| Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. | Please see https://arxiv.org/abs/1512.03487 for more detail. | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Compilers |  | ||||||
|  |  | ||||||
| Intel ICPC v16.0.3 and later |  | ||||||
|  |  | ||||||
| Clang v3.5 and later (need 3.8 and later for OpenMP) |  | ||||||
|  |  | ||||||
| GCC   v4.9.x (recommended) |  | ||||||
|  |  | ||||||
| GCC   v6.3 and later |  | ||||||
|  |  | ||||||
| ### Important:  |  | ||||||
|  |  | ||||||
| Some versions of GCC appear to have a bug under high optimisation (-O2, -O3). |  | ||||||
|  |  | ||||||
| The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates. |  | ||||||
|  |  | ||||||
| GCC   v5.x |  | ||||||
|  |  | ||||||
| GCC   v6.1, v6.2 |  | ||||||
|  |  | ||||||
| ### Bug report |  | ||||||
|  |  | ||||||
| _To help us track and resolve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ |  | ||||||
|  |  | ||||||
| When you file an issue, please go through the following checklist: |  | ||||||
|  |  | ||||||
| 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  |  | ||||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. |  | ||||||
| 3. Give the exact `configure` command used. |  | ||||||
| 4. Attach `config.log`. |  | ||||||
| 5. Attach `grid.config.summary`. |  | ||||||
| 6. Attach the output of `make V=1`. |  | ||||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. |  | ||||||
|  |  | ||||||
| ### Required libraries |  | ||||||
| Grid requires: |  | ||||||
|  |  | ||||||
| [GMP](https://gmplib.org/),  |  | ||||||
|  |  | ||||||
| [MPFR](http://www.mpfr.org/)  |  | ||||||
|  |  | ||||||
| Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. |  | ||||||
|  |  | ||||||
| Grid optionally uses: |  | ||||||
|  |  | ||||||
| [HDF5](https://support.hdfgroup.org/HDF5/)   |  | ||||||
|  |  | ||||||
| [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.  |  | ||||||
|  |  | ||||||
| [FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. |  | ||||||
|  |  | ||||||
| LAPACK either generic version or Intel MKL library. |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Quick start | ### Quick start | ||||||
| First, start by cloning the repository: | First, start by cloning the repository: | ||||||
| @@ -122,10 +95,10 @@ install Grid. Other options are detailed in the next section, you can also use ` | |||||||
| `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | ||||||
| customise the build. | customise the build. | ||||||
|  |  | ||||||
| Finally, you can build, check, and install Grid: | Finally, you can build and install Grid: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| make; make check; make install | make; make install | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||||
| @@ -148,7 +121,7 @@ If you want to build all the tests at once just use `make tests`. | |||||||
| - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | ||||||
| - `--enable-precision={single|double}`: set the default precision (default: `double`). | - `--enable-precision={single|double}`: set the default precision (default: `double`). | ||||||
| - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication targets is detailed in a section below. | - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication targets is detailed in a section below. | ||||||
| - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). | - `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). | ||||||
| - `--disable-timers`: disable system dependent high-resolution timers. | - `--disable-timers`: disable system dependent high-resolution timers. | ||||||
| - `--enable-chroma`: enable Chroma regression tests. | - `--enable-chroma`: enable Chroma regression tests. | ||||||
| - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | ||||||
| @@ -162,6 +135,7 @@ The following options can be use with the `--enable-comms=` option to target dif | |||||||
| | `none`         | no communications                                             | | | `none`         | no communications                                             | | ||||||
| | `mpi[-auto]`   | MPI communications                                            | | | `mpi[-auto]`   | MPI communications                                            | | ||||||
| | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | ||||||
|  | | `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | ||||||
| | `shmem `       | Cray SHMEM communications                                     | | | `shmem `       | Cray SHMEM communications                                     | | ||||||
|  |  | ||||||
| For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | ||||||
| @@ -179,13 +153,13 @@ The following options can be use with the `--enable-simd=` option to target diff | |||||||
| | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | ||||||
| | `AVX2`      | AVX 2 (256 bit)                        | | | `AVX2`      | AVX 2 (256 bit)                        | | ||||||
| | `AVX512`    | AVX 512 bit                            | | | `AVX512`    | AVX 512 bit                            | | ||||||
| | `NEONv8`    | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit)                     | | | `QPX`       | QPX (256 bit)                          | | ||||||
| | `QPX`       | IBM QPX (256 bit)                      | |  | ||||||
|  |  | ||||||
| Alternatively, some CPU codenames can be directly used: | Alternatively, some CPU codenames can be directly used: | ||||||
|  |  | ||||||
| | `<code>`    | Description                            | | | `<code>`    | Description                            | | ||||||
| | ----------- | -------------------------------------- | | | ----------- | -------------------------------------- | | ||||||
|  | | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | | ||||||
| | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||||
| | `BGQ`       | Blue Gene/Q                            | | | `BGQ`       | Blue Gene/Q                            | | ||||||
|  |  | ||||||
| @@ -202,205 +176,21 @@ The following configuration is recommended for the Intel Knights Landing platfor | |||||||
| ``` bash | ``` bash | ||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi-auto  \ |              --enable-comms=mpi-auto \ | ||||||
|  |              --with-gmp=<path>        \ | ||||||
|  |              --with-mpfr=<path>       \ | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=icpc MPICXX=mpiicpc |              CXX=icpc MPICXX=mpiicpc | ||||||
| ``` | ``` | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi       \ |              --enable-comms=mpi       \ | ||||||
|  |              --with-gmp=<path>        \ | ||||||
|  |              --with-mpfr=<path>       \ | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=CC CC=cc |              CXX=CC CC=cc | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| Knight's Landing with Intel Omnipath adapters with two adapters per node  |  | ||||||
| presently performs better with use of more than one rank per node, using shared memory  |  | ||||||
| for interior communication. This is the mpi3 communications implementation.  |  | ||||||
| We recommend four ranks per node for best performance, but optimum is local volume dependent. |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=KNL        \ |  | ||||||
|              --enable-comms=mpi3-auto \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CC=icpc MPICXX=mpiicpc  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| ### Build setup for Intel Haswell Xeon platform |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the Intel Haswell platform: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3-auto \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=icpc MPICXX=mpiicpc |  | ||||||
| ``` |  | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=CC CC=cc |  | ||||||
| ``` |  | ||||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  |  | ||||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using |  | ||||||
| ``` |  | ||||||
|         export I_MPI_PIN=1 |  | ||||||
| ``` |  | ||||||
| This is the default. |  | ||||||
|  |  | ||||||
| ### Build setup for Intel Skylake Xeon platform |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the Intel Skylake platform: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX512     \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=mpiicpc |  | ||||||
| ``` |  | ||||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX512     \ |  | ||||||
|              --enable-comms=mpi3      \ |  | ||||||
|              --enable-mkl             \ |  | ||||||
|              CXX=CC CC=cc |  | ||||||
| ``` |  | ||||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  |  | ||||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using |  | ||||||
| ```  |  | ||||||
|         export I_MPI_PIN=1 |  | ||||||
| ``` |  | ||||||
| This is the default.  |  | ||||||
|  |  | ||||||
| #### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance (using NUMA MPI mapping):  |  | ||||||
|  |  | ||||||
| mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18  |  | ||||||
|  |  | ||||||
| TBA |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Build setup for AMD EPYC / RYZEN |  | ||||||
|  |  | ||||||
| The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. |  | ||||||
| So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total |  | ||||||
| are common. Each chip within the module exposes a separate NUMA domain. |  | ||||||
| There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. |  | ||||||
| MPI-3 is recommended with the use of four ranks per socket, |  | ||||||
| and 8 threads per rank.  |  | ||||||
|  |  | ||||||
| The following configuration is recommended for the AMD EPYC platform. |  | ||||||
|  |  | ||||||
| ``` bash |  | ||||||
| ../configure --enable-precision=double\ |  | ||||||
|              --enable-simd=AVX2       \ |  | ||||||
|              --enable-comms=mpi3 \ |  | ||||||
|              CXX=mpicxx  |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: |  | ||||||
| ``` bash |  | ||||||
|                --with-gmp=<path>        \ |  | ||||||
|                --with-mpfr=<path>       \ |  | ||||||
| ``` |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  |  | ||||||
|  |  | ||||||
| Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. |  | ||||||
| This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.  |  | ||||||
|  |  | ||||||
| It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and |  | ||||||
| shared memory to communicate within this node: |  | ||||||
|  |  | ||||||
| mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4  |  | ||||||
|  |  | ||||||
| Where omp_bind.sh does the following: |  | ||||||
| ``` |  | ||||||
| #!/bin/bash |  | ||||||
|  |  | ||||||
| numanode=` expr $PMI_RANK % 8 ` |  | ||||||
| basecore=`expr $numanode \* 16` |  | ||||||
| core0=`expr $basecore + 0 ` |  | ||||||
| core1=`expr $basecore + 2 ` |  | ||||||
| core2=`expr $basecore + 4 ` |  | ||||||
| core3=`expr $basecore + 6 ` |  | ||||||
| core4=`expr $basecore + 8 ` |  | ||||||
| core5=`expr $basecore + 10 ` |  | ||||||
| core6=`expr $basecore + 12 ` |  | ||||||
| core7=`expr $basecore + 14 ` |  | ||||||
|  |  | ||||||
| export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" |  | ||||||
| echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY |  | ||||||
|  |  | ||||||
| $@ |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| Performance: |  | ||||||
|  |  | ||||||
| #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance (using NUMA MPI mapping):  |  | ||||||
|  |  | ||||||
| mpirun  -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 |  | ||||||
|  |  | ||||||
| TBA |  | ||||||
|  |  | ||||||
| ### Build setup for BlueGene/Q |  | ||||||
|  |  | ||||||
| To be written... |  | ||||||
|  |  | ||||||
| ### Build setup for ARM Neon |  | ||||||
|  |  | ||||||
| To be written... |  | ||||||
|  |  | ||||||
| ### Build setup for laptops, other compilers, non-cluster builds |  | ||||||
|  |  | ||||||
| Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX), |  | ||||||
| and omit the enable-mkl flag.  |  | ||||||
|  |  | ||||||
| Single node builds are enabled with  |  | ||||||
| ``` |  | ||||||
|             --enable-comms=none |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| FFTW support that is not in the default search path may then be enabled with |  | ||||||
| ``` |  | ||||||
|     --with-fftw=<installpath> |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. |  | ||||||
|  |  | ||||||
							
								
								
									
										33
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,32 +1,23 @@ | |||||||
| TODO: | TODO: | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
| Large item work list: | Peter's work list: | ||||||
|  | 2)- Precision conversion and sort out localConvert      <--  | ||||||
| 1)- BG/Q port and check ; Andrew says ok. | 3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started  | ||||||
| 2)- Christoph's local basis expansion Lanczos | 4)- Binary I/O speed up & x-strips | ||||||
| -- | -- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet | ||||||
| 3a)- RNG I/O in ILDG/SciDAC (minor) | -- Physical propagator interface | ||||||
| 3b)- Precision conversion and sort out localConvert      <-- partial/easy | -- Conserved currents | ||||||
| 3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | -- GaugeFix into central location | ||||||
| 4)- Physical propagator interface | -- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||||
| 5)- Conserved currents | -- HDCR resume | ||||||
| 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations |  | ||||||
| 7)- HDCR resume |  | ||||||
|  |  | ||||||
| Recent DONE  | Recent DONE  | ||||||
| -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori |  | ||||||
| -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE |  | ||||||
| -- GaugeFix into central location                      <-- DONE |  | ||||||
| -- Scidac and Ildg metadata handling                   <-- DONE |  | ||||||
| -- Binary I/O MPI2 IO                                  <-- DONE |  | ||||||
| -- Binary I/O speed up & x-strips                      <-- DONE |  | ||||||
| -- Cut down the exterior overhead                      <-- DONE | -- Cut down the exterior overhead                      <-- DONE | ||||||
| -- Interior legs from SHM comms                        <-- DONE | -- Interior legs from SHM comms                        <-- DONE | ||||||
| -- Half-precision comms                                <-- DONE | -- Half-precision comms                                <-- DONE | ||||||
| -- Merge high precision reduction into develop         <-- DONE | -- Merge high precision reduction into develop         | ||||||
| -- BlockCG, BCGrQ                                      <-- DONE | -- multiRHS DWF; benchmark on Cori/BNL for comms elimination | ||||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE |  | ||||||
|    -- slice* linalg routines for multiRHS, BlockCG     |    -- slice* linalg routines for multiRHS, BlockCG     | ||||||
|  |  | ||||||
| ----- | ----- | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								VERSION
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								VERSION
									
									
									
									
									
								
							| @@ -1,5 +1,6 @@ | |||||||
| Version : 0.7.0 | Version : 0.6.0 | ||||||
|  |  | ||||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | - AVX512, AVX2, AVX, SSE good | ||||||
| - MPI and MPI3 comms optimisations for KNL and OPA finished | - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | ||||||
| - Half precision comms | - MPI and MPI3 | ||||||
|  | - HiRep, Smearing, Generic gauge group | ||||||
|   | |||||||
| @@ -1,800 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_memory_bandwidth.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; /* used by Benchmark::DWF5 for the operator under test */ |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; |  | ||||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; |  | ||||||
| /* File-scope result tables and reference values; their use is not visible in this part of the file. */ |  | ||||||
|  |  | ||||||
| std::vector<int> L_list; |  | ||||||
| std::vector<int> Ls_list; |  | ||||||
| std::vector<double> mflop_list; |  | ||||||
|  |  | ||||||
| double mflop_ref; |  | ||||||
| double mflop_ref_err; |  | ||||||
|  |  | ||||||
| int NN_global; /* set to the node count by Benchmark::DWF5 and Benchmark::DWF */ |  | ||||||
|  |  | ||||||
| struct time_statistics{ /* Summary of a sample of timings: mean, standard error of the mean, minimum and maximum. */ |  | ||||||
|   double mean; |  | ||||||
|   double err; |  | ||||||
|   double min; |  | ||||||
|   double max; |  | ||||||
|   /* Fill all four members from the samples in v; v is taken by value, so the caller's vector is copied. */ |  | ||||||
|   void statistics(std::vector<double> v){ |  | ||||||
|       double sum = std::accumulate(v.begin(), v.end(), 0.0); |  | ||||||
|       mean = sum / v.size(); |  | ||||||
|       /* err = sqrt( sum_i (x_i - mean)^2 / (N*(N-1)) ) with N = v.size() */ |  | ||||||
|       std::vector<double> diff(v.size()); |  | ||||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); |  | ||||||
|       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); |  | ||||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); /* NOTE(review): the divisor is zero when v.size() is 0 or 1 */ |  | ||||||
|       /* NOTE(review): for an empty v, minmax_element returns end() and the dereferences below are undefined. */ |  | ||||||
|       auto result = std::minmax_element(v.begin(), v.end()); |  | ||||||
|       min = *result.first; |  | ||||||
|       max = *result.second; |  | ||||||
| } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| void comms_header(){ /* Print the column headings for the bandwidth table written by Benchmark::Comms. */ |  | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| Gamma::Algebra Gmu [] = { /* Gamma algebra elements indexed by direction mu = 0..3 (X,Y,Z,T); used as Gamma(Gmu[mu]) in Benchmark::DWF. */ |  | ||||||
|   Gamma::Algebra::GammaX, |  | ||||||
|   Gamma::Algebra::GammaY, |  | ||||||
|   Gamma::Algebra::GammaZ, |  | ||||||
|   Gamma::Algebra::GammaT |  | ||||||
| }; |  | ||||||
| struct controls { /* One benchmark configuration, applied per case in the Benchmark DWF routines. */ |  | ||||||
|   int Opt; /* copied into QCD::WilsonKernelsStatic::Opt */ |  | ||||||
|   int CommsOverlap; /* copied into QCD::WilsonKernelsStatic::Comms */ |  | ||||||
|   Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; /* passed to CartesianCommunicator::SetCommunicatorPolicy */ |  | ||||||
|   //  int HugePages; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class Benchmark { |  | ||||||
| public: |  | ||||||
|   static void Decomposition (void ) { |  | ||||||
|     /* Log the thread count, the default MPI layout, and for each vector type its size in bits and GridDefaultSimd(4,Nsimd) layout. */ |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; |  | ||||||
|     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static void Comms(void) |  | ||||||
|   { /* For each local lattice size, time Nloop rounds of StencilSendToRecvFrom over all split directions and log bandwidth statistics. */ |  | ||||||
|     int Nloop=200; |  | ||||||
|     int nmu=0; |  | ||||||
|     int maxlat=32; |  | ||||||
|  |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|     /* nmu counts the dimensions that are split over more than one MPI rank */ |  | ||||||
|     for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |  | ||||||
|  |  | ||||||
|     std::vector<double> t_time(Nloop); |  | ||||||
|     time_statistics timestat; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|     comms_header(); |  | ||||||
|  |  | ||||||
|     for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|       for(int Ls=8;Ls<=8;Ls*=2){ |  | ||||||
| 	/* Global lattice is lat sites per MPI rank in each of the four dimensions */ |  | ||||||
| 	std::vector<int> latt_size  ({lat*mpi_layout[0], |  | ||||||
| 	      lat*mpi_layout[1], |  | ||||||
| 	      lat*mpi_layout[2], |  | ||||||
| 	      lat*mpi_layout[3]}); |  | ||||||
|  |  | ||||||
| 	GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
| 	RealD Nrank = Grid._Nprocessors; |  | ||||||
| 	RealD Nnode = Grid.NodeCount(); |  | ||||||
| 	RealD ppn = Nrank/Nnode; |  | ||||||
| 	/* Eight send and eight receive buffers (one pair per dir), each lat^3*Ls HalfSpinColourVectorD, zero-filled */ |  | ||||||
| 	std::vector<HalfSpinColourVectorD *> xbuf(8); |  | ||||||
| 	std::vector<HalfSpinColourVectorD *> rbuf(8); |  | ||||||
| 	Grid.ShmBufferFreeAll(); |  | ||||||
| 	for(int d=0;d<8;d++){ |  | ||||||
| 	  xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |  | ||||||
| 	int ncomm; |  | ||||||
| 	double dbytes; |  | ||||||
| 	std::vector<double> times(Nloop); /* NOTE(review): never read or written below; the timings go into t_time */ |  | ||||||
| 	for(int i=0;i<Nloop;i++){ |  | ||||||
|  |  | ||||||
| 	  double start=usecond(); |  | ||||||
|  |  | ||||||
| 	  dbytes=0; |  | ||||||
| 	  ncomm=0; |  | ||||||
|  |  | ||||||
| 	  parallel_for(int dir=0;dir<8;dir++){ |  | ||||||
| 	    /* dir 0-3 shifts by one rank along mu = dir; dir 4-7 shifts by mpi_layout[mu]-1 ranks along mu = dir-4 */ |  | ||||||
| 	    double tbytes; |  | ||||||
| 	    int mu =dir % 4; |  | ||||||
|  |  | ||||||
| 	    if (mpi_layout[mu]>1 ) { |  | ||||||
| 	         |  | ||||||
| 	      int xmit_to_rank; |  | ||||||
| 	      int recv_from_rank; |  | ||||||
| 	      if ( dir == mu ) {  |  | ||||||
| 		int comm_proc=1; |  | ||||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	      } else {  |  | ||||||
| 		int comm_proc = mpi_layout[mu]-1; |  | ||||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	      } |  | ||||||
| 	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, |  | ||||||
| 						 (void *)&rbuf[dir][0], recv_from_rank, |  | ||||||
| 						 bytes,dir); |  | ||||||
| 	   |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp atomic |  | ||||||
| #endif |  | ||||||
| 	      ncomm++; |  | ||||||
|  |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp atomic |  | ||||||
| #endif |  | ||||||
| 	      dbytes+=tbytes; |  | ||||||
| 	    } |  | ||||||
| 	  } |  | ||||||
| 	  Grid.Barrier(); |  | ||||||
| 	  double stop=usecond(); |  | ||||||
| 	  t_time[i] = stop-start; // microseconds |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	//	for(int i=0;i<t_time.size();i++){ |  | ||||||
| 	//	  std::cout << i<<" "<<t_time[i]<<std::endl; |  | ||||||
| 	//	} |  | ||||||
| 	/* dbytes is reset at the top of every iteration, so here it holds the byte count of the final iteration only */ |  | ||||||
| 	dbytes=dbytes*ppn; |  | ||||||
| 	double xbytes    = dbytes*0.5; |  | ||||||
| 	double rbytes    = dbytes*0.5; |  | ||||||
| 	double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
| 		 <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
| 		 <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
| 		 <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
| 		 << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
| 		 << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|   |  | ||||||
| 	 |  | ||||||
| 	    } |  | ||||||
|     }     |  | ||||||
|  |  | ||||||
|     return; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static void Memory(void) |  | ||||||
|   { /* Time repeated z = a*x - y on lattices of increasing local size and log bytes, GB/s, Gflop/s, seconds and GB/s per node. */ |  | ||||||
|     const int Nvec=8; |  | ||||||
|     typedef Lattice< iVector< vReal,Nvec> > LatticeVec; |  | ||||||
|     typedef iVector<vReal,Nvec> Vec; |  | ||||||
|  |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|     /* NOTE(review): the banner below says a*x + y, while the timed loop computes a*x-y */ |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |  | ||||||
|    |  | ||||||
|     uint64_t NP; /* rank count; assigned in the loop but not otherwise used in this function */ |  | ||||||
|     uint64_t NN; |  | ||||||
|  |  | ||||||
|   /* NLOOP scales the iteration count as (lmax/lat)^4, giving 100 iterations when lat == lmax */ |  | ||||||
|   uint64_t lmax=48; |  | ||||||
| #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) |  | ||||||
|  |  | ||||||
|     GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |  | ||||||
|     for(int lat=8;lat<=lmax;lat+=4){ |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |  | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; /* NOTE(review): the product is evaluated in int before widening to int64_t */ |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|  |  | ||||||
|       NP= Grid.RankCount(); |  | ||||||
|       NN =Grid.NodeCount(); |  | ||||||
|  |  | ||||||
|       Vec rn ; random(sRNG,rn); |  | ||||||
|       /* All three lattices start filled with the same random vector rn */ |  | ||||||
|       LatticeVec z(&Grid); z=rn; |  | ||||||
|       LatticeVec x(&Grid); x=rn; |  | ||||||
|       LatticeVec y(&Grid); y=rn; |  | ||||||
|       double a=2.0; |  | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |  | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
| 	z=a*x-y; |  | ||||||
|         x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away |  | ||||||
|         y._odata[4]=z._odata[4]; |  | ||||||
|       } |  | ||||||
|       double stop=usecond(); |  | ||||||
|       double time = (stop-start)/Nloop*1000; /* presumably nanoseconds per iteration (usecond() in microseconds), making bytes/time GB/s */ |  | ||||||
|       |  | ||||||
|       double flops=vol*Nvec*2;// mul,add |  | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |  | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3)  |  | ||||||
| 	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000. |  | ||||||
| 	       << "\t\t"<< bytes/time/NN <<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   static double DWF5(int Ls,int L) |  | ||||||
|   { /* Time WilsonFermion5DR::DhopEO for each kernel/comms case on an L^4 x Ls problem, log the rates, and return the best mflop/s. */ |  | ||||||
|     RealD mass=0.1; /* NOTE(review): not used anywhere in this function */ |  | ||||||
|     RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|     double mflops; |  | ||||||
|     double mflops_best = 0; |  | ||||||
|     double mflops_worst= 0; |  | ||||||
|     std::vector<double> mflops_all; |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     // Set/Get the layout & grid size |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); |  | ||||||
|     std::vector<int> local({L,L,L,L}); |  | ||||||
|     /* TmpGrid (64^4) is only queried for rank and node counts. NOTE(review): no grid created in this function is deleted in the visible code. */ |  | ||||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  |  | ||||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     uint64_t NP = TmpGrid->RankCount(); |  | ||||||
|     uint64_t NN = TmpGrid->NodeCount(); |  | ||||||
|     NN_global=NN; |  | ||||||
|     uint64_t SHM=NP/NN; |  | ||||||
|     /* Only 1, 2, 4 or 8 ranks per node are handled; any other ratio hits assert(0) */ |  | ||||||
|     std::vector<int> internal; |  | ||||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); |  | ||||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); |  | ||||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); |  | ||||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); |  | ||||||
|     else assert(0); |  | ||||||
|     /* nodes = MPI layout divided by the intranode layout; latt4 = L times the node count in each dimension */ |  | ||||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); |  | ||||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); |  | ||||||
|  |  | ||||||
|     ///////// Welcome message //////////// |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Lattice Init //////////// |  | ||||||
|     GridCartesian         * UGrid    = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * UrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|     GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); |  | ||||||
|     GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); |  | ||||||
|     GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|     ///////// RNG Init //////////// |  | ||||||
|     std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|     std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|     GridParallelRNG          RNG5(sFGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Source preparation //////////// |  | ||||||
|     LatticeFermion src   (sFGrid); random(RNG5,src); |  | ||||||
|     LatticeFermion tmp   (sFGrid); |  | ||||||
|     /* Scale the random source by 1/sqrt(norm2(src)) */ |  | ||||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|     src = src*N2; |  | ||||||
|      |  | ||||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|  |  | ||||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); |  | ||||||
|     LatticeFermion src_e (sFrbGrid); |  | ||||||
|     LatticeFermion src_o (sFrbGrid); |  | ||||||
|     LatticeFermion r_e   (sFrbGrid); |  | ||||||
|     LatticeFermion r_o   (sFrbGrid); |  | ||||||
|     LatticeFermion r_eo  (sFGrid); |  | ||||||
|     LatticeFermion err   (sFGrid); |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd,src_o,src); |  | ||||||
|       /* num_cases and fmt must match the Cases table below: the two inline-asm cases exist only when AVX512 is defined */ |  | ||||||
| #if defined(AVX512)  |  | ||||||
|       const int num_cases = 6; |  | ||||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #else |  | ||||||
|       const int num_cases = 4; |  | ||||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #endif |  | ||||||
|       controls Cases [] = { |  | ||||||
| #ifdef AVX512 |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| #endif |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } |  | ||||||
|       };  |  | ||||||
|  |  | ||||||
|       for(int c=0;c<num_cases;c++) { |  | ||||||
| 	/* Apply this case's settings to the global kernel and communicator state before timing */ |  | ||||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; |  | ||||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; |  | ||||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	/* nwarm untimed warm-up calls, then ncall individually timed calls */ |  | ||||||
| 	int nwarm = 100; |  | ||||||
| 	uint64_t ncall = 1000; |  | ||||||
|  |  | ||||||
| 	double t0=usecond(); |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	for(int i=0;i<nwarm;i++){ |  | ||||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	} |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	double t1=usecond(); |  | ||||||
|  |  | ||||||
| 	sDw.ZeroCounters(); |  | ||||||
| 	time_statistics timestat; |  | ||||||
| 	std::vector<double> t_time(ncall); |  | ||||||
| 	for(uint64_t i=0;i<ncall;i++){ |  | ||||||
| 	  t0=usecond(); |  | ||||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	  t1=usecond(); |  | ||||||
| 	  t_time[i] = t1-t0; |  | ||||||
| 	} |  | ||||||
| 	sFGrid->Barrier(); |  | ||||||
| 	 |  | ||||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
| 	double flops=(1344.0*volume)/2; /* presumably 1344 flops per site on one checkerboard (half the volume) - TODO confirm */ |  | ||||||
| 	double mf_hi, mf_lo, mf_err; |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	mf_hi = flops/timestat.min; |  | ||||||
| 	mf_lo = flops/timestat.max; |  | ||||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; |  | ||||||
| 	/* Record this case's rate and update the running best and worst (0 means not yet set) */ |  | ||||||
| 	mflops = flops/timestat.mean; |  | ||||||
| 	mflops_all.push_back(mflops); |  | ||||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; |  | ||||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; |  | ||||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; |  | ||||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank   "<< mflops/NP<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node   "<< mflops/NN<<std::endl; |  | ||||||
|  |  | ||||||
| 	sDw.Report(); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|       double robust = mflops_worst/mflops_best;; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage <<fmt << std::endl; |  | ||||||
|       std::cout<<GridLogMessage; |  | ||||||
|       /* Per-node mflop/s for every case, in the column order given by fmt */ |  | ||||||
|       for(int i=0;i<mflops_all.size();i++){ |  | ||||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; |  | ||||||
|       } |  | ||||||
|       std::cout<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     return mflops_best; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static double DWF(int Ls,int L, double & robust) |  | ||||||
|   { |  | ||||||
|     RealD mass=0.1; |  | ||||||
|     RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|     double mflops; |  | ||||||
|     double mflops_best = 0; |  | ||||||
|     double mflops_worst= 0; |  | ||||||
|     std::vector<double> mflops_all; |  | ||||||
|  |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     // Set/Get the layout & grid size |  | ||||||
|     /////////////////////////////////////////////////////// |  | ||||||
|     int threads = GridThread::GetThreads(); |  | ||||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); |  | ||||||
|     std::vector<int> local({L,L,L,L}); |  | ||||||
|  |  | ||||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  |  | ||||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     uint64_t NP = TmpGrid->RankCount(); |  | ||||||
|     uint64_t NN = TmpGrid->NodeCount(); |  | ||||||
|     NN_global=NN; |  | ||||||
|     uint64_t SHM=NP/NN; |  | ||||||
|  |  | ||||||
|     std::vector<int> internal; |  | ||||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); |  | ||||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); |  | ||||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); |  | ||||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); |  | ||||||
|     else assert(0); |  | ||||||
|  |  | ||||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); |  | ||||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); |  | ||||||
|  |  | ||||||
|     ///////// Welcome message //////////// |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     ///////// Lattice Init //////////// |  | ||||||
|     GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|     GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|      |  | ||||||
|     ///////// RNG Init //////////// |  | ||||||
|     std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|     std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|     ///////// Source preparation //////////// |  | ||||||
|     LatticeFermion src   (FGrid); random(RNG5,src); |  | ||||||
|     LatticeFermion ref   (FGrid); |  | ||||||
|     LatticeFermion tmp   (FGrid); |  | ||||||
|  |  | ||||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|     src = src*N2; |  | ||||||
|      |  | ||||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|  |  | ||||||
|     DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|  |  | ||||||
|     //////////////////////////////////// |  | ||||||
|     // Naive wilson implementation |  | ||||||
|     //////////////////////////////////// |  | ||||||
|     { |  | ||||||
|       LatticeGaugeField Umu5d(FGrid);  |  | ||||||
|       std::vector<LatticeColourMatrix> U(4,FGrid); |  | ||||||
|       for(int ss=0;ss<Umu._grid->oSites();ss++){ |  | ||||||
| 	for(int s=0;s<Ls;s++){ |  | ||||||
| 	  Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       ref = zero; |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); |  | ||||||
|       } |  | ||||||
|       for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	 |  | ||||||
| 	tmp = U[mu]*Cshift(src,mu+1,1); |  | ||||||
| 	ref=ref + tmp - Gamma(Gmu[mu])*tmp; |  | ||||||
| 	 |  | ||||||
| 	tmp =adj(U[mu])*src; |  | ||||||
| 	tmp =Cshift(tmp,mu+1,-1); |  | ||||||
| 	ref=ref + tmp + Gamma(Gmu[mu])*tmp; |  | ||||||
|       } |  | ||||||
|       ref = -0.5*ref; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeFermion src_e (FrbGrid); |  | ||||||
|     LatticeFermion src_o (FrbGrid); |  | ||||||
|     LatticeFermion r_e   (FrbGrid); |  | ||||||
|     LatticeFermion r_o   (FrbGrid); |  | ||||||
|     LatticeFermion r_eo  (FGrid); |  | ||||||
|     LatticeFermion err   (FGrid); |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd,src_o,src); |  | ||||||
|  |  | ||||||
| #if defined(AVX512)  |  | ||||||
|       const int num_cases = 6; |  | ||||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #else |  | ||||||
|       const int num_cases = 4; |  | ||||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); |  | ||||||
| #endif |  | ||||||
|       controls Cases [] = { |  | ||||||
| #ifdef AVX512 |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| #endif |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, |  | ||||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } |  | ||||||
|       };  |  | ||||||
|  |  | ||||||
|       for(int c=0;c<num_cases;c++) { |  | ||||||
|  |  | ||||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; |  | ||||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; |  | ||||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |  | ||||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
| 	int nwarm = 200; |  | ||||||
| 	double t0=usecond(); |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	for(int i=0;i<nwarm;i++){ |  | ||||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	} |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	double t1=usecond(); |  | ||||||
| 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); |  | ||||||
| 	//	if (ncall < 500) ncall = 500; |  | ||||||
| 	uint64_t ncall = 1000; |  | ||||||
|  |  | ||||||
| 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); |  | ||||||
|  |  | ||||||
| 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; |  | ||||||
| 	Dw.ZeroCounters(); |  | ||||||
|  |  | ||||||
| 	time_statistics timestat; |  | ||||||
| 	std::vector<double> t_time(ncall); |  | ||||||
| 	for(uint64_t i=0;i<ncall;i++){ |  | ||||||
| 	  t0=usecond(); |  | ||||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	  t1=usecond(); |  | ||||||
| 	  t_time[i] = t1-t0; |  | ||||||
| 	} |  | ||||||
| 	FGrid->Barrier(); |  | ||||||
| 	 |  | ||||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
| 	double flops=(1344.0*volume)/2; |  | ||||||
| 	double mf_hi, mf_lo, mf_err; |  | ||||||
|  |  | ||||||
| 	timestat.statistics(t_time); |  | ||||||
| 	mf_hi = flops/timestat.min; |  | ||||||
| 	mf_lo = flops/timestat.max; |  | ||||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; |  | ||||||
|  |  | ||||||
| 	mflops = flops/timestat.mean; |  | ||||||
| 	mflops_all.push_back(mflops); |  | ||||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; |  | ||||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; |  | ||||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; |  | ||||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl; |  | ||||||
|  |  | ||||||
| 	Dw.Report(); |  | ||||||
|  |  | ||||||
| 	Dw.DhopEO(src_o,r_e,DaggerNo); |  | ||||||
| 	Dw.DhopOE(src_e,r_o,DaggerNo); |  | ||||||
| 	setCheckerboard(r_eo,r_o); |  | ||||||
| 	setCheckerboard(r_eo,r_e); |  | ||||||
| 	err = r_eo-ref;  |  | ||||||
| 	std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |  | ||||||
| 	assert((norm2(err)<1.0e-4)); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|       robust = mflops_worst/mflops_best; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust  <<std::endl; |  | ||||||
|       std::cout<<GridLogMessage <<fmt << std::endl; |  | ||||||
|       std::cout<<GridLogMessage ; |  | ||||||
|  |  | ||||||
|       for(int i=0;i<mflops_all.size();i++){ |  | ||||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; |  | ||||||
|       } |  | ||||||
|       std::cout<<std::endl; |  | ||||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     return mflops_best; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); |  | ||||||
| #ifdef KNL |  | ||||||
|   LebesgueOrder::Block = std::vector<int>({8,2,2,2}); |  | ||||||
| #else |  | ||||||
|   LebesgueOrder::Block = std::vector<int>({2,2,2,2}); |  | ||||||
| #endif |  | ||||||
|   Benchmark::Decomposition(); |  | ||||||
|  |  | ||||||
|   int do_memory=1; |  | ||||||
|   int do_comms =1; |  | ||||||
|   int do_su3   =0; |  | ||||||
|   int do_wilson=1; |  | ||||||
|   int do_dwf   =1; |  | ||||||
|  |  | ||||||
|   if ( do_su3 ) { |  | ||||||
|     // empty for now |  | ||||||
|   } |  | ||||||
| #if 1 |  | ||||||
|   int sel=2; |  | ||||||
|   std::vector<int> L_list({8,12,16,24}); |  | ||||||
| #else |  | ||||||
|   int sel=1; |  | ||||||
|   std::vector<int> L_list({8,12}); |  | ||||||
| #endif |  | ||||||
|   int selm1=sel-1; |  | ||||||
|   std::vector<double> robust_list; |  | ||||||
|  |  | ||||||
|   std::vector<double> wilson; |  | ||||||
|   std::vector<double> dwf4; |  | ||||||
|   std::vector<double> dwf5; |  | ||||||
|  |  | ||||||
|   if ( do_wilson ) { |  | ||||||
|     int Ls=1; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       double robust; |  | ||||||
|       wilson.push_back(Benchmark::DWF(1,L_list[l],robust)); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int Ls=16; |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       double robust; |  | ||||||
|       double result = Benchmark::DWF(Ls,L_list[l],robust) ; |  | ||||||
|       dwf4.push_back(result); |  | ||||||
|       robust_list.push_back(robust); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     for(int l=0;l<L_list.size();l++){ |  | ||||||
|       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl; |  | ||||||
|   for(int l=0;l<L_list.size();l++){ |  | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl; |  | ||||||
|   } |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int NN=NN_global; |  | ||||||
|   if ( do_memory ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     Benchmark::Memory(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_comms && (NN>1) ) { |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|     Benchmark::Comms(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if ( do_dwf ) { |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl; |  | ||||||
|   for(int l=0;l<L_list.size();l++){ |  | ||||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl; |  | ||||||
|   } |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point     result: "  << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl; |  | ||||||
|   std::cout<<std::setprecision(3); |  | ||||||
|   std::cout<<GridLogMessage << " Comparison point robustness: "  << robust_list[sel] <<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
| @@ -31,32 +31,6 @@ using namespace std; | |||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace Grid::QCD; | using namespace Grid::QCD; | ||||||
|  |  | ||||||
// Summary statistics of a set of timing samples.
//
//   mean : arithmetic mean of the samples
//   err  : standard error of the mean, sqrt( sum (x-mean)^2 / (n*(n-1)) )
//   min  : smallest sample
//   max  : largest sample
//
// All four fields are 0 until statistics() is called, and are reset to 0
// when it is called with no samples. With a single sample err is 0, since
// the spread cannot be estimated.
struct time_statistics{
  double mean = 0.0;
  double err  = 0.0;
  double min  = 0.0;
  double max  = 0.0;

  // Recompute all fields from the samples in v (v is not modified).
  void statistics(const std::vector<double>& v){
    mean = 0.0;
    err  = 0.0;
    min  = 0.0;
    max  = 0.0;
    if (v.empty()) return; // nothing to summarise; avoids 0/0 and dereferencing end()

    const double n = static_cast<double>(v.size());
    mean = std::accumulate(v.begin(), v.end(), 0.0) / n;

    // The n*(n-1) denominator is zero for a single sample, so err stays 0 there.
    if (v.size() > 1) {
      const double centre = mean;
      const double sq_sum = std::accumulate(v.begin(), v.end(), 0.0,
                                            [centre](double acc, double x) {
                                              const double d = x - centre;
                                              return acc + d*d;
                                            });
      err = std::sqrt(sq_sum / (n*(n - 1.0)));
    }

    const auto extremes = std::minmax_element(v.begin(), v.end());
    min = *extremes.first;
    max = *extremes.second;
  }
};
|  |  | ||||||
| void header(){ |  | ||||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" |  | ||||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| @@ -66,21 +40,17 @@ int main (int argc, char ** argv) | |||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   int Nloop=100; |   int Nloop=10; | ||||||
|   int nmu=0; |   int nmu=0; | ||||||
|   int maxlat=32; |  | ||||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; |  | ||||||
|   std::vector<double> t_time(Nloop); |  | ||||||
|   time_statistics timestat; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |   int maxlat=24; | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -88,23 +58,15 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	 |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|       for(int mu=0;mu<8;mu++){ |  | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| @@ -117,6 +79,7 @@ int main (int argc, char ** argv) | |||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
|  | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    Grid.SendToRecvFromBegin(requests, | 	    Grid.SendToRecvFromBegin(requests, | ||||||
| 				   (void *)&xbuf[mu][0], | 				   (void *)&xbuf[mu][0], | ||||||
| @@ -139,24 +102,18 @@ int main (int argc, char ** argv) | |||||||
| 	} | 	} | ||||||
| 	Grid.SendToRecvFromComplete(requests); | 	Grid.SendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       double dbytes    = bytes*ppn; |  | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |       double time = stop-start; // microseconds | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
| @@ -164,32 +121,25 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); |       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|  |  | ||||||
|       for(int mu=0;mu<8;mu++){ |  | ||||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); |  | ||||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|      |      | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| @@ -228,37 +178,30 @@ int main (int argc, char ** argv) | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double stop=usecond(); | ||||||
|        |        | ||||||
|       double dbytes    = bytes*ppn; |       double dbytes    = bytes; | ||||||
|       double xbytes    = dbytes*2.0*ncomm; |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |       double time = stop-start; | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|        |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|     } |     } | ||||||
|   }   |   }   | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Nloop=10; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -266,9 +209,6 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -276,115 +216,16 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double dbytes; |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| 	for(int mu=0;mu<4;mu++){ |  | ||||||
| 	 |  | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { |  | ||||||
| 	   |  | ||||||
| 	    ncomm++; |  | ||||||
| 	    int comm_proc=1; |  | ||||||
| 	    int xmit_to_rank; |  | ||||||
| 	    int recv_from_rank; |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	    dbytes+= |  | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, |  | ||||||
| 					      (void *)&xbuf[mu][0], |  | ||||||
| 					      xmit_to_rank, |  | ||||||
| 					      (void *)&rbuf[mu][0], |  | ||||||
| 					      recv_from_rank, |  | ||||||
| 					      bytes,mu); |  | ||||||
| 	 |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; |  | ||||||
| 	   |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); |  | ||||||
| 	    dbytes+= |  | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, |  | ||||||
| 					      (void *)&xbuf[mu+4][0], |  | ||||||
| 					      xmit_to_rank, |  | ||||||
| 					      (void *)&rbuf[mu+4][0], |  | ||||||
| 					      recv_from_rank, |  | ||||||
| 					      bytes,mu+4); |  | ||||||
| 	   |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
| 	Grid.StencilSendToRecvFromComplete(requests,0); |  | ||||||
| 	Grid.Barrier(); |  | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
| 	 |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |  | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |  | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|   }     |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   header(); |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |  | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |  | ||||||
|       				    lat*mpi_layout[1], |  | ||||||
|       				    lat*mpi_layout[2], |  | ||||||
|       				    lat*mpi_layout[3]}); |  | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |  | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |  | ||||||
|       Grid.ShmBufferFreeAll(); |  | ||||||
|       for(int d=0;d<8;d++){ |  | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       int ncomm; |  | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |  | ||||||
|       double dbytes; |  | ||||||
|       for(int i=0;i<Nloop;i++){ |  | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; |  | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
| @@ -396,64 +237,52 @@ int main (int argc, char ** argv) | |||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 					    (void *)&xbuf[mu][0], | ||||||
| 					      (void *)&xbuf[mu][0], | 					    xmit_to_rank, | ||||||
| 					      xmit_to_rank, | 					    (void *)&rbuf[mu][0], | ||||||
| 					      (void *)&rbuf[mu][0], | 					    recv_from_rank, | ||||||
| 					      recv_from_rank, | 					    bytes); | ||||||
| 					      bytes,mu); | 	 | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu); |  | ||||||
| 	    requests.resize(0); |  | ||||||
|  |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    dbytes+= | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 					    (void *)&xbuf[mu+4][0], | ||||||
| 					      (void *)&xbuf[mu+4][0], | 					    xmit_to_rank, | ||||||
| 					      xmit_to_rank, | 					    (void *)&rbuf[mu+4][0], | ||||||
| 					      (void *)&rbuf[mu+4][0], | 					    recv_from_rank, | ||||||
| 					      recv_from_rank, | 					    bytes); | ||||||
| 					      bytes,mu+4); |  | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu+4); |  | ||||||
| 	    requests.resize(0); |  | ||||||
| 	   | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
|  | 	Grid.StencilSendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
| 	 |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|  |       double rbytes    = xbytes; | ||||||
|  |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |       double time = stop-start; // microseconds | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|   |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Nloop=100; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   header(); |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=8;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -461,9 +290,6 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       RealD Nrank = Grid._Nprocessors; |  | ||||||
|       RealD Nnode = Grid.NodeCount(); |  | ||||||
|       RealD ppn = Nrank/Nnode; |  | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -471,71 +297,65 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|       double dbytes; |  | ||||||
|  |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	double start=usecond(); |  | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
| 	dbytes=0; |  | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
|  | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	parallel_for(int dir=0;dir<8;dir++){ | 	 | ||||||
|  |  | ||||||
| 	  double tbytes; |  | ||||||
| 	  int mu =dir % 4; |  | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { | 	  if (mpi_layout[mu]>1 ) { | ||||||
| 	   | 	   | ||||||
| 	    ncomm++; | 	    ncomm++; | ||||||
|  | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	    if ( dir == mu ) {  | 	     | ||||||
| 	      int comm_proc=1; | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	    } else {  | 					    (void *)&xbuf[mu][0], | ||||||
| 	      int comm_proc = mpi_layout[mu]-1; | 					    xmit_to_rank, | ||||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 					    (void *)&rbuf[mu][0], | ||||||
| 	    } | 					    recv_from_rank, | ||||||
|  | 					    bytes); | ||||||
|  | 	    Grid.StencilSendToRecvFromComplete(requests); | ||||||
|  | 	    requests.resize(0); | ||||||
|  |  | ||||||
| 	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); | 	   | ||||||
|  | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| #pragma omp atomic | 	    Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 	    dbytes+=tbytes; | 					    (void *)&xbuf[mu+4][0], | ||||||
|  | 					    xmit_to_rank, | ||||||
|  | 					    (void *)&rbuf[mu+4][0], | ||||||
|  | 					    recv_from_rank, | ||||||
|  | 					    bytes); | ||||||
|  | 	    Grid.StencilSendToRecvFromComplete(requests); | ||||||
|  | 	    requests.resize(0); | ||||||
|  | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
| 	double stop=usecond(); |  | ||||||
| 	t_time[i] = stop-start; // microseconds |  | ||||||
|       } |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |  | ||||||
|       timestat.statistics(t_time); |       double dbytes    = bytes; | ||||||
|  |       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||||
|  |       double rbytes    = xbytes; | ||||||
|  |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       dbytes=dbytes*ppn; |       double time = stop-start; // microseconds | ||||||
|       double xbytes    = dbytes*0.5; |  | ||||||
|       double rbytes    = dbytes*0.5; |  | ||||||
|       double bidibytes = dbytes; |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" |  | ||||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) |  | ||||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " |  | ||||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   |  | ||||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " |  | ||||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; |  | ||||||
|   |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,22 +1,28 @@ | |||||||
|  /************************************************************************************* |     /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_dwf.cc |     Source file: ./benchmarks/Benchmark_dwf.cc | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| @@ -51,13 +57,7 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   std::vector<int> latt4 = GridDefaultLatt(); |   std::vector<int> latt4 = GridDefaultLatt(); | ||||||
|   int Ls=16; |   const int Ls=16; | ||||||
|   for(int i=0;i<argc;i++) |  | ||||||
|     if(std::string(argv[i]) == "-Ls"){ |  | ||||||
|       std::stringstream ss(argv[i+1]); ss >> Ls; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
| @@ -151,7 +151,6 @@ int main (int argc, char ** argv) | |||||||
|   RealD M5  =1.8; |   RealD M5  =1.8; | ||||||
|  |  | ||||||
|   RealD NP = UGrid->_Nprocessors; |   RealD NP = UGrid->_Nprocessors; | ||||||
|   RealD NN = UGrid->NodeCount(); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; |   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||||
| @@ -161,17 +160,13 @@ int main (int argc, char ** argv) | |||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|  |  | ||||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|   int ncall =500; |   int ncall =1000; | ||||||
|   if (1) { |   if (1) { | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     Dw.ZeroCounters(); |     Dw.ZeroCounters(); | ||||||
| @@ -194,7 +189,6 @@ int main (int argc, char ** argv) | |||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
| @@ -231,7 +225,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
| @@ -247,10 +240,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -282,7 +271,6 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; |     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||||
|     sDw.Report(); |     sDw.Report(); | ||||||
|     RealD sum=0; |     RealD sum=0; | ||||||
| @@ -308,7 +296,6 @@ int main (int argc, char ** argv) | |||||||
|       std::cout<< "sD ERR   \n " << err  <<std::endl; |       std::cout<< "sD ERR   \n " << err  <<std::endl; | ||||||
|     } |     } | ||||||
|     assert(sum < 1.0e-4); |     assert(sum < 1.0e-4); | ||||||
|  |  | ||||||
|      |      | ||||||
|     if(1){ |     if(1){ | ||||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
| @@ -316,10 +303,6 @@ int main (int argc, char ** argv) | |||||||
|       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  | ||||||
| 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  | ||||||
| @@ -359,7 +342,6 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|       sDw.Report(); |       sDw.Report(); | ||||||
|  |  | ||||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); |       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||||
| @@ -388,23 +370,8 @@ int main (int argc, char ** argv) | |||||||
|       } |       } | ||||||
|       assert(error<1.0e-4); |       assert(error<1.0e-4); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   if(0){ |  | ||||||
|     std::cout << "Single cache warm call to sDw.Dhop " <<std::endl; |  | ||||||
|     for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ |  | ||||||
|       sDw.Dhop(ssrc,sresult,0); |  | ||||||
|       PerformanceCounter Counter(i); |  | ||||||
|       Counter.Start(); |  | ||||||
|       sDw.Dhop(ssrc,sresult,0); |  | ||||||
|       Counter.Stop(); |  | ||||||
|       Counter.Report(); |  | ||||||
|     } |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { // Naive wilson dag implementation |   { // Naive wilson dag implementation | ||||||
|     ref = zero; |     ref = zero; | ||||||
| @@ -453,15 +420,14 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|  |  | ||||||
|   // S-direction is INNERMOST and takes no part in the parity. |   // S-direction is INNERMOST and takes no part in the parity. | ||||||
|  |   static int Opt;  // these are a temporary hack | ||||||
|  |   static int Comms;  // these are a temporary hack | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; |   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -482,7 +448,6 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); |   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
| @@ -509,9 +474,8 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; |   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; |   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||||
|  |  | ||||||
|   assert(norm2(src_e)<1.0e-4); |   //assert(norm2(src_e)<1.0e-4); | ||||||
|   assert(norm2(src_o)<1.0e-4); |   //assert(norm2(src_o)<1.0e-4); | ||||||
|   Grid_finalize(); |  | ||||||
|   exit(0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,190 +0,0 @@ | |||||||
| #include <Grid/Grid.h> |  | ||||||
| #include <sstream> |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| template<class d> |  | ||||||
| struct scal { |  | ||||||
|   d internal; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|   Gamma::Algebra Gmu [] = { |  | ||||||
|     Gamma::Algebra::GammaX, |  | ||||||
|     Gamma::Algebra::GammaY, |  | ||||||
|     Gamma::Algebra::GammaZ, |  | ||||||
|     Gamma::Algebra::GammaT |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF; |  | ||||||
| typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   int Ls=16; |  | ||||||
|   for(int i=0;i<argc;i++) |  | ||||||
|     if(std::string(argv[i]) == "-Ls"){ |  | ||||||
|       std::stringstream ss(argv[i+1]); ss >> Ls; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   int threads = GridThread::GetThreads(); |  | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "Ls = " << Ls << std::endl; |  | ||||||
|  |  | ||||||
|   std::vector<int> latt4 = GridDefaultLatt(); |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); |  | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|   std::vector<int> seeds4({1,2,3,4}); |  | ||||||
|   std::vector<int> seeds5({5,6,7,8}); |  | ||||||
|    |  | ||||||
|   std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; |  | ||||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); |  | ||||||
|   std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; |  | ||||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); |  | ||||||
|   std::cout << GridLogMessage << "Initialised RNGs" << std::endl; |  | ||||||
|  |  | ||||||
|   GparityLatticeFermionF src   (FGrid); random(RNG5,src); |  | ||||||
|   RealD N2 = 1.0/::sqrt(norm2(src)); |  | ||||||
|   src = src*N2; |  | ||||||
|  |  | ||||||
|   GparityLatticeFermionF result(FGrid); result=zero; |  | ||||||
|   GparityLatticeFermionF    ref(FGrid);    ref=zero; |  | ||||||
|   GparityLatticeFermionF    tmp(FGrid); |  | ||||||
|   GparityLatticeFermionF    err(FGrid); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "Drawing gauge field" << std::endl; |  | ||||||
|   LatticeGaugeFieldF Umu(UGrid);  |  | ||||||
|   SU3::HotConfiguration(RNG4,Umu);  |  | ||||||
|   std::cout << GridLogMessage << "Random gauge initialised " << std::endl; |  | ||||||
|  |  | ||||||
|   RealD mass=0.1; |  | ||||||
|   RealD M5  =1.8; |  | ||||||
|  |  | ||||||
|   RealD NP = UGrid->_Nprocessors; |  | ||||||
|   RealD NN = UGrid->NodeCount(); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermion::Dhop                  "<<std::endl; |  | ||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl; |  | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; |  | ||||||
| #endif |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |  | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "* SINGLE/SINGLE"<<std::endl; |  | ||||||
|   GparityDomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|   int ncall =1000; |  | ||||||
|   if (1) { |  | ||||||
|     FGrid->Barrier(); |  | ||||||
|     Dw.ZeroCounters(); |  | ||||||
|     Dw.Dhop(src,result,0); |  | ||||||
|     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; |  | ||||||
|     double t0=usecond(); |  | ||||||
|     for(int i=0;i<ncall;i++){ |  | ||||||
|       __SSC_START; |  | ||||||
|       Dw.Dhop(src,result,0); |  | ||||||
|       __SSC_STOP; |  | ||||||
|     } |  | ||||||
|     double t1=usecond(); |  | ||||||
|     FGrid->Barrier(); |  | ||||||
|      |  | ||||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
|     double flops=2*1344*volume*ncall; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     Dw.Report(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "* SINGLE/HALF"<<std::endl; |  | ||||||
|   GparityDomainWallFermionFH DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|   if (1) { |  | ||||||
|     FGrid->Barrier(); |  | ||||||
|     DwH.ZeroCounters(); |  | ||||||
|     DwH.Dhop(src,result,0); |  | ||||||
|     double t0=usecond(); |  | ||||||
|     for(int i=0;i<ncall;i++){ |  | ||||||
|       __SSC_START; |  | ||||||
|       DwH.Dhop(src,result,0); |  | ||||||
|       __SSC_STOP; |  | ||||||
|     } |  | ||||||
|     double t1=usecond(); |  | ||||||
|     FGrid->Barrier(); |  | ||||||
|      |  | ||||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
|     double flops=2*1344*volume*ncall; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     DwH.Report(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); |  | ||||||
|   GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d); |  | ||||||
|   GridCartesian         * FGrid_d   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_d); |  | ||||||
|   GridRedBlackCartesian * FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_d); |  | ||||||
|  |  | ||||||
|    |  | ||||||
|   std::cout << GridLogMessage<< "* DOUBLE/DOUBLE"<<std::endl; |  | ||||||
|   GparityLatticeFermionD src_d(FGrid_d); |  | ||||||
|   precisionChange(src_d,src); |  | ||||||
|  |  | ||||||
|   LatticeGaugeFieldD Umu_d(UGrid_d);  |  | ||||||
|   precisionChange(Umu_d,Umu); |  | ||||||
|  |  | ||||||
|   GparityLatticeFermionD result_d(FGrid_d); |  | ||||||
|  |  | ||||||
|   GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5); |  | ||||||
|   if (1) { |  | ||||||
|     FGrid_d->Barrier(); |  | ||||||
|     DwD.ZeroCounters(); |  | ||||||
|     DwD.Dhop(src_d,result_d,0); |  | ||||||
|     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; |  | ||||||
|     double t0=usecond(); |  | ||||||
|     for(int i=0;i<ncall;i++){ |  | ||||||
|       __SSC_START; |  | ||||||
|       DwD.Dhop(src_d,result_d,0); |  | ||||||
|       __SSC_STOP; |  | ||||||
|     } |  | ||||||
|     double t1=usecond(); |  | ||||||
|     FGrid_d->Barrier(); |  | ||||||
|      |  | ||||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; |  | ||||||
|     double flops=2*1344*volume*ncall; |  | ||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; |  | ||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |  | ||||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; |  | ||||||
|     DwD.Report(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| @@ -55,21 +55,21 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|   uint64_t lmax=96; |   uint64_t lmax=44; | ||||||
| #define NLOOP (10*lmax*lmax*lmax*lmax/vol) | #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       double a=2.0; |       double a=2.0; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -83,7 +83,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |       double bytes=3*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -94,17 +94,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|    |    | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       double a=2.0; |       double a=2.0; | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
| @@ -119,7 +119,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|       |       | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); |       double bytes=3*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -129,20 +129,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       RealD a=2.0; |       RealD a=2.0; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -154,7 +154,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=2.0*vol*Nvec*sizeof(Real); |       double bytes=2*vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*1;// mul |       double flops=vol*Nvec*1;// mul | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
| @@ -166,17 +166,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=8;lat<=lmax;lat+=8){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|       LatticeVec z(&Grid);// random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid);// random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid);// random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|       RealD a=2.0; |       RealD a=2.0; | ||||||
|       Real nn;       |       Real nn;       | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
| @@ -187,7 +187,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=1.0*vol*Nvec*sizeof(Real); |       double bytes=vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); |   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); | ||||||
|   GridRedBlackCartesian     RBGrid(&Grid); |   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|   | |||||||
| @@ -35,14 +35,13 @@ using namespace Grid::QCD; | |||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| #define LMAX (64) |  | ||||||
|  |  | ||||||
|   int64_t Nloop=20; |   int Nloop=1000; | ||||||
|  |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|   int64_t threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
| @@ -51,19 +50,19 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid);// random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid);// random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid);// random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	x=x*y; | 	x=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -83,20 +82,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	z=x*y; | 	z=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -114,20 +113,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	mult(z,x,y); | 	mult(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -145,20 +144,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=LMAX;lat+=2){ |   for(int lat=2;lat<=32;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); |       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int64_t i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
| 	mac(z,x,y); | 	mac(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|   | |||||||
| @@ -58,7 +58,7 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); |   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); | ||||||
|   GridRedBlackCartesian     RBGrid(&Grid); |   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|   | |||||||
| @@ -93,7 +93,7 @@ int main (int argc, char ** argv) | |||||||
| 	  std::cout << latt_size.back() << "\t\t"; | 	  std::cout << latt_size.back() << "\t\t"; | ||||||
|  |  | ||||||
| 	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout); | 	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout); | ||||||
| 	  GridRedBlackCartesian RBGrid(&Grid); | 	  GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
| 	  GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); | 	  GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); | ||||||
| 	  LatticeGaugeField Umu(&Grid); random(pRNG,Umu); | 	  LatticeGaugeField Umu(&Grid); random(pRNG,Umu); | ||||||
|   | |||||||
| @@ -1,7 +1,11 @@ | |||||||
| include Make.inc | include Make.inc | ||||||
|  |  | ||||||
| bench-local: all | simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o | ||||||
| 	./Benchmark_su3 |  | ||||||
| 	./Benchmark_memory_bandwidth | EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a | ||||||
| 	./Benchmark_wilson |  | ||||||
| 	./Benchmark_dwf --dslash-unroll | libsimple_su3_test_a_SOURCES = simple_su3_test.cc | ||||||
|  |  | ||||||
|  | libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc | ||||||
|  |  | ||||||
|  | libsimple_simd_test_a_SOURCES = simple_simd_test.cc | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| #!/usr/bin/env bash | #!/usr/bin/env bash | ||||||
|  |  | ||||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | ||||||
|  |  | ||||||
| echo "-- deploying Eigen source..." | echo "-- deploying Eigen source..." | ||||||
| wget ${EIGEN_URL} --no-check-certificate | wget ${EIGEN_URL} --no-check-certificate | ||||||
|   | |||||||
							
								
								
									
										210
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										210
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,23 +1,16 @@ | |||||||
| AC_PREREQ([2.63]) | AC_PREREQ([2.63]) | ||||||
| AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) | AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) | ||||||
| AC_CANONICAL_BUILD | AC_CANONICAL_BUILD | ||||||
| AC_CANONICAL_HOST | AC_CANONICAL_HOST | ||||||
| AC_CANONICAL_TARGET | AC_CANONICAL_TARGET | ||||||
| AM_INIT_AUTOMAKE([subdir-objects 1.13]) | AM_INIT_AUTOMAKE(subdir-objects) | ||||||
| AM_EXTRA_RECURSIVE_TARGETS([tests bench]) |  | ||||||
| AC_CONFIG_MACRO_DIR([m4]) | AC_CONFIG_MACRO_DIR([m4]) | ||||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||||
| AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | ||||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||||
|  |  | ||||||
| ################ Get git info |  | ||||||
| #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) |  | ||||||
|  |  | ||||||
| ################ Set flags |  | ||||||
| # do not move! |  | ||||||
| CXXFLAGS="-O3 $CXXFLAGS" |  | ||||||
|  |  | ||||||
| ############### Checks for programs | ############### Checks for programs | ||||||
|  | CXXFLAGS="-O3 $CXXFLAGS" | ||||||
| AC_PROG_CXX | AC_PROG_CXX | ||||||
| AC_PROG_RANLIB | AC_PROG_RANLIB | ||||||
|  |  | ||||||
| @@ -31,14 +24,12 @@ AX_GXX_VERSION | |||||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||||
|       [version of g++ that will compile the code]) |       [version of g++ that will compile the code]) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ############### Checks for typedefs, structures, and compiler characteristics | ############### Checks for typedefs, structures, and compiler characteristics | ||||||
| AC_TYPE_SIZE_T | AC_TYPE_SIZE_T | ||||||
| AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||||
| AC_TYPE_UINT64_T | AC_TYPE_UINT64_T | ||||||
|  |  | ||||||
| ############### OpenMP | ############### OpenMP  | ||||||
| AC_OPENMP | AC_OPENMP | ||||||
| ac_openmp=no | ac_openmp=no | ||||||
| if test "${OPENMP_CXXFLAGS}X" != "X"; then | if test "${OPENMP_CXXFLAGS}X" != "X"; then | ||||||
| @@ -54,14 +45,9 @@ AC_CHECK_HEADERS(malloc/malloc.h) | |||||||
| AC_CHECK_HEADERS(malloc.h) | AC_CHECK_HEADERS(malloc.h) | ||||||
| AC_CHECK_HEADERS(endian.h) | AC_CHECK_HEADERS(endian.h) | ||||||
| AC_CHECK_HEADERS(execinfo.h) | AC_CHECK_HEADERS(execinfo.h) | ||||||
| AC_CHECK_HEADERS(numaif.h) |  | ||||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||||
|  |  | ||||||
| ############## Standard libraries |  | ||||||
| AC_CHECK_LIB([m],[cos]) |  | ||||||
| AC_CHECK_LIB([stdc++],[abort]) |  | ||||||
|  |  | ||||||
| ############### GMP and MPFR | ############### GMP and MPFR | ||||||
| AC_ARG_WITH([gmp], | AC_ARG_WITH([gmp], | ||||||
|     [AS_HELP_STRING([--with-gmp=prefix], |     [AS_HELP_STRING([--with-gmp=prefix], | ||||||
| @@ -74,23 +60,16 @@ AC_ARG_WITH([mpfr], | |||||||
|     [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] |     [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] | ||||||
|     [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) |     [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### FFTW3 | ############### FFTW3  | ||||||
| AC_ARG_WITH([fftw], | AC_ARG_WITH([fftw],     | ||||||
|             [AS_HELP_STRING([--with-fftw=prefix], |             [AS_HELP_STRING([--with-fftw=prefix], | ||||||
|             [try this for a non-standard install prefix of the FFTW3 library])], |             [try this for a non-standard install prefix of the FFTW3 library])], | ||||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] |             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) |             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### LIME | ############### lapack  | ||||||
| AC_ARG_WITH([lime], |  | ||||||
|             [AS_HELP_STRING([--with-lime=prefix], |  | ||||||
|             [try this for a non-standard install prefix of the LIME library])], |  | ||||||
|             [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"] |  | ||||||
|             [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"]) |  | ||||||
|  |  | ||||||
| ############### lapack |  | ||||||
| AC_ARG_ENABLE([lapack], | AC_ARG_ENABLE([lapack], | ||||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], |     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  | ||||||
|     [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) |     [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) | ||||||
|  |  | ||||||
| case ${ac_LAPACK} in | case ${ac_LAPACK} in | ||||||
| @@ -106,7 +85,7 @@ esac | |||||||
|  |  | ||||||
| ############### FP16 conversions | ############### FP16 conversions | ||||||
| AC_ARG_ENABLE([sfw-fp16], | AC_ARG_ENABLE([sfw-fp16], | ||||||
|     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], |     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],  | ||||||
|     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) |     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) | ||||||
| case ${ac_SFW_FP16} in | case ${ac_SFW_FP16} in | ||||||
|     yes) |     yes) | ||||||
| @@ -141,7 +120,7 @@ AC_ARG_WITH([hdf5], | |||||||
|  |  | ||||||
| ############### first-touch | ############### first-touch | ||||||
| AC_ARG_ENABLE([numa], | AC_ARG_ENABLE([numa], | ||||||
|     [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], |     [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],  | ||||||
|     [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) |     [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) | ||||||
|  |  | ||||||
| case ${ac_NUMA} in | case ${ac_NUMA} in | ||||||
| @@ -167,8 +146,8 @@ if test "${ac_MKL}x" != "nox"; then | |||||||
| fi | fi | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([__gmpf_init], [gmp], | AC_SEARCH_LIBS([__gmpf_init], [gmp], | ||||||
|                [AC_SEARCH_LIBS([mpfr_init], [mpfr], |                [AC_SEARCH_LIBS([mpfr_init], [mpfr],  | ||||||
|                                [AC_DEFINE([HAVE_LIBMPFR], [1], |                                [AC_DEFINE([HAVE_LIBMPFR], [1],  | ||||||
|                                           [Define to 1 if you have the `MPFR' library])] |                                           [Define to 1 if you have the `MPFR' library])] | ||||||
|                                [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] |                                [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] | ||||||
|                [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] |                [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] | ||||||
| @@ -177,7 +156,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp], | |||||||
| if test "${ac_LAPACK}x" != "nox"; then | if test "${ac_LAPACK}x" != "nox"; then | ||||||
|     AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], |     AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], | ||||||
|                    [AC_MSG_ERROR("LAPACK enabled but library not found")]) |                    [AC_MSG_ERROR("LAPACK enabled but library not found")]) | ||||||
| fi | fi    | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([fftw_execute], [fftw3], | AC_SEARCH_LIBS([fftw_execute], [fftw3], | ||||||
|                [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], |                [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], | ||||||
| @@ -185,23 +164,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], | |||||||
|                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] |                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] | ||||||
|                [have_fftw=true]) |                [have_fftw=true]) | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([limeCreateReader], [lime], |  | ||||||
|                [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])] |  | ||||||
|                [have_lime=true], |  | ||||||
| 	       [AC_MSG_WARN(C-LIME library was not found in your system. |  | ||||||
| In order to use ILGG file format please install or provide the correct path to your installation |  | ||||||
| Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([crc32], [z], |  | ||||||
|                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] |  | ||||||
|                [have_zlib=true] [LIBS="${LIBS} -lz"], |  | ||||||
| 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([move_pages], [numa], |  | ||||||
|                [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] |  | ||||||
|                [have_libnuma=true] [LIBS="${LIBS} -lnuma"], |  | ||||||
| 	       [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)]) |  | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] |                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||||
|                [have_hdf5=true] |                [have_hdf5=true] | ||||||
| @@ -254,7 +216,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) |  | ||||||
|         SIMD_FLAGS='-march=knl';; |         SIMD_FLAGS='-march=knl';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -262,9 +223,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|                            [generic SIMD vector width (in bytes)]) |                            [generic SIMD vector width (in bytes)]) | ||||||
|         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" |         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       NEONv8) |  | ||||||
|         AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) |  | ||||||
|         SIMD_FLAGS='-march=armv8-a';; |  | ||||||
|       QPX|BGQ) |       QPX|BGQ) | ||||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) |         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
| @@ -293,7 +251,6 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) |  | ||||||
|         SIMD_FLAGS='-xmic-avx512';; |         SIMD_FLAGS='-xmic-avx512';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -331,41 +288,8 @@ case ${ac_PRECISION} in | |||||||
|      double) |      double) | ||||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) |        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |  | ||||||
|      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]); |  | ||||||
|      ;; |  | ||||||
| esac | esac | ||||||
|  |  | ||||||
| ######################  Shared memory allocation technique under MPI3 |  | ||||||
| AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], |  | ||||||
|               [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) |  | ||||||
|  |  | ||||||
| case ${ac_SHM} in |  | ||||||
|  |  | ||||||
|      shmget) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      shmopen) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      hugetlbfs) |  | ||||||
|      AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) |  | ||||||
|      ;; |  | ||||||
|  |  | ||||||
|      *) |  | ||||||
|      AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]); |  | ||||||
|      ;; |  | ||||||
| esac |  | ||||||
|  |  | ||||||
| ######################  Shared base path for SHMMMAP |  | ||||||
| AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], |  | ||||||
|               [Select SHM mmap base path for hugetlbfs])], |  | ||||||
| 	      [ac_SHMPATH=${enable_shmpath}], |  | ||||||
| 	      [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/]) |  | ||||||
| AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) |  | ||||||
|  |  | ||||||
| ############### communication type selection | ############### communication type selection | ||||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | ||||||
|               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) |               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||||
| @@ -375,14 +299,14 @@ case ${ac_COMMS} in | |||||||
|         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) |         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||||
|         comms_type='none' |         comms_type='none' | ||||||
|      ;; |      ;; | ||||||
|  |      mpi3l*) | ||||||
|  |        AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) | ||||||
|  |        comms_type='mpi3l' | ||||||
|  |      ;; | ||||||
|      mpi3*) |      mpi3*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) |         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) | ||||||
|         comms_type='mpi3' |         comms_type='mpi3' | ||||||
|      ;; |      ;; | ||||||
|      mpit) |  | ||||||
|         AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) |  | ||||||
|         comms_type='mpit' |  | ||||||
|      ;; |  | ||||||
|      mpi*) |      mpi*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) |         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||||
|         comms_type='mpi' |         comms_type='mpi' | ||||||
| @@ -392,7 +316,7 @@ case ${ac_COMMS} in | |||||||
|         comms_type='shmem' |         comms_type='shmem' | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|         AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); |         AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
| case ${ac_COMMS} in | case ${ac_COMMS} in | ||||||
| @@ -410,7 +334,7 @@ esac | |||||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPIT,  [ test "${comms_type}X" == "mpitX" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | ||||||
|  |  | ||||||
| ############### RNG selection | ############### RNG selection | ||||||
| @@ -429,7 +353,7 @@ case ${ac_RNG} in | |||||||
|       AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) |       AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); |       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| @@ -446,7 +370,7 @@ case ${ac_TIMERS} in | |||||||
|       AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) |       AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); |       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| @@ -458,7 +382,7 @@ case ${ac_CHROMA} in | |||||||
|      yes|no) |      yes|no) | ||||||
|      ;; |      ;; | ||||||
|      *) |      *) | ||||||
|        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); |        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);  | ||||||
|      ;; |      ;; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| @@ -479,67 +403,12 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg]) | |||||||
|  |  | ||||||
| ############### Ouput | ############### Ouput | ||||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||||
| GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" |  | ||||||
| GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS" |  | ||||||
| GRID_LIBS=$LIBS |  | ||||||
| GRID_SHORT_SHA=`git rev-parse --short HEAD` |  | ||||||
| GRID_SHA=`git rev-parse HEAD` |  | ||||||
| GRID_BRANCH=`git rev-parse --abbrev-ref HEAD` |  | ||||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||||
| AC_SUBST([AM_CFLAGS]) | AC_SUBST([AM_CFLAGS]) | ||||||
| AC_SUBST([AM_CXXFLAGS]) | AC_SUBST([AM_CXXFLAGS]) | ||||||
| AC_SUBST([AM_LDFLAGS]) | AC_SUBST([AM_LDFLAGS]) | ||||||
| AC_SUBST([GRID_CXXFLAGS]) |  | ||||||
| AC_SUBST([GRID_LDFLAGS]) |  | ||||||
| AC_SUBST([GRID_LIBS]) |  | ||||||
| AC_SUBST([GRID_SHA]) |  | ||||||
| AC_SUBST([GRID_BRANCH]) |  | ||||||
|  |  | ||||||
| git_commit=`cd $srcdir && ./scripts/configure.commit` |  | ||||||
|  |  | ||||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |  | ||||||
| Summary of configuration for $PACKAGE v$VERSION |  | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |  | ||||||
| ----- GIT VERSION ------------------------------------- |  | ||||||
| $git_commit |  | ||||||
| ----- PLATFORM ---------------------------------------- |  | ||||||
| architecture (build)        : $build_cpu |  | ||||||
| os (build)                  : $build_os |  | ||||||
| architecture (target)       : $target_cpu |  | ||||||
| os (target)                 : $target_os |  | ||||||
| compiler vendor             : ${ax_cv_cxx_compiler_vendor} |  | ||||||
| compiler version            : ${ax_cv_gxx_version} |  | ||||||
| ----- BUILD OPTIONS ----------------------------------- |  | ||||||
| SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} |  | ||||||
| Threading                   : ${ac_openmp} |  | ||||||
| Communications type         : ${comms_type} |  | ||||||
| Shared memory allocator     : ${ac_SHM} |  | ||||||
| Shared memory mmap path     : ${ac_SHMPATH} |  | ||||||
| Default precision           : ${ac_PRECISION} |  | ||||||
| Software FP16 conversion    : ${ac_SFW_FP16} |  | ||||||
| RNG choice                  : ${ac_RNG} |  | ||||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| LAPACK                      : ${ac_LAPACK} |  | ||||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` |  | ||||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` |  | ||||||
| ----- BUILD FLAGS ------------------------------------- |  | ||||||
| CXXFLAGS: |  | ||||||
| `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` |  | ||||||
| LDFLAGS: |  | ||||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` |  | ||||||
| LIBS: |  | ||||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` |  | ||||||
| -------------------------------------------------------" > grid.configure.summary |  | ||||||
|  |  | ||||||
| GRID_SUMMARY="`cat grid.configure.summary`" |  | ||||||
| AM_SUBST_NOTMAKE([GRID_SUMMARY]) |  | ||||||
| AC_SUBST([GRID_SUMMARY]) |  | ||||||
|  |  | ||||||
| AC_CONFIG_FILES([grid-config], [chmod +x grid-config]) |  | ||||||
| AC_CONFIG_FILES(Makefile) | AC_CONFIG_FILES(Makefile) | ||||||
| AC_CONFIG_FILES(lib/Makefile) | AC_CONFIG_FILES(lib/Makefile) | ||||||
| AC_CONFIG_FILES(tests/Makefile) | AC_CONFIG_FILES(tests/Makefile) | ||||||
| @@ -550,8 +419,6 @@ AC_CONFIG_FILES(tests/forces/Makefile) | |||||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) | AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||||
| AC_CONFIG_FILES(tests/hmc/Makefile) | AC_CONFIG_FILES(tests/hmc/Makefile) | ||||||
| AC_CONFIG_FILES(tests/solver/Makefile) | AC_CONFIG_FILES(tests/solver/Makefile) | ||||||
| AC_CONFIG_FILES(tests/lanczos/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/smearing/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) | AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||||
| AC_CONFIG_FILES(tests/testu01/Makefile) | AC_CONFIG_FILES(tests/testu01/Makefile) | ||||||
| AC_CONFIG_FILES(benchmarks/Makefile) | AC_CONFIG_FILES(benchmarks/Makefile) | ||||||
| @@ -559,7 +426,36 @@ AC_CONFIG_FILES(extras/Makefile) | |||||||
| AC_CONFIG_FILES(extras/Hadrons/Makefile) | AC_CONFIG_FILES(extras/Hadrons/Makefile) | ||||||
| AC_OUTPUT | AC_OUTPUT | ||||||
|  |  | ||||||
|  | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | Summary of configuration for $PACKAGE v$VERSION | ||||||
|  | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | ----- PLATFORM ---------------------------------------- | ||||||
|  | architecture (build)        : $build_cpu | ||||||
|  | os (build)                  : $build_os | ||||||
|  | architecture (target)       : $target_cpu | ||||||
|  | os (target)                 : $target_os | ||||||
|  | compiler vendor             : ${ax_cv_cxx_compiler_vendor} | ||||||
|  | compiler version            : ${ax_cv_gxx_version} | ||||||
|  | ----- BUILD OPTIONS ----------------------------------- | ||||||
|  | SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | ||||||
|  | Threading                   : ${ac_openmp}  | ||||||
|  | Communications type         : ${comms_type} | ||||||
|  | Default precision           : ${ac_PRECISION} | ||||||
|  | Software FP16 conversion    : ${ac_SFW_FP16} | ||||||
|  | RNG choice                  : ${ac_RNG}  | ||||||
|  | GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | LAPACK                      : ${ac_LAPACK} | ||||||
|  | FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||||
|  | ----- BUILD FLAGS ------------------------------------- | ||||||
|  | CXXFLAGS: | ||||||
|  | `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
|  | LDFLAGS: | ||||||
|  | `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
|  | LIBS: | ||||||
|  | `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
|  | -------------------------------------------------------" > config.summary | ||||||
| echo "" | echo "" | ||||||
| cat grid.configure.summary | cat config.summary | ||||||
| echo "" | echo "" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -162,8 +162,7 @@ void Application::saveParameterFile(const std::string parameterFileName) | |||||||
| sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | ||||||
|  |  | ||||||
| #define DEFINE_MEMPEAK \ | #define DEFINE_MEMPEAK \ | ||||||
| GeneticScheduler<unsigned int>::ObjFunc memPeak = \ | auto memPeak = [this](const std::vector<unsigned int> &program)\ | ||||||
| [this](const std::vector<unsigned int> &program)\ |  | ||||||
| {\ | {\ | ||||||
|     unsigned int memPeak;\ |     unsigned int memPeak;\ | ||||||
|     bool         msg;\ |     bool         msg;\ | ||||||
|   | |||||||
| @@ -41,10 +41,9 @@ using namespace Hadrons; | |||||||
| // constructor ///////////////////////////////////////////////////////////////// | // constructor ///////////////////////////////////////////////////////////////// | ||||||
| Environment::Environment(void) | Environment::Environment(void) | ||||||
| { | { | ||||||
|     dim_ = GridDefaultLatt(); |     nd_ = GridDefaultLatt().size(); | ||||||
|     nd_  = dim_.size(); |  | ||||||
|     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( |     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( | ||||||
|         dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), |         GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||||
|         GridDefaultMpi())); |         GridDefaultMpi())); | ||||||
|     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); |     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); | ||||||
|     auto loc = getGrid()->LocalDimensions(); |     auto loc = getGrid()->LocalDimensions(); | ||||||
| @@ -133,16 +132,6 @@ unsigned int Environment::getNd(void) const | |||||||
|     return nd_; |     return nd_; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::vector<int> Environment::getDim(void) const |  | ||||||
| { |  | ||||||
|     return dim_; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int Environment::getDim(const unsigned int mu) const |  | ||||||
| { |  | ||||||
|     return dim_[mu]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // random number generator ///////////////////////////////////////////////////// | // random number generator ///////////////////////////////////////////////////// | ||||||
| void Environment::setSeed(const std::vector<int> &seed) | void Environment::setSeed(const std::vector<int> &seed) | ||||||
| { | { | ||||||
| @@ -282,21 +271,6 @@ std::string Environment::getModuleType(const std::string name) const | |||||||
|     return getModuleType(getModuleAddress(name)); |     return getModuleType(getModuleAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string Environment::getModuleNamespace(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     std::string type = getModuleType(address), ns; |  | ||||||
|      |  | ||||||
|     auto pos2 = type.rfind("::"); |  | ||||||
|     auto pos1 = type.rfind("::", pos2 - 2); |  | ||||||
|      |  | ||||||
|     return type.substr(pos1 + 2, pos2 - pos1 - 2); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::string Environment::getModuleNamespace(const std::string name) const |  | ||||||
| { |  | ||||||
|     return getModuleNamespace(getModuleAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool Environment::hasModule(const unsigned int address) const | bool Environment::hasModule(const unsigned int address) const | ||||||
| { | { | ||||||
|     return (address < module_.size()); |     return (address < module_.size()); | ||||||
| @@ -518,14 +492,7 @@ std::string Environment::getObjectType(const unsigned int address) const | |||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|     { |     { | ||||||
|         if (object_[address].type) |         return typeName(object_[address].type); | ||||||
|         { |  | ||||||
|             return typeName(object_[address].type); |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|             return "<no type>"; |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|     else if (hasObject(address)) |     else if (hasObject(address)) | ||||||
|     { |     { | ||||||
| @@ -565,23 +532,6 @@ Environment::Size Environment::getObjectSize(const std::string name) const | |||||||
|     return getObjectSize(getObjectAddress(name)); |     return getObjectSize(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectModule(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     if (hasObject(address)) |  | ||||||
|     { |  | ||||||
|         return object_[address].module; |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectModule(const std::string name) const |  | ||||||
| { |  | ||||||
|     return getObjectModule(getObjectAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectLs(const unsigned int address) const | unsigned int Environment::getObjectLs(const unsigned int address) const | ||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|   | |||||||
| @@ -106,8 +106,6 @@ public: | |||||||
|     void                    createGrid(const unsigned int Ls); |     void                    createGrid(const unsigned int Ls); | ||||||
|     GridCartesian *         getGrid(const unsigned int Ls = 1) const; |     GridCartesian *         getGrid(const unsigned int Ls = 1) const; | ||||||
|     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; |     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; | ||||||
|     std::vector<int>        getDim(void) const; |  | ||||||
|     int                     getDim(const unsigned int mu) const; |  | ||||||
|     unsigned int            getNd(void) const; |     unsigned int            getNd(void) const; | ||||||
|     // random number generator |     // random number generator | ||||||
|     void                    setSeed(const std::vector<int> &seed); |     void                    setSeed(const std::vector<int> &seed); | ||||||
| @@ -133,8 +131,6 @@ public: | |||||||
|     std::string             getModuleName(const unsigned int address) const; |     std::string             getModuleName(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const unsigned int address) const; |     std::string             getModuleType(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const std::string name) const; |     std::string             getModuleType(const std::string name) const; | ||||||
|     std::string             getModuleNamespace(const unsigned int address) const; |  | ||||||
|     std::string             getModuleNamespace(const std::string name) const; |  | ||||||
|     bool                    hasModule(const unsigned int address) const; |     bool                    hasModule(const unsigned int address) const; | ||||||
|     bool                    hasModule(const std::string name) const; |     bool                    hasModule(const std::string name) const; | ||||||
|     Graph<unsigned int>     makeModuleGraph(void) const; |     Graph<unsigned int>     makeModuleGraph(void) const; | ||||||
| @@ -175,8 +171,6 @@ public: | |||||||
|     std::string             getObjectType(const std::string name) const; |     std::string             getObjectType(const std::string name) const; | ||||||
|     Size                    getObjectSize(const unsigned int address) const; |     Size                    getObjectSize(const unsigned int address) const; | ||||||
|     Size                    getObjectSize(const std::string name) const; |     Size                    getObjectSize(const std::string name) const; | ||||||
|     unsigned int            getObjectModule(const unsigned int address) const; |  | ||||||
|     unsigned int            getObjectModule(const std::string name) const; |  | ||||||
|     unsigned int            getObjectLs(const unsigned int address) const; |     unsigned int            getObjectLs(const unsigned int address) const; | ||||||
|     unsigned int            getObjectLs(const std::string name) const; |     unsigned int            getObjectLs(const std::string name) const; | ||||||
|     bool                    hasObject(const unsigned int address) const; |     bool                    hasObject(const unsigned int address) const; | ||||||
| @@ -187,10 +181,6 @@ public: | |||||||
|     bool                    hasCreatedObject(const std::string name) const; |     bool                    hasCreatedObject(const std::string name) const; | ||||||
|     bool                    isObject5d(const unsigned int address) const; |     bool                    isObject5d(const unsigned int address) const; | ||||||
|     bool                    isObject5d(const std::string name) const; |     bool                    isObject5d(const std::string name) const; | ||||||
|     template <typename T> |  | ||||||
|     bool                    isObjectOfType(const unsigned int address) const; |  | ||||||
|     template <typename T> |  | ||||||
|     bool                    isObjectOfType(const std::string name) const; |  | ||||||
|     Environment::Size       getTotalSize(void) const; |     Environment::Size       getTotalSize(void) const; | ||||||
|     void                    addOwnership(const unsigned int owner, |     void                    addOwnership(const unsigned int owner, | ||||||
|                                          const unsigned int property); |                                          const unsigned int property); | ||||||
| @@ -207,7 +197,6 @@ private: | |||||||
|     bool                                   dryRun_{false}; |     bool                                   dryRun_{false}; | ||||||
|     unsigned int                           traj_, locVol_; |     unsigned int                           traj_, locVol_; | ||||||
|     // grids |     // grids | ||||||
|     std::vector<int>                       dim_; |  | ||||||
|     GridPt                                 grid4d_; |     GridPt                                 grid4d_; | ||||||
|     std::map<unsigned int, GridPt>         grid5d_; |     std::map<unsigned int, GridPt>         grid5d_; | ||||||
|     GridRbPt                               gridRb4d_; |     GridRbPt                               gridRb4d_; | ||||||
| @@ -354,7 +343,7 @@ T * Environment::getObject(const unsigned int address) const | |||||||
|         else |         else | ||||||
|         { |         { | ||||||
|             HADRON_ERROR("object with address " + std::to_string(address) + |             HADRON_ERROR("object with address " + std::to_string(address) + | ||||||
|                          " does not have type '" + typeName(&typeid(T)) + |                          " does not have type '" + typeid(T).name() + | ||||||
|                          "' (has type '" + getObjectType(address) + "')"); |                          "' (has type '" + getObjectType(address) + "')"); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -391,37 +380,6 @@ T * Environment::createLattice(const std::string name) | |||||||
|     return createLattice<T>(getObjectAddress(name)); |     return createLattice<T>(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> |  | ||||||
| bool Environment::isObjectOfType(const unsigned int address) const |  | ||||||
| { |  | ||||||
|     if (hasRegisteredObject(address)) |  | ||||||
|     { |  | ||||||
|         if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get())) |  | ||||||
|         { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else if (hasObject(address)) |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("object with address " + std::to_string(address) + |  | ||||||
|                      " exists but is not registered"); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename T> |  | ||||||
| bool Environment::isObjectOfType(const std::string name) const |  | ||||||
| { |  | ||||||
|     return isObjectOfType<T>(getObjectAddress(name)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Environment_hpp_ | #endif // Hadrons_Environment_hpp_ | ||||||
|   | |||||||
| @@ -51,43 +51,23 @@ using Grid::operator<<; | |||||||
|  * error with GCC 5 (clang & GCC 6 compile fine without it). |  * error with GCC 5 (clang & GCC 6 compile fine without it). | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | // FIXME: find a way to do that in a more general fashion | ||||||
| #ifndef FIMPL | #ifndef FIMPL | ||||||
| #define FIMPL WilsonImplR | #define FIMPL WilsonImplR | ||||||
| #endif | #endif | ||||||
| #ifndef SIMPL |  | ||||||
| #define SIMPL ScalarImplCR |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| // type aliases | // type aliases | ||||||
| #define FERM_TYPE_ALIASES(FImpl, suffix)\ | #define TYPE_ALIASES(FImpl, suffix)\ | ||||||
| typedef FermionOperator<FImpl>                       FMat##suffix;             \ | typedef FermionOperator<FImpl>                       FMat##suffix;             \ | ||||||
| typedef typename FImpl::FermionField                 FermionField##suffix;     \ | typedef typename FImpl::FermionField                 FermionField##suffix;     \ | ||||||
| typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | ||||||
| typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | ||||||
| typedef std::vector<typename FImpl::SitePropagator::scalar_object>             \ | typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\ | ||||||
|                                                      SlicedPropagator##suffix; | typedef std::function<void(FermionField##suffix &,                             \ | ||||||
|  |  | ||||||
| #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; |  | ||||||
|  |  | ||||||
| #define SCALAR_TYPE_ALIASES(SImpl, suffix)\ |  | ||||||
| typedef typename SImpl::Field ScalarField##suffix;\ |  | ||||||
| typedef typename SImpl::Field PropagatorField##suffix; |  | ||||||
|  |  | ||||||
| #define SOLVER_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| typedef std::function<void(FermionField##suffix &,\ |  | ||||||
|                       const FermionField##suffix &)> SolverFn##suffix; |                       const FermionField##suffix &)> SolverFn##suffix; | ||||||
|  |  | ||||||
| #define SINK_TYPE_ALIASES(suffix)\ |  | ||||||
| typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix; |  | ||||||
|  |  | ||||||
| #define FGS_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| FERM_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| GAUGE_TYPE_ALIASES(FImpl, suffix)\ |  | ||||||
| SOLVER_TYPE_ALIASES(FImpl, suffix) |  | ||||||
|  |  | ||||||
| // logger | // logger | ||||||
| class HadronsLogger: public Logger | class HadronsLogger: public Logger | ||||||
| { | { | ||||||
| @@ -165,15 +145,6 @@ std::string typeName(void) | |||||||
|     return typeName(typeIdPt<T>()); |     return typeName(typeIdPt<T>()); | ||||||
| } | } | ||||||
|  |  | ||||||
| // default writers/readers |  | ||||||
| #ifdef HAVE_HDF5 |  | ||||||
| typedef Hdf5Reader CorrReader; |  | ||||||
| typedef Hdf5Writer CorrWriter; |  | ||||||
| #else |  | ||||||
| typedef XmlReader CorrReader; |  | ||||||
| typedef XmlWriter CorrWriter; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Global_hpp_ | #endif // Hadrons_Global_hpp_ | ||||||
|   | |||||||
| @@ -1,25 +1,40 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSink/Point.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Wall.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/Quark.hpp> | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MAction_DWF_hpp_ | #ifndef Hadrons_DWF_hpp_ | ||||||
| #define Hadrons_MAction_DWF_hpp_ | #define Hadrons_DWF_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -48,15 +48,14 @@ public: | |||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     unsigned int, Ls, |                                     unsigned int, Ls, | ||||||
|                                     double      , mass, |                                     double      , mass, | ||||||
|                                     double      , M5, |                                     double      , M5); | ||||||
|                                     std::string , boundary); |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TDWF: public Module<DWFPar> | class TDWF: public Module<DWFPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TDWF(const std::string name); |     TDWF(const std::string name); | ||||||
| @@ -117,19 +116,14 @@ void TDWF<FImpl>::execute(void) | |||||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " |                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" |                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  |  | ||||||
|                  << std::endl; |  | ||||||
|     env().createGrid(par().Ls); |     env().createGrid(par().Ls); | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &g4     = *env().getGrid(); |     auto &g4     = *env().getGrid(); | ||||||
|     auto &grb4   = *env().getRbGrid(); |     auto &grb4   = *env().getRbGrid(); | ||||||
|     auto &g5     = *env().getGrid(par().Ls); |     auto &g5     = *env().getGrid(par().Ls); | ||||||
|     auto &grb5   = *env().getRbGrid(par().Ls); |     auto &grb5   = *env().getRbGrid(par().Ls); | ||||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); |  | ||||||
|     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); |  | ||||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, |     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||||
|                                                 par().mass, par().M5, |                                                 par().mass, par().M5); | ||||||
|                                                 implParams); |  | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -137,4 +131,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MAction_DWF_hpp_ | #endif // Hadrons_DWF_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MAction_Wilson_hpp_ | #ifndef Hadrons_Wilson_hpp_ | ||||||
| #define Hadrons_MAction_Wilson_hpp_ | #define Hadrons_Wilson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -46,15 +46,14 @@ class WilsonPar: Serializable | |||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     double     , mass, |                                     double     , mass); | ||||||
|                                     std::string, boundary); |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TWilson: public Module<WilsonPar> | class TWilson: public Module<WilsonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TWilson(const std::string name); |     TWilson(const std::string name); | ||||||
| @@ -113,15 +112,10 @@ void TWilson<FImpl>::execute() | |||||||
| { | { | ||||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass |     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; |                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  |  | ||||||
|                  << std::endl; |  | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &grid   = *env().getGrid(); |     auto &grid   = *env().getGrid(); | ||||||
|     auto &gridRb = *env().getRbGrid(); |     auto &gridRb = *env().getRbGrid(); | ||||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); |     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); | ||||||
|     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); |  | ||||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, |  | ||||||
|                                             implParams); |  | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Baryon_hpp_ | #ifndef Hadrons_Baryon_hpp_ | ||||||
| #define Hadrons_MContraction_Baryon_hpp_ | #define Hadrons_Baryon_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3> | |||||||
| class TBaryon: public Module<BaryonPar> | class TBaryon: public Module<BaryonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |     TYPE_ALIASES(FImpl1, 1); | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |     TYPE_ALIASES(FImpl2, 2); | ||||||
|     FERM_TYPE_ALIASES(FImpl3, 3); |     TYPE_ALIASES(FImpl3, 3); | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
| @@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" |                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" | ||||||
|                  << par().q3 << "'" << std::endl; |                  << par().q3 << "'" << std::endl; | ||||||
|      |      | ||||||
|     CorrWriter             writer(par().output); |     XmlWriter             writer(par().output); | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); |     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); | ||||||
| @@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|      |      | ||||||
|     // FIXME: do contractions |     // FIXME: do contractions | ||||||
|      |      | ||||||
|     // write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Baryon_hpp_ | #endif // Hadrons_Baryon_hpp_ | ||||||
|   | |||||||
| @@ -1,144 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
| #define Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                                DiscLoop                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| class DiscLoopPar: Serializable /* parameter set of the DiscLoop module (template argument of Module<> in TDiscLoop) */ |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar, |  | ||||||
|                                     std::string,    q_loop, /* name of the propagator object fetched in execute() */ |  | ||||||
|                                     Gamma::Algebra, gamma, /* gamma matrix multiplied onto q_loop before the trace */ |  | ||||||
|                                     std::string,    output); /* argument passed to the CorrWriter constructor */ |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TDiscLoop: public Module<DiscLoopPar> /* disconnected-loop contraction module configured by DiscLoopPar */ |  | ||||||
| { |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|     class Result: Serializable /* record written by execute(): the gamma used and one Complex per entry of the sliceSum buffer */ |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |  | ||||||
|                                         Gamma::Algebra, gamma, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |  | ||||||
| public: |  | ||||||
|     // constructor: takes the module name and hands it to Module<DiscLoopPar> |  | ||||||
|     TDiscLoop(const std::string name); |  | ||||||
|     // destructor: compiler-generated, virtual |  | ||||||
|     virtual ~TDiscLoop(void) = default; |  | ||||||
|     // dependency relation: getInput() returns {par().q_loop}, getOutput() returns {getName()} |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup: defined below with an empty body |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution: computes trace(gamma*q_loop), reduces it with sliceSum and writes the Result |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction); /* NOTE(review): macro defined elsewhere; presumably registers TDiscLoop<FIMPL> as DiscLoop in MContraction - confirm */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       TDiscLoop implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor: forwards the module name to the Module<DiscLoopPar> base; no other state is initialised // |  | ||||||
| template <typename FImpl> |  | ||||||
| TDiscLoop<FImpl>::TDiscLoop(const std::string name) |  | ||||||
| : Module<DiscLoopPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products: getInput() returns a one-element list holding the q_loop name from the parameters // |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TDiscLoop<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q_loop}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TDiscLoop<FImpl>::getOutput(void) /* returns a one-element list holding this module's own name */ |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup: empty body, this module does nothing at setup time //////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TDiscLoop<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|      |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution: c = trace(gamma*q_loop) is reduced with sliceSum(c, buf, Tp); every buf[t] is copied through TensorRemove into result.corr[t] and the Result is written with the label disc // |  | ||||||
| template <typename FImpl> |  | ||||||
| void TDiscLoop<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing disconnected loop contraction '" << getName()  |  | ||||||
|                  << "' using '" << par().q_loop << "' with " << par().gamma  |  | ||||||
|                  << " insertion." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter            writer(par().output); |  | ||||||
|     PropagatorField       &q_loop = *env().template getObject<PropagatorField>(par().q_loop); /* looked up in the environment by name */ |  | ||||||
|     LatticeComplex        c(env().getGrid()); |  | ||||||
|     Gamma                 gamma(par().gamma); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|  |  | ||||||
|     c = trace(gamma*q_loop); |  | ||||||
|     sliceSum(c, buf, Tp); /* buf.size() after this call fixes the length of result.corr */ |  | ||||||
|  |  | ||||||
|     result.gamma = par().gamma; /* keep the inserted gamma next to the data it produced */ |  | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|     { |  | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     write(writer, "disc", result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_DiscLoop_hpp_ |  | ||||||
| @@ -1,170 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
| #define Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * 3pt contraction with gamma matrix insertion. |  | ||||||
|  * |  | ||||||
|  * Schematic: |  | ||||||
|  * |  | ||||||
|  *             q2           q3 |  | ||||||
|  *        /----<------*------<----¬ |  | ||||||
|  *       /          gamma          \ |  | ||||||
|  *      /                           \ |  | ||||||
|  *   i *                            * f |  | ||||||
|  *      \                          / |  | ||||||
|  *       \                        / |  | ||||||
|  *        \----------->----------/ |  | ||||||
|  *                   q1 |  | ||||||
|  * |  | ||||||
|  *      trace(g5*q1*adj(q2)*g5*gamma*q3) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                               Gamma3pt                                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| class Gamma3ptPar: Serializable /* parameter set of the Gamma3pt module (template argument of Module<> in TGamma3pt) */ |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar, |  | ||||||
|                                     std::string,    q1, /* name of the propagator object used as q1 in execute() */ |  | ||||||
|                                     std::string,    q2, /* name of the propagator object entering as adj(q2) */ |  | ||||||
|                                     std::string,    q3, /* name of the propagator object used as q3 in execute() */ |  | ||||||
|                                     Gamma::Algebra, gamma, /* gamma matrix inserted as (g5*gamma) between adj(q2) and q3 */ |  | ||||||
|                                     std::string,    output); /* argument passed to the CorrWriter constructor */ |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| class TGamma3pt: public Module<Gamma3ptPar> /* 3pt contraction module with a gamma insertion, configured by Gamma3ptPar */ |  | ||||||
| { |  | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |  | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |  | ||||||
|     FERM_TYPE_ALIASES(FImpl3, 3); |  | ||||||
|     class Result: Serializable /* record written by execute(): the gamma used and one Complex per entry of the sliceSum buffer */ |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |  | ||||||
|                                         Gamma::Algebra, gamma, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |  | ||||||
| public: |  | ||||||
|     // constructor: takes the module name and hands it to Module<Gamma3ptPar> |  | ||||||
|     TGamma3pt(const std::string name); |  | ||||||
|     // destructor: compiler-generated, virtual |  | ||||||
|     virtual ~TGamma3pt(void) = default; |  | ||||||
|     // dependency relation: getInput() returns {q1, q2, q3} from the parameters, getOutput() returns {getName()} |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup: defined below with an empty body |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution: computes trace(g5*q1*adj(q2)*(g5*gamma)*q3), reduces it with sliceSum and writes the Result |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction); /* NOTE(review): macros defined elsewhere; ARG presumably shields the commas of the template argument list - confirm */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       TGamma3pt implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor: forwards the module name to the Module<Gamma3ptPar> base; no other state is initialised // |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name) |  | ||||||
| : Module<Gamma3ptPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products: getInput() returns the three propagator names q1, q2, q3 in that order // |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void) /* returns a one-element list holding this module's own name */ |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup: empty body, this module does nothing at setup time //////////////////// |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void) |  | ||||||
| { |  | ||||||
|      |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution: c = trace(g5*q1*adj(q2)*(g5*gamma)*q3) is reduced with sliceSum(c, buf, Tp); every buf[t] is copied through TensorRemove into result.corr[t] and the Result is written with the label gamma3pt // |  | ||||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> |  | ||||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void) |  | ||||||
| { |  | ||||||
|     LOG(Message) << "Computing 3pt contractions '" << getName() << "' using" |  | ||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "' and '" |  | ||||||
|                  << par().q3 << "', with " << par().gamma << " insertion."  |  | ||||||
|                  << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter            writer(par().output); |  | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); /* each propagator is looked up in the environment by name */ |  | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |  | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q3); |  | ||||||
|     LatticeComplex        c(env().getGrid()); |  | ||||||
|     Gamma                 g5(Gamma::Algebra::Gamma5); |  | ||||||
|     Gamma                 gamma(par().gamma); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|  |  | ||||||
|     c = trace(g5*q1*adj(q2)*(g5*gamma)*q3); /* same expression as in the schematic comment near the top of this header */ |  | ||||||
|     sliceSum(c, buf, Tp); /* buf.size() after this call fixes the length of result.corr */ |  | ||||||
|  |  | ||||||
|     result.gamma = par().gamma; /* keep the inserted gamma next to the data it produced */ |  | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|     for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|     { |  | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     write(writer, "gamma3pt", result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Gamma3pt_hpp_ |  | ||||||
| @@ -6,10 +6,8 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|         Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -29,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_Meson_hpp_ | #ifndef Hadrons_Meson_hpp_ | ||||||
| #define Hadrons_MContraction_Meson_hpp_ | #define Hadrons_Meson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -38,56 +36,32 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Meson contractions |  | ||||||
|  ----------------------------- |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - q1: input propagator 1 (string) |  | ||||||
|  - q2: input propagator 2 (string) |  | ||||||
|  - gammas: gamma products to insert at sink & source, pairs of gamma matrices  |  | ||||||
|            (space-separated strings) in angled brackets (i.e. <g_sink g_src>), |  | ||||||
|            in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>"). |  | ||||||
|  |  | ||||||
|            Special values: "all" - perform all possible contractions. |  | ||||||
|  - mom: momentum insertion, space-separated float sequence (e.g. ".1 .2 1. 0."), |  | ||||||
|         given as multiples of (2*pi) / L. |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                                TMeson                                       * |  *                                TMeson                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
| typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair; |  | ||||||
|  |  | ||||||
| class MesonPar: Serializable | class MesonPar: Serializable | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, | ||||||
|                                     std::string, q1, |                                     std::string,    q1, | ||||||
|                                     std::string, q2, |                                     std::string,    q2, | ||||||
|                                     std::string, gammas, |                                     std::string,    output, | ||||||
|                                     std::string, sink, |                                     Gamma::Algebra, gammaSource, | ||||||
|                                     std::string, output); |                                     Gamma::Algebra, gammaSink); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| class TMeson: public Module<MesonPar> | class TMeson: public Module<MesonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl1, 1); |     TYPE_ALIASES(FImpl1, 1); | ||||||
|     FERM_TYPE_ALIASES(FImpl2, 2); |     TYPE_ALIASES(FImpl2, 2); | ||||||
|     FERM_TYPE_ALIASES(ScalarImplCR, Scalar); |  | ||||||
|     SINK_TYPE_ALIASES(Scalar); |  | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); | ||||||
|                                         Gamma::Algebra, gamma_snk, |  | ||||||
|                                         Gamma::Algebra, gamma_src, |  | ||||||
|                                         std::vector<Complex>, corr); |  | ||||||
|     }; |     }; | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
| @@ -97,7 +71,6 @@ public: | |||||||
|     // dependencies/products |     // dependencies/products | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     virtual void parseGammaString(std::vector<GammaPair> &gammaList); |  | ||||||
|     // execution |     // execution | ||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
| @@ -117,7 +90,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name) | |||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> input = {par().q1, par().q2, par().sink}; |     std::vector<std::string> input = {par().q1, par().q2}; | ||||||
|      |      | ||||||
|     return input; |     return input; | ||||||
| } | } | ||||||
| @@ -130,35 +103,7 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | |||||||
|     return output; |     return output; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> |  | ||||||
| void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) |  | ||||||
| { |  | ||||||
|     gammaList.clear(); |  | ||||||
|     // Determine gamma matrices to insert at source/sink. |  | ||||||
|     if (par().gammas.compare("all") == 0) |  | ||||||
|     { |  | ||||||
|         // Do all contractions. |  | ||||||
|         for (unsigned int i = 1; i < Gamma::nGamma; i += 2) |  | ||||||
|         { |  | ||||||
|             for (unsigned int j = 1; j < Gamma::nGamma; j += 2) |  | ||||||
|             { |  | ||||||
|                 gammaList.push_back(std::make_pair((Gamma::Algebra)i,  |  | ||||||
|                                                    (Gamma::Algebra)j)); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         // Parse individual contractions from input string. |  | ||||||
|         gammaList = strToVec<GammaPair>(par().gammas); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| #define mesonConnected(q1, q2, gSnk, gSrc) \ |  | ||||||
| (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) |  | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| void TMeson<FImpl1, FImpl2>::execute(void) | void TMeson<FImpl1, FImpl2>::execute(void) | ||||||
| { | { | ||||||
| @@ -166,73 +111,21 @@ void TMeson<FImpl1, FImpl2>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" |                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     CorrWriter             writer(par().output); |     XmlWriter             writer(par().output); | ||||||
|     std::vector<TComplex>  buf; |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     std::vector<Result>    result; |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); |     LatticeComplex        c(env().getGrid()); | ||||||
|     std::vector<GammaPair> gammaList; |     Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink); | ||||||
|     int                    nt = env().getDim(Tp); |     Gamma                 g5(Gamma::Algebra::Gamma5); | ||||||
|  |     std::vector<TComplex> buf; | ||||||
|  |     Result                result; | ||||||
|      |      | ||||||
|     parseGammaString(gammaList); |     c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5); | ||||||
|     result.resize(gammaList.size()); |     sliceSum(c, buf, Tp); | ||||||
|     for (unsigned int i = 0; i < result.size(); ++i) |     result.corr.resize(buf.size()); | ||||||
|  |     for (unsigned int t = 0; t < buf.size(); ++t) | ||||||
|     { |     { | ||||||
|         result[i].gamma_snk = gammaList[i].first; |         result.corr[t] = TensorRemove(buf[t]); | ||||||
|         result[i].gamma_src = gammaList[i].second; |  | ||||||
|         result[i].corr.resize(nt); |  | ||||||
|     } |  | ||||||
|     if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and |  | ||||||
|         env().template isObjectOfType<SlicedPropagator2>(par().q2)) |  | ||||||
|     { |  | ||||||
|         SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1); |  | ||||||
|         SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "(propagator already sinked)" << std::endl; |  | ||||||
|         for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|         { |  | ||||||
|             Gamma gSnk(gammaList[i].first); |  | ||||||
|             Gamma gSrc(gammaList[i].second); |  | ||||||
|              |  | ||||||
|             for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|             { |  | ||||||
|                 result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         PropagatorField1 &q1   = *env().template getObject<PropagatorField1>(par().q1); |  | ||||||
|         PropagatorField2 &q2   = *env().template getObject<PropagatorField2>(par().q2); |  | ||||||
|         LatticeComplex   c(env().getGrid()); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; |  | ||||||
|         for (unsigned int i = 0; i < result.size(); ++i) |  | ||||||
|         { |  | ||||||
|             Gamma       gSnk(gammaList[i].first); |  | ||||||
|             Gamma       gSrc(gammaList[i].second); |  | ||||||
|             std::string ns; |  | ||||||
|                  |  | ||||||
|             ns = env().getModuleNamespace(env().getObjectModule(par().sink)); |  | ||||||
|             if (ns == "MSource") |  | ||||||
|             { |  | ||||||
|                 PropagatorField1 &sink = |  | ||||||
|                     *env().template getObject<PropagatorField1>(par().sink); |  | ||||||
|                  |  | ||||||
|                 c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); |  | ||||||
|                 sliceSum(c, buf, Tp); |  | ||||||
|             } |  | ||||||
|             else if (ns == "MSink") |  | ||||||
|             { |  | ||||||
|                 SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink); |  | ||||||
|                  |  | ||||||
|                 c   = trace(mesonConnected(q1, q2, gSnk, gSrc)); |  | ||||||
|                 buf = sink(c); |  | ||||||
|             } |  | ||||||
|             for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|             { |  | ||||||
|                 result[i].corr[t] = TensorRemove(buf[t]); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
|     write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
| @@ -241,4 +134,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_Meson_hpp_ | #endif // Hadrons_Meson_hpp_ | ||||||
|   | |||||||
| @@ -1,114 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonian                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| /******************************************************************************* |  | ||||||
|  * Utilities for contractions involving the Weak Hamiltonian. |  | ||||||
|  ******************************************************************************/ |  | ||||||
| //// Sum and store correlator: runs sliceSum(exp, buf, Tp), sets res.name = (n), resizes res.corr to buf.size() and fills res.corr[t] = TensorRemove(buf[t]). NOTE(review): expands to several statements with no do { } while (0) wrapper, so it must not be used as the body of an unbraced if/for. |  | ||||||
| #define MAKE_DIAG(exp, buf, res, n)\ |  | ||||||
| sliceSum(exp, buf, Tp);\ |  | ||||||
| res.name = (n);\ |  | ||||||
| res.corr.resize(buf.size());\ |  | ||||||
| for (unsigned int t = 0; t < buf.size(); ++t)\ |  | ||||||
| {\ |  | ||||||
|     res.corr[t] = TensorRemove(buf[t]);\ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //// Contraction of mu index: use 'mu' variable in exp. buf is first set to zero, then exp is added once for each mu = 0 .. ndim-1; 'zero' and 'ndim' must be visible where the macro is expanded. NOTE(review): multi-statement expansion and exp is pasted without parentheses. |  | ||||||
| #define SUM_MU(buf,exp)\ |  | ||||||
| buf = zero;\ |  | ||||||
| for (unsigned int mu = 0; mu < ndim; ++mu)\ |  | ||||||
| {\ |  | ||||||
|     buf += exp;\ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| enum /* index constants; NOTE(review): order matches VA_label = {"V", "A"} in MAKE_WEAK_MODULE, presumably vector/axial - confirm */  |  | ||||||
| { |  | ||||||
|   i_V = 0, |  | ||||||
|   i_A = 1, |  | ||||||
|   n_i = 2 /* count of the i_* entries above */ |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class WeakHamiltonianPar: Serializable /* parameter set shared by every class generated with MAKE_WEAK_MODULE */ |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar, |  | ||||||
|                                     std::string, q1, /* NOTE(review): q1..q4 are not used in this header; presumably propagator object names - confirm in the .cc modules */ |  | ||||||
|                                     std::string, q2, |  | ||||||
|                                     std::string, q3, |  | ||||||
|                                     std::string, q4, |  | ||||||
|                                     std::string, output); /* NOTE(review): presumably the result destination - confirm in the .cc modules */ |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #define MAKE_WEAK_MODULE(modname) /* declares class T<modname> deriving from Module<WeakHamiltonianPar>, then passes it to MODULE_REGISTER_NS */\ |  | ||||||
| class T##modname: public Module<WeakHamiltonianPar>\ |  | ||||||
| {\ |  | ||||||
| public:\ |  | ||||||
|     FERM_TYPE_ALIASES(FIMPL,)\ |  | ||||||
|     class Result: Serializable /* one named correlator: a label plus a vector of Complex */\ |  | ||||||
|     {\ |  | ||||||
|     public:\ |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\ |  | ||||||
|                                         std::string, name,\ |  | ||||||
|                                         std::vector<Complex>, corr);\ |  | ||||||
|     };\ |  | ||||||
| public:\ |  | ||||||
|     /* constructor: declared only, no definition in this header */ \ |  | ||||||
|     T##modname(const std::string name);\ |  | ||||||
|     /* destructor: compiler-generated, virtual */ \ |  | ||||||
|     virtual ~T##modname(void) = default;\ |  | ||||||
|     /* dependency relation: declared only, no definition in this header */ \ |  | ||||||
|     virtual std::vector<std::string> getInput(void);\ |  | ||||||
|     virtual std::vector<std::string> getOutput(void);\ |  | ||||||
|     /* setup: declared only, no definition in this header */ \ |  | ||||||
|     virtual void setup(void);\ |  | ||||||
|     /* execution: declared only, no definition in this header */ \ |  | ||||||
|     virtual void execute(void);\ |  | ||||||
|     std::vector<std::string> VA_label = {"V", "A"}; /* per-instance labels, in the same order as i_V and i_A */\ |  | ||||||
| };\ |  | ||||||
| MODULE_REGISTER_NS(modname, T##modname, MContraction); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonian_hpp_ |  | ||||||
| @@ -1,137 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian current-current contractions, Eye-type. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical |  | ||||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematics:        q4                 |                   |  | ||||||
|  *                  /-<-¬                |                              |  | ||||||
|  *                 /     \               |             q2           q3 |  | ||||||
|  *                 \     /               |        /----<------*------<----¬                         |  | ||||||
|  *            q2    \   /    q3          |       /          /-*-¬          \ |  | ||||||
|  *       /-----<-----* *-----<----¬      |      /          /     \          \ |  | ||||||
|  *    i *            H_W           * f   |   i *           \     /  q4      * f |  | ||||||
|  *       \                        /      |      \           \->-/          /    |  | ||||||
|  *        \                      /       |       \                        /        |  | ||||||
|  *         \---------->---------/        |        \----------->----------/         |  | ||||||
|  *                   q1                  |                   q1                   |  | ||||||
|  *                                       | |  | ||||||
|  *                Saucer (S)             |                  Eye (E) |  | ||||||
|  *  |  | ||||||
|  * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) |  | ||||||
|  * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                  TWeakHamiltonianEye implementation                        * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| // Hands the module name to the Module<WeakHamiltonianPar> base; nothing else |  | ||||||
| // is initialised here. |  | ||||||
| TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name) |  | ||||||
|     : Module<WeakHamiltonianPar>(name) |  | ||||||
| { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| // Returns the names of the four propagators (q1, q2, q3, q4 from the module |  | ||||||
| // parameters), in that order. |  | ||||||
| std::vector<std::string> TWeakHamiltonianEye::getInput(void) |  | ||||||
| { |  | ||||||
|     return {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Returns a one-element list holding this module's own name. |  | ||||||
| std::vector<std::string> TWeakHamiltonianEye::getOutput(void) |  | ||||||
| { |  | ||||||
|     return std::vector<std::string>(1, getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianEye::setup(void) |  | ||||||
| { |  | ||||||
|     // Intentionally empty: this module does no work in setup(). |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| /* |  | ||||||
|  * Build the Saucer (S) and Eye (E) contractions from the four propagators |  | ||||||
|  * named in the module parameters and pass both results to a CorrWriter |  | ||||||
|  * opened on par().output, under the tag "HW_Eye". |  | ||||||
|  * |  | ||||||
|  * For each direction mu the sub-diagrams are |  | ||||||
|  *   S_body[mu] = q3*g5*q1*adj(q2)*g5*GammaL(gmu[mu])   (MAKE_SE_BODY) |  | ||||||
|  *   S_loop[mu] = q4*GammaL(gmu[mu])                    (MAKE_SE_LOOP) |  | ||||||
|  * S takes the trace of their product; E multiplies their separate traces. |  | ||||||
|  * |  | ||||||
|  * SUM_MU and MAKE_DIAG are defined outside this file; presumably they sum the |  | ||||||
|  * given expression over mu into expbuf and reduce it into the named Result |  | ||||||
|  * entry -- confirm against WeakHamiltonian.hpp. |  | ||||||
|  */ |  | ||||||
| void TWeakHamiltonianEye::execute(void) |  | ||||||
| { |  | ||||||
|     // Each quark name is enclosed in its own pair of single quotes. |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     // g5 must keep this exact name: MAKE_SE_BODY refers to it directly. |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_eye_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     // tmp1/tmp2 only serve as fill values so the vectors hold fields that |  | ||||||
|     // live on the environment grid. |  | ||||||
|     PropagatorField              tmp1(env().getGrid()); |  | ||||||
|     LatticeComplex               tmp2(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> S_body(ndim, tmp1); |  | ||||||
|     std::vector<PropagatorField> S_loop(ndim, tmp1); |  | ||||||
|     std::vector<LatticeComplex>  E_body(ndim, tmp2); |  | ||||||
|     std::vector<LatticeComplex>  E_loop(ndim, tmp2); |  | ||||||
|  |  | ||||||
|     // Setup for S-type contractions (index type matches the unsigned ndim). |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); |  | ||||||
|         S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform S-type contractions: single trace over body*loop. |  | ||||||
|     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") |  | ||||||
|  |  | ||||||
|     // Recycle sub-expressions for E-type contractions. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         E_body[mu] = trace(S_body[mu]); |  | ||||||
|         E_loop[mu] = trace(S_loop[mu]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform E-type contractions: product of the two separate traces. |  | ||||||
|     SUM_MU(expbuf, E_body[mu]*E_loop[mu]) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_Eye", result); |  | ||||||
| } |  | ||||||
| @@ -1,58 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonianEye                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| // Slots of the result vector filled by TWeakHamiltonianEye::execute(). |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     S_diag,     // 0: Saucer diagram |  | ||||||
|     E_diag,     // 1: Eye diagram |  | ||||||
|     n_eye_diag  // 2: number of diagrams, used to size the result vector |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Saucer and Eye subdiagram contractions. |  | ||||||
| // Both macros expand to plain products. MAKE_SE_BODY additionally expects a |  | ||||||
| // variable named g5 to be in scope at the point of use. Arguments are |  | ||||||
| // parenthesised so that compound expressions keep their intended grouping. |  | ||||||
| #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) ((Q_3)*g5*(Q_1)*adj(Q_2)*g5*(gamma)) |  | ||||||
| #define MAKE_SE_LOOP(Q_loop, gamma) ((Q_loop)*(gamma)) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakHamiltonianEye) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ |  | ||||||
| @@ -1,139 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian current-current contractions, Non-Eye-type. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical |  | ||||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematic:      |  | ||||||
|  *            q2             q3          |           q2              q3 |  | ||||||
|  *          /--<--¬       /--<--¬        |        /--<--¬         /--<--¬        |  | ||||||
|  *         /       \     /       \       |       /       \       /       \       |  | ||||||
|  *        /         \   /         \      |      /         \     /         \      |  | ||||||
|  *       /           \ /           \     |     /           \   /           \     |  | ||||||
|  *    i *             * H_W         *  f |  i *             * * H_W         * f  |  | ||||||
|  *      \             *             |    |     \           /   \           / |  | ||||||
|  *       \           / \           /     |      \         /     \         /     |  | ||||||
|  *        \         /   \         /      |       \       /       \       /   |  | ||||||
|  *         \       /     \       /       |        \-->--/         \-->--/       |  | ||||||
|  *          \-->--/       \-->--/        |          q1               q4  |  | ||||||
|  *            q1             q4          | |  | ||||||
|  *                Connected (C)          |                 Wing (W) |  | ||||||
|  * |  | ||||||
|  * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu]) |  | ||||||
|  * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu]) |  | ||||||
|  *  |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                  TWeakHamiltonianNonEye implementation                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| // Hands the module name to the Module<WeakHamiltonianPar> base; nothing else |  | ||||||
| // is initialised here. |  | ||||||
| TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name) |  | ||||||
|     : Module<WeakHamiltonianPar>(name) |  | ||||||
| { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| // Returns the names of the four propagators (q1, q2, q3, q4 from the module |  | ||||||
| // parameters), in that order. |  | ||||||
| std::vector<std::string> TWeakHamiltonianNonEye::getInput(void) |  | ||||||
| { |  | ||||||
|     return {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Returns a one-element list holding this module's own name. |  | ||||||
| std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void) |  | ||||||
| { |  | ||||||
|     return std::vector<std::string>(1, getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakHamiltonianNonEye::setup(void) |  | ||||||
| { |  | ||||||
|     // Intentionally empty: this module does no work in setup(). |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| /* |  | ||||||
|  * Build the Connected (C) and Wing (W) contractions from the four propagators |  | ||||||
|  * named in the module parameters and pass both results to a CorrWriter |  | ||||||
|  * opened on par().output, under the tag "HW_NonEye". |  | ||||||
|  * |  | ||||||
|  * For each direction mu the sub-diagrams are (MAKE_CW_SUBDIAG) |  | ||||||
|  *   C_i_side_loop[mu] = q1*adj(q2)*g5*GammaL(gmu[mu]) |  | ||||||
|  *   C_f_side_loop[mu] = q3*adj(q4)*g5*GammaL(gmu[mu]) |  | ||||||
|  * C takes the trace of their product; W multiplies their separate traces. |  | ||||||
|  * |  | ||||||
|  * SUM_MU and MAKE_DIAG are defined outside this file; presumably they sum the |  | ||||||
|  * given expression over mu into expbuf and reduce it into the named Result |  | ||||||
|  * entry -- confirm against WeakHamiltonian.hpp. |  | ||||||
|  */ |  | ||||||
| void TWeakHamiltonianNonEye::execute(void) |  | ||||||
| { |  | ||||||
|     // Each quark name is enclosed in its own pair of single quotes. |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|      |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     // g5 must keep this exact name: MAKE_CW_SUBDIAG refers to it directly. |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_noneye_diag);  |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     // tmp1/tmp2 only serve as fill values so the vectors hold fields that |  | ||||||
|     // live on the environment grid. |  | ||||||
|     PropagatorField              tmp1(env().getGrid()); |  | ||||||
|     LatticeComplex               tmp2(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> C_i_side_loop(ndim, tmp1); |  | ||||||
|     std::vector<PropagatorField> C_f_side_loop(ndim, tmp1); |  | ||||||
|     std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2); |  | ||||||
|     std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2); |  | ||||||
|  |  | ||||||
|     // Setup for C-type contractions (index type matches the unsigned ndim). |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu])); |  | ||||||
|         C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform C-type contractions: single trace over both sides. |  | ||||||
|     SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu])) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C") |  | ||||||
|  |  | ||||||
|     // Recycle sub-expressions for W-type contractions. |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         W_i_side_loop[mu] = trace(C_i_side_loop[mu]); |  | ||||||
|         W_f_side_loop[mu] = trace(C_f_side_loop[mu]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Perform W-type contractions: product of the two separate traces. |  | ||||||
|     SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu]) |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_NonEye", result); |  | ||||||
| } |  | ||||||
| @@ -1,57 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakHamiltonianNonEye                              * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| // Slots of the result vector filled by TWeakHamiltonianNonEye::execute(). |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     W_diag,        // 0: Wing diagram |  | ||||||
|     C_diag,        // 1: Connected diagram |  | ||||||
|     n_noneye_diag  // 2: number of diagrams, used to size the result vector |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Wing and Connected subdiagram contractions |  | ||||||
| // Expands to a plain product and expects a variable named g5 to be in scope |  | ||||||
| // at the point of use. Arguments are parenthesised so that compound |  | ||||||
| // expressions keep their intended grouping. |  | ||||||
| #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakHamiltonianNonEye) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ |  | ||||||
| @@ -1,135 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MContraction; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * Weak Hamiltonian + current contractions, disconnected topology for neutral  |  | ||||||
|  * mesons. |  | ||||||
|  *  |  | ||||||
|  * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak |  | ||||||
|  * Hamiltonian in the physical basis and an additional current J (see e.g.  |  | ||||||
|  * Fig 11 of arXiv:1507.03094). |  | ||||||
|  *  |  | ||||||
|  * Schematic: |  | ||||||
|  *                         |  | ||||||
|  *           q2          q4             q3 |  | ||||||
|  *       /--<--¬     /---<--¬       /---<--¬ |  | ||||||
|  *     /         \ /         \     /        \ |  | ||||||
|  *  i *           * H_W      |  J *          * f |  | ||||||
|  *     \         / \         /     \        / |  | ||||||
|  *      \--->---/   \-------/       \------/ |  | ||||||
|  *          q1  |  | ||||||
|  *  |  | ||||||
|  * options |  | ||||||
|  * - q1: input propagator 1 (string) |  | ||||||
|  * - q2: input propagator 2 (string) |  | ||||||
|  * - q3: input propagator 3 (string), assumed to be sequential propagator  |  | ||||||
|  * - q4: input propagator 4 (string), assumed to be a loop |  | ||||||
|  *  |  | ||||||
|  * type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5) |  | ||||||
|  * type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5) |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /******************************************************************************* |  | ||||||
|  *                  TWeakNeutral4ptDisc implementation                         * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| // Hands the module name to the Module<WeakHamiltonianPar> base; nothing else |  | ||||||
| // is initialised here. |  | ||||||
| TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name) |  | ||||||
|     : Module<WeakHamiltonianPar>(name) |  | ||||||
| { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| // Returns the names of the four propagators (q1, q2, q3, q4 from the module |  | ||||||
| // parameters), in that order. |  | ||||||
| std::vector<std::string> TWeakNeutral4ptDisc::getInput(void) |  | ||||||
| { |  | ||||||
|     return {par().q1, par().q2, par().q3, par().q4}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Returns a one-element list holding this module's own name. |  | ||||||
| std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void) |  | ||||||
| { |  | ||||||
|     return std::vector<std::string>(1, getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TWeakNeutral4ptDisc::setup(void) |  | ||||||
| { |  | ||||||
|     // Intentionally empty: this module does no work in setup(). |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| /* |  | ||||||
|  * Build the two disconnected neutral-meson contractions from the four |  | ||||||
|  * propagators named in the module parameters and pass both results to a |  | ||||||
|  * CorrWriter opened on par().output, under the tag "HW_disc0". |  | ||||||
|  * |  | ||||||
|  * For each direction mu the sub-diagrams are |  | ||||||
|  *   meson[mu] = q1*adj(q2)*g5*GammaL(gmu[mu])   (MAKE_DISC_MESON) |  | ||||||
|  *   loop[mu]  = q4*GammaL(gmu[mu])              (MAKE_DISC_LOOP) |  | ||||||
|  * and the current factor is curr = trace(q3*GammaL(Gamma5)) (MAKE_DISC_CURR). |  | ||||||
|  * |  | ||||||
|  * SUM_MU and MAKE_DIAG are defined outside this file; presumably they sum the |  | ||||||
|  * given expression over mu into expbuf and reduce it into the named Result |  | ||||||
|  * entry -- confirm against WeakHamiltonian.hpp. |  | ||||||
|  */ |  | ||||||
| void TWeakNeutral4ptDisc::execute(void) |  | ||||||
| { |  | ||||||
|     // Each quark name is enclosed in its own pair of single quotes. |  | ||||||
|     LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"  |  | ||||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  |  | ||||||
|                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4  |  | ||||||
|                  << "'." << std::endl; |  | ||||||
|  |  | ||||||
|     CorrWriter             writer(par().output); |  | ||||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); |  | ||||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); |  | ||||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); |  | ||||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); |  | ||||||
|     // g5 must keep this exact name: MAKE_DISC_MESON refers to it directly. |  | ||||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); |  | ||||||
|     LatticeComplex        expbuf(env().getGrid()); |  | ||||||
|     std::vector<TComplex> corrbuf; |  | ||||||
|     std::vector<Result>   result(n_neut_disc_diag); |  | ||||||
|     unsigned int ndim   = env().getNd(); |  | ||||||
|  |  | ||||||
|     // tmp only serves as a fill value so the vectors hold fields that live |  | ||||||
|     // on the environment grid. |  | ||||||
|     PropagatorField              tmp(env().getGrid()); |  | ||||||
|     std::vector<PropagatorField> meson(ndim, tmp); |  | ||||||
|     std::vector<PropagatorField> loop(ndim, tmp); |  | ||||||
|     LatticeComplex               curr(env().getGrid()); |  | ||||||
|  |  | ||||||
|     // Setup shared by both contractions (index type matches unsigned ndim). |  | ||||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) |  | ||||||
|     { |  | ||||||
|         meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu])); |  | ||||||
|         loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu])); |  | ||||||
|     } |  | ||||||
|     // NOTE(review): this expands to trace(q3*GammaL(Gamma5)), while the header |  | ||||||
|     // comment of this file documents trace(q3*g5) -- confirm that the |  | ||||||
|     // left-handed projection is intended. |  | ||||||
|     curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5)); |  | ||||||
|  |  | ||||||
|     // NOTE(review): the header comment of this file labels the product of |  | ||||||
|     // traces as "type 1" and the single trace as "type 2", whereas the code |  | ||||||
|     // below stores the single trace under neut_disc_1_diag/"HW_disc0_1" and |  | ||||||
|     // the product of traces under neut_disc_2_diag/"HW_disc0_2". Behaviour |  | ||||||
|     // is left untouched here; confirm which labelling is intended. |  | ||||||
|  |  | ||||||
|     // Perform type 1 contractions: single trace over meson*loop, times curr. |  | ||||||
|     SUM_MU(expbuf, trace(meson[mu]*loop[mu])) |  | ||||||
|     expbuf *= curr; |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1") |  | ||||||
|  |  | ||||||
|     // Perform type 2 contractions: product of separate traces, times curr. |  | ||||||
|     SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu])) |  | ||||||
|     expbuf *= curr; |  | ||||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2") |  | ||||||
|  |  | ||||||
|     write(writer, "HW_disc0", result); |  | ||||||
| } |  | ||||||
| @@ -1,59 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
| #define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         WeakNeutral4ptDisc                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) |  | ||||||
|  |  | ||||||
| // Slots of the result vector filled by TWeakNeutral4ptDisc::execute(). |  | ||||||
| enum |  | ||||||
| { |  | ||||||
|     neut_disc_1_diag,  // 0: entry written as "HW_disc0_1" |  | ||||||
|     neut_disc_2_diag,  // 1: entry written as "HW_disc0_2" |  | ||||||
|     n_neut_disc_diag   // 2: number of diagrams, used to size the result vector |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Neutral 4pt disconnected subdiagram contractions. |  | ||||||
| // All three macros expand to plain products (MAKE_DISC_CURR wraps its product |  | ||||||
| // in trace()). MAKE_DISC_MESON additionally expects a variable named g5 to be |  | ||||||
| // in scope at the point of use. Arguments are parenthesised so that compound |  | ||||||
| // expressions keep their intended grouping. |  | ||||||
| #define MAKE_DISC_MESON(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) |  | ||||||
| #define MAKE_DISC_LOOP(Q_LOOP, gamma) ((Q_LOOP)*(gamma)) |  | ||||||
| #define MAKE_DISC_CURR(Q_c, gamma) (trace((Q_c)*(gamma))) |  | ||||||
|  |  | ||||||
| MAKE_WEAK_MODULE(WeakNeutral4ptDisc) |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ |  | ||||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | |||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| void TLoad::execute(void) | void TLoad::execute(void) | ||||||
| { | { | ||||||
|     FieldMetaData  header; |     NerscField  header; | ||||||
|     std::string fileName = par().file + "." |     std::string fileName = par().file + "." | ||||||
|                            + std::to_string(env().getTrajectory()); |                            + std::to_string(env().getTrajectory()); | ||||||
|      |      | ||||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | |||||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); |     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||||
|     NerscIO::readConfiguration(U, header, fileName); |     NerscIO::readConfiguration(U, header, fileName); | ||||||
|     LOG(Message) << "NERSC header:" << std::endl; |     LOG(Message) << "NERSC header:" << std::endl; | ||||||
|     dump_meta_data(header, LOG(Message)); |     dump_nersc_header(header, LOG(Message)); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Load_hpp_ | #ifndef Hadrons_Load_hpp_ | ||||||
| #define Hadrons_MGauge_Load_hpp_ | #define Hadrons_Load_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Load_hpp_ | #endif // Hadrons_Load_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Random_hpp_ | #ifndef Hadrons_Random_hpp_ | ||||||
| #define Hadrons_MGauge_Random_hpp_ | #define Hadrons_Random_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Random_hpp_ | #endif // Hadrons_Random_hpp_ | ||||||
|   | |||||||
| @@ -1,88 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MGauge; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                  TStochEm implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TStochEm::TStochEm(const std::string name) |  | ||||||
| : Module<StochEmPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TStochEm::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TStochEm::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TStochEm::setup(void) |  | ||||||
| { |  | ||||||
|     if (!env().hasRegisteredObject("_" + getName() + "_weight")) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<EmComp>("_" + getName() + "_weight"); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<EmField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TStochEm::execute(void) |  | ||||||
| { |  | ||||||
|     PhotonR photon(par().gauge, par().zmScheme); |  | ||||||
|     EmField &a = *env().createLattice<EmField>(getName()); |  | ||||||
|     EmComp  *w; |  | ||||||
|      |  | ||||||
|     if (!env().hasCreatedObject("_" + getName() + "_weight")) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching stochatic EM potential weight (gauge: " |  | ||||||
|                      << par().gauge << ", zero-mode scheme: " |  | ||||||
|                      << par().zmScheme << ")..." << std::endl; |  | ||||||
|         w = env().createLattice<EmComp>("_" + getName() + "_weight"); |  | ||||||
|         photon.StochasticWeight(*w); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         w = env().getObject<EmComp>("_" + getName() + "_weight"); |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Generating stochatic EM potential..." << std::endl; |  | ||||||
|     photon.StochasticField(a, *env().get4dRng(), *w); |  | ||||||
| } |  | ||||||
| @@ -1,75 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef Hadrons_MGauge_StochEm_hpp_ |  | ||||||
| #define Hadrons_MGauge_StochEm_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         StochEm                                 * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MGauge) |  | ||||||
|  |  | ||||||
| class StochEmPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, |  | ||||||
|                                     PhotonR::Gauge,    gauge, |  | ||||||
|                                     PhotonR::ZmScheme, zmScheme); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TStochEm: public Module<StochEmPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     typedef PhotonR::GaugeField     EmField; |  | ||||||
|     typedef PhotonR::GaugeLinkField EmComp; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TStochEm(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TStochEm(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_StochEm_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MGauge_Unit_hpp_ | #ifndef Hadrons_Unit_hpp_ | ||||||
| #define Hadrons_MGauge_Unit_hpp_ | #define Hadrons_Unit_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MGauge_Unit_hpp_ | #endif // Hadrons_Unit_hpp_ | ||||||
|   | |||||||
| @@ -1,132 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
| #define Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Noise loop propagator |  | ||||||
|  ----------------------------- |  | ||||||
|  * loop_x = q_x * adj(eta_x) |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - q = Result of inversion on noise source. |  | ||||||
|  - eta = noise source. |  | ||||||
|  |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         NoiseLoop                                          * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MLoop) |  | ||||||
|  |  | ||||||
| class NoiseLoopPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar, |  | ||||||
|                                     std::string, q, |  | ||||||
|                                     std::string, eta); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TNoiseLoop: public Module<NoiseLoopPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TNoiseLoop(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TNoiseLoop(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                 TNoiseLoop implementation                                  * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TNoiseLoop<FImpl>::TNoiseLoop(const std::string name) |  | ||||||
| : Module<NoiseLoopPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TNoiseLoop<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().q, par().eta}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TNoiseLoop<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TNoiseLoop<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     PropagatorField &loop = *env().template createLattice<PropagatorField>(getName()); |  | ||||||
|     PropagatorField &q    = *env().template getObject<PropagatorField>(par().q); |  | ||||||
|     PropagatorField &eta  = *env().template getObject<PropagatorField>(par().eta); |  | ||||||
|     loop = q*adj(eta); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MLoop_NoiseLoop_hpp_ |  | ||||||
| @@ -1,226 +0,0 @@ | |||||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MScalar; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                     TChargedProp implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TChargedProp::TChargedProp(const std::string name) |  | ||||||
| : Module<ChargedPropPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TChargedProp::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().source, par().emField}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TChargedProp::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TChargedProp::setup(void) |  | ||||||
| { |  | ||||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); |  | ||||||
|     phaseName_.clear(); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         phaseName_.push_back("_shiftphase_" + std::to_string(mu)); |  | ||||||
|     } |  | ||||||
|     GFSrcName_ = "_" + getName() + "_DinvSrc"; |  | ||||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     if (!env().hasRegisteredObject(phaseName_[0])) |  | ||||||
|     { |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             env().registerLattice<ScalarField>(phaseName_[mu]); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (!env().hasRegisteredObject(GFSrcName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(GFSrcName_); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<ScalarField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TChargedProp::execute(void) |  | ||||||
| { |  | ||||||
|     // CACHING ANALYTIC EXPRESSIONS |  | ||||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); |  | ||||||
|     Complex     ci(0.0,1.0); |  | ||||||
|     FFT         fft(env().getGrid()); |  | ||||||
|      |  | ||||||
|     // cache free scalar propagator |  | ||||||
|     if (!env().hasCreatedObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching momentum space free scalar propagator" |  | ||||||
|                      << " (mass= " << par().mass << ")..." << std::endl; |  | ||||||
|         freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); |  | ||||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     // cache G*F*src |  | ||||||
|     if (!env().hasCreatedObject(GFSrcName_)) |  | ||||||
|          |  | ||||||
|     { |  | ||||||
|         GFSrc_ = env().createLattice<ScalarField>(GFSrcName_); |  | ||||||
|         fft.FFT_all_dim(*GFSrc_, source, FFT::forward); |  | ||||||
|         *GFSrc_ = (*freeMomProp_)*(*GFSrc_); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         GFSrc_ = env().getObject<ScalarField>(GFSrcName_); |  | ||||||
|     } |  | ||||||
|     // cache phases |  | ||||||
|     if (!env().hasCreatedObject(phaseName_[0])) |  | ||||||
|     { |  | ||||||
|         std::vector<int> &l = env().getGrid()->_fdimensions; |  | ||||||
|          |  | ||||||
|         LOG(Message) << "Caching shift phases..." << std::endl; |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             Real    twoPiL = M_PI*2./l[mu]; |  | ||||||
|              |  | ||||||
|             phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu])); |  | ||||||
|             LatticeCoordinate(*(phase_[mu]), mu); |  | ||||||
|             *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|         { |  | ||||||
|             phase_.push_back(env().getObject<ScalarField>(phaseName_[mu])); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // PROPAGATOR CALCULATION |  | ||||||
|     LOG(Message) << "Computing charged scalar propagator" |  | ||||||
|                  << " (mass= " << par().mass |  | ||||||
|                  << ", charge= " << par().charge << ")..." << std::endl; |  | ||||||
|      |  | ||||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); |  | ||||||
|     ScalarField buf(env().getGrid()); |  | ||||||
|     ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; |  | ||||||
|     double      q = par().charge; |  | ||||||
|      |  | ||||||
|     // G*F*Src |  | ||||||
|     prop = GFSrc; |  | ||||||
|  |  | ||||||
|     // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) |  | ||||||
|     buf = GFSrc; |  | ||||||
|     momD1(buf, fft); |  | ||||||
|     buf = G*buf; |  | ||||||
|     prop = prop - q*buf; |  | ||||||
|  |  | ||||||
|     // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) |  | ||||||
|     momD1(buf, fft); |  | ||||||
|     prop = prop + q*q*G*buf; |  | ||||||
|  |  | ||||||
|     // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) |  | ||||||
|     buf = GFSrc; |  | ||||||
|     momD2(buf, fft); |  | ||||||
|     prop = prop - q*q*G*buf; |  | ||||||
|  |  | ||||||
|     // final FT |  | ||||||
|     fft.FFT_all_dim(prop, prop, FFT::backward); |  | ||||||
|      |  | ||||||
|     // OUTPUT IF NECESSARY |  | ||||||
|     if (!par().output.empty()) |  | ||||||
|     { |  | ||||||
|         std::string           filename = par().output + "." + |  | ||||||
|                                          std::to_string(env().getTrajectory()); |  | ||||||
|          |  | ||||||
|         LOG(Message) << "Saving zero-momentum projection to '" |  | ||||||
|                      << filename << "'..." << std::endl; |  | ||||||
|          |  | ||||||
|         CorrWriter            writer(filename); |  | ||||||
|         std::vector<TComplex> vecBuf; |  | ||||||
|         std::vector<Complex>  result; |  | ||||||
|          |  | ||||||
|         sliceSum(prop, vecBuf, Tp); |  | ||||||
|         result.resize(vecBuf.size()); |  | ||||||
|         for (unsigned int t = 0; t < vecBuf.size(); ++t) |  | ||||||
|         { |  | ||||||
|             result[t] = TensorRemove(vecBuf[t]); |  | ||||||
|         } |  | ||||||
|         write(writer, "charge", q); |  | ||||||
|         write(writer, "prop", result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void TChargedProp::momD1(ScalarField &s, FFT &fft) |  | ||||||
| { |  | ||||||
|     EmField     &A = *env().getObject<EmField>(par().emField); |  | ||||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), |  | ||||||
|                 Amu(env().getGrid()); |  | ||||||
|     Complex     ci(0.0,1.0); |  | ||||||
|  |  | ||||||
|     result = zero; |  | ||||||
|  |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = (*phase_[mu])*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); |  | ||||||
|         buf = Amu*buf; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result - ci*buf; |  | ||||||
|     } |  | ||||||
|     fft.FFT_all_dim(s, s, FFT::backward); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = Amu*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + ci*adj(*phase_[mu])*buf; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     s = result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void TChargedProp::momD2(ScalarField &s, FFT &fft) |  | ||||||
| { |  | ||||||
|     EmField     &A = *env().getObject<EmField>(par().emField); |  | ||||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), |  | ||||||
|                 Amu(env().getGrid()); |  | ||||||
|  |  | ||||||
|     result = zero; |  | ||||||
|      |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu); |  | ||||||
|         buf = (*phase_[mu])*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); |  | ||||||
|         buf = Amu*Amu*buf; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + .5*buf; |  | ||||||
|     } |  | ||||||
|     fft.FFT_all_dim(s, s, FFT::backward); |  | ||||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) |  | ||||||
|     { |  | ||||||
|         Amu = peekLorentz(A, mu);         |  | ||||||
|         buf = Amu*Amu*s; |  | ||||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); |  | ||||||
|         result = result + .5*adj(*phase_[mu])*buf; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     s = result; |  | ||||||
| } |  | ||||||
| @@ -1,61 +0,0 @@ | |||||||
| #ifndef Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
| #define Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                       Charged scalar propagator                            * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MScalar) |  | ||||||
|  |  | ||||||
| class ChargedPropPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, |  | ||||||
|                                     std::string, emField, |  | ||||||
|                                     std::string, source, |  | ||||||
|                                     double,      mass, |  | ||||||
|                                     double,      charge, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TChargedProp: public Module<ChargedPropPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     SCALAR_TYPE_ALIASES(SIMPL,); |  | ||||||
|     typedef PhotonR::GaugeField     EmField; |  | ||||||
|     typedef PhotonR::GaugeLinkField EmComp; |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TChargedProp(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TChargedProp(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| private: |  | ||||||
|     void momD1(ScalarField &s, FFT &fft); |  | ||||||
|     void momD2(ScalarField &s, FFT &fft); |  | ||||||
| private: |  | ||||||
|     std::string                freeMomPropName_, GFSrcName_; |  | ||||||
|     std::vector<std::string>   phaseName_; |  | ||||||
|     ScalarField                *freeMomProp_, *GFSrc_; |  | ||||||
|     std::vector<ScalarField *> phase_; |  | ||||||
|     EmField                    *A; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MScalar_ChargedProp_hpp_ |  | ||||||
| @@ -1,79 +0,0 @@ | |||||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> |  | ||||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Hadrons; |  | ||||||
| using namespace MScalar; |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
| *                        TFreeProp implementation                             * |  | ||||||
| ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| TFreeProp::TFreeProp(const std::string name) |  | ||||||
| : Module<FreePropPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| std::vector<std::string> TFreeProp::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in = {par().source}; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::vector<std::string> TFreeProp::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| void TFreeProp::setup(void) |  | ||||||
| { |  | ||||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); |  | ||||||
|      |  | ||||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         env().registerLattice<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     env().registerLattice<ScalarField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| void TFreeProp::execute(void) |  | ||||||
| { |  | ||||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); |  | ||||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); |  | ||||||
|     ScalarField *freeMomProp; |  | ||||||
|  |  | ||||||
|     if (!env().hasCreatedObject(freeMomPropName_)) |  | ||||||
|     { |  | ||||||
|         LOG(Message) << "Caching momentum space free scalar propagator" |  | ||||||
|                      << " (mass= " << par().mass << ")..." << std::endl; |  | ||||||
|         freeMomProp = env().createLattice<ScalarField>(freeMomPropName_); |  | ||||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         freeMomProp = env().getObject<ScalarField>(freeMomPropName_); |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Computing free scalar propagator..." << std::endl; |  | ||||||
|     SIMPL::FreePropagator(source, prop, *freeMomProp); |  | ||||||
|      |  | ||||||
|     if (!par().output.empty()) |  | ||||||
|     { |  | ||||||
|         TextWriter            writer(par().output + "." + |  | ||||||
|                                      std::to_string(env().getTrajectory())); |  | ||||||
|         std::vector<TComplex> buf; |  | ||||||
|         std::vector<Complex>  result; |  | ||||||
|          |  | ||||||
|         sliceSum(prop, buf, Tp); |  | ||||||
|         result.resize(buf.size()); |  | ||||||
|         for (unsigned int t = 0; t < buf.size(); ++t) |  | ||||||
|         { |  | ||||||
|             result[t] = TensorRemove(buf[t]); |  | ||||||
|         } |  | ||||||
|         write(writer, "prop", result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| @@ -1,50 +0,0 @@ | |||||||
| #ifndef Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
| #define Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                               FreeProp                                     * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MScalar) |  | ||||||
|  |  | ||||||
| class FreePropPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, |  | ||||||
|                                     std::string, source, |  | ||||||
|                                     double,      mass, |  | ||||||
|                                     std::string, output); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class TFreeProp: public Module<FreePropPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     SCALAR_TYPE_ALIASES(SIMPL,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TFreeProp(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TFreeProp(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| private: |  | ||||||
|     std::string freeMomPropName_; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MScalar_FreeProp_hpp_ |  | ||||||
| @@ -1,6 +0,0 @@ | |||||||
| #ifndef Hadrons_Scalar_hpp_ |  | ||||||
| #define Hadrons_Scalar_hpp_ |  | ||||||
|  |  | ||||||
| #define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m) |  | ||||||
|  |  | ||||||
| #endif // Hadrons_Scalar_hpp_ |  | ||||||
| @@ -1,114 +0,0 @@ | |||||||
| #ifndef Hadrons_MSink_Point_hpp_ |  | ||||||
| #define Hadrons_MSink_Point_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                                   Point                                    * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MSink) |  | ||||||
|  |  | ||||||
| class PointPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, |  | ||||||
|                                     std::string, mom); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TPoint: public Module<PointPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
|     SINK_TYPE_ALIASES(); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TPoint(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TPoint(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink); |  | ||||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                          TPoint implementation                             * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TPoint<FImpl>::TPoint(const std::string name) |  | ||||||
| : Module<PointPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TPoint<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TPoint<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TPoint<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     unsigned int size; |  | ||||||
|      |  | ||||||
|     size = env().template lattice4dSize<LatticeComplex>(); |  | ||||||
|     env().registerObject(getName(), size); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TPoint<FImpl>::execute(void) |  | ||||||
| { |  | ||||||
|     std::vector<Real> p = strToVec<Real>(par().mom); |  | ||||||
|     LatticeComplex    ph(env().getGrid()), coor(env().getGrid()); |  | ||||||
|     Complex           i(0.0,1.0); |  | ||||||
|      |  | ||||||
|     LOG(Message) << "Setting up point sink function for momentum [" |  | ||||||
|                  << par().mom << "]" << std::endl; |  | ||||||
|     ph = zero; |  | ||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |  | ||||||
|     { |  | ||||||
|         LatticeCoordinate(coor, mu); |  | ||||||
|         ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; |  | ||||||
|     } |  | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |  | ||||||
|     auto sink = [ph](const PropagatorField &field) |  | ||||||
|     { |  | ||||||
|         SlicedPropagator res; |  | ||||||
|         PropagatorField  tmp = ph*field; |  | ||||||
|          |  | ||||||
|         sliceSum(tmp, res, Tp); |  | ||||||
|          |  | ||||||
|         return res; |  | ||||||
|     }; |  | ||||||
|     env().setObject(getName(), new SinkFn(sink)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MSink_Point_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSolver_RBPrecCG_hpp_ | #ifndef Hadrons_RBPrecCG_hpp_ | ||||||
| #define Hadrons_MSolver_RBPrecCG_hpp_ | #define Hadrons_RBPrecCG_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -53,7 +53,7 @@ template <typename FImpl> | |||||||
| class TRBPrecCG: public Module<RBPrecCGPar> | class TRBPrecCG: public Module<RBPrecCGPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TRBPrecCG(const std::string name); |     TRBPrecCG(const std::string name); | ||||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSolver_RBPrecCG_hpp_ | #endif // Hadrons_RBPrecCG_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_Point_hpp_ | #ifndef Hadrons_Point_hpp_ | ||||||
| #define Hadrons_MSource_Point_hpp_ | #define Hadrons_Point_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,7 +63,7 @@ template <typename FImpl> | |||||||
| class TPoint: public Module<PointPar> | class TPoint: public Module<PointPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TPoint(const std::string name); |     TPoint(const std::string name); | ||||||
| @@ -78,8 +78,7 @@ public: | |||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSource); | MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); | ||||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TPoint template implementation                       * |  *                       TPoint template implementation                       * | ||||||
| @@ -133,4 +132,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_Point_hpp_ | #endif // Hadrons_Point_hpp_ | ||||||
|   | |||||||
| @@ -6,7 +6,6 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
| @@ -28,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_SeqGamma_hpp_ | #ifndef Hadrons_SeqGamma_hpp_ | ||||||
| #define Hadrons_MSource_SeqGamma_hpp_ | #define Hadrons_SeqGamma_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -72,7 +71,7 @@ template <typename FImpl> | |||||||
| class TSeqGamma: public Module<SeqGammaPar> | class TSeqGamma: public Module<SeqGammaPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TSeqGamma(const std::string name); |     TSeqGamma(const std::string name); | ||||||
| @@ -150,9 +149,9 @@ void TSeqGamma<FImpl>::execute(void) | |||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||||
|     { |     { | ||||||
|         LatticeCoordinate(coor, mu); |         LatticeCoordinate(coor, mu); | ||||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); |         ph = ph + p[mu]*coor; | ||||||
|     } |     } | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |     ph = exp(i*ph); | ||||||
|     LatticeCoordinate(t, Tp); |     LatticeCoordinate(t, Tp); | ||||||
|     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); |     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); | ||||||
| } | } | ||||||
| @@ -161,4 +160,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_SeqGamma_hpp_ | #endif // Hadrons_SeqGamma_hpp_ | ||||||
|   | |||||||
| @@ -1,147 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules/MSource/Wall.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_WallSource_hpp_ |  | ||||||
| #define Hadrons_MSource_WallSource_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> |  | ||||||
| #include <Grid/Hadrons/Module.hpp> |  | ||||||
| #include <Grid/Hadrons/ModuleFactory.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   |  | ||||||
|  Wall source |  | ||||||
|  ----------------------------- |  | ||||||
|  * src_x = delta(x_3 - tW) * exp(i x.mom) |  | ||||||
|   |  | ||||||
|  * options: |  | ||||||
|  - tW: source timeslice (integer) |  | ||||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.") |  | ||||||
|   |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                         Wall                                               * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| BEGIN_MODULE_NAMESPACE(MSource) |  | ||||||
|  |  | ||||||
| class WallPar: Serializable |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar, |  | ||||||
|                                     unsigned int, tW, |  | ||||||
|                                     std::string, mom); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| class TWall: public Module<WallPar> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |  | ||||||
| public: |  | ||||||
|     // constructor |  | ||||||
|     TWall(const std::string name); |  | ||||||
|     // destructor |  | ||||||
|     virtual ~TWall(void) = default; |  | ||||||
|     // dependency relation |  | ||||||
|     virtual std::vector<std::string> getInput(void); |  | ||||||
|     virtual std::vector<std::string> getOutput(void); |  | ||||||
|     // setup |  | ||||||
|     virtual void setup(void); |  | ||||||
|     // execution |  | ||||||
|     virtual void execute(void); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** |  | ||||||
|  *                 TWall implementation                                       * |  | ||||||
|  ******************************************************************************/ |  | ||||||
| // constructor ///////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| TWall<FImpl>::TWall(const std::string name) |  | ||||||
| : Module<WallPar>(name) |  | ||||||
| {} |  | ||||||
|  |  | ||||||
| // dependencies/products /////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TWall<FImpl>::getInput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> in; |  | ||||||
|      |  | ||||||
|     return in; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <typename FImpl> |  | ||||||
| std::vector<std::string> TWall<FImpl>::getOutput(void) |  | ||||||
| { |  | ||||||
|     std::vector<std::string> out = {getName()}; |  | ||||||
|      |  | ||||||
|     return out; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // setup /////////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TWall<FImpl>::setup(void) |  | ||||||
| { |  | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// |  | ||||||
| template <typename FImpl> |  | ||||||
| void TWall<FImpl>::execute(void) |  | ||||||
| {     |  | ||||||
|     LOG(Message) << "Generating wall source at t = " << par().tW  |  | ||||||
|                  << " with momentum " << par().mom << std::endl; |  | ||||||
|      |  | ||||||
|     PropagatorField &src = *env().template createLattice<PropagatorField>(getName()); |  | ||||||
|     Lattice<iScalar<vInteger>> t(env().getGrid()); |  | ||||||
|     LatticeComplex             ph(env().getGrid()), coor(env().getGrid()); |  | ||||||
|     std::vector<Real>          p; |  | ||||||
|     Complex                    i(0.0,1.0); |  | ||||||
|      |  | ||||||
|     p  = strToVec<Real>(par().mom); |  | ||||||
|     ph = zero; |  | ||||||
|     for(unsigned int mu = 0; mu < Nd; mu++) |  | ||||||
|     { |  | ||||||
|         LatticeCoordinate(coor, mu); |  | ||||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); |  | ||||||
|     } |  | ||||||
|     ph = exp((Real)(2*M_PI)*i*ph); |  | ||||||
|     LatticeCoordinate(t, Tp); |  | ||||||
|     src = 1.; |  | ||||||
|     src = where((t == par().tW), src*ph, 0.*src); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE |  | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_WallSource_hpp_ |  | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_MSource_Z2_hpp_ | #ifndef Hadrons_Z2_hpp_ | ||||||
| #define Hadrons_MSource_Z2_hpp_ | #define Hadrons_Z2_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -67,7 +67,7 @@ template <typename FImpl> | |||||||
| class TZ2: public Module<Z2Par> | class TZ2: public Module<Z2Par> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FERM_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TZ2(const std::string name); |     TZ2(const std::string name); | ||||||
| @@ -82,8 +82,7 @@ public: | |||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Z2,       TZ2<FIMPL>,        MSource); | MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); | ||||||
| MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource); |  | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TZ2 template implementation                          * |  *                       TZ2 template implementation                          * | ||||||
| @@ -149,4 +148,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_MSource_Z2_hpp_ | #endif // Hadrons_Z2_hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,34 @@ | |||||||
| #ifndef Hadrons_MFermion_GaugeProp_hpp_ | /*************************************************************************************
 | ||||||
| #define Hadrons_MFermion_GaugeProp_hpp_ | 
 | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  | 
 | ||||||
|  | Source file: extras/Hadrons/Modules/Quark.hpp | ||||||
|  | 
 | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  | 
 | ||||||
|  | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  | 
 | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  | 
 | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | 
 | ||||||
|  | #ifndef Hadrons_Quark_hpp_ | ||||||
|  | #define Hadrons_Quark_hpp_ | ||||||
| 
 | 
 | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -8,29 +37,27 @@ | |||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                                GaugeProp                                   * |  *                               TQuark                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MFermion) | class QuarkPar: Serializable | ||||||
| 
 |  | ||||||
| class GaugePropPar: Serializable |  | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, | ||||||
|                                     std::string, source, |                                     std::string, source, | ||||||
|                                     std::string, solver); |                                     std::string, solver); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TGaugeProp: public Module<GaugePropPar> | class TQuark: public Module<QuarkPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     FGS_TYPE_ALIASES(FImpl,); |     TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor
 |     // constructor
 | ||||||
|     TGaugeProp(const std::string name); |     TQuark(const std::string name); | ||||||
|     // destructor
 |     // destructor
 | ||||||
|     virtual ~TGaugeProp(void) = default; |     virtual ~TQuark(void) = default; | ||||||
|     // dependency relation
 |     // dependencies/products
 | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     // setup
 |     // setup
 | ||||||
| @@ -42,20 +69,20 @@ private: | |||||||
|     SolverFn     *solver_{nullptr}; |     SolverFn     *solver_{nullptr}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion); | MODULE_REGISTER(Quark, TQuark<FIMPL>); | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                      TGaugeProp implementation                             * |  *                          TQuark implementation                             * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| // constructor /////////////////////////////////////////////////////////////////
 | // constructor /////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| TGaugeProp<FImpl>::TGaugeProp(const std::string name) | TQuark<FImpl>::TQuark(const std::string name) | ||||||
| : Module<GaugePropPar>(name) | : Module(name) | ||||||
| {} | {} | ||||||
| 
 | 
 | ||||||
| // dependencies/products ///////////////////////////////////////////////////////
 | // dependencies/products ///////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> in = {par().source, par().solver}; |     std::vector<std::string> in = {par().source, par().solver}; | ||||||
|      |      | ||||||
| @@ -63,7 +90,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> out = {getName(), getName() + "_5d"}; |     std::vector<std::string> out = {getName(), getName() + "_5d"}; | ||||||
|      |      | ||||||
| @@ -72,7 +99,7 @@ std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | |||||||
| 
 | 
 | ||||||
| // setup ///////////////////////////////////////////////////////////////////////
 | // setup ///////////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TGaugeProp<FImpl>::setup(void) | void TQuark<FImpl>::setup(void) | ||||||
| { | { | ||||||
|     Ls_ = env().getObjectLs(par().solver); |     Ls_ = env().getObjectLs(par().solver); | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |     env().template registerLattice<PropagatorField>(getName()); | ||||||
| @@ -84,13 +111,13 @@ void TGaugeProp<FImpl>::setup(void) | |||||||
| 
 | 
 | ||||||
| // execution ///////////////////////////////////////////////////////////////////
 | // execution ///////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TGaugeProp<FImpl>::execute(void) | void TQuark<FImpl>::execute(void) | ||||||
| { | { | ||||||
|     LOG(Message) << "Computing quark propagator '" << getName() << "'" |     LOG(Message) << "Computing quark propagator '" << getName() << "'" | ||||||
|     << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), |     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), | ||||||
|     tmp(env().getGrid()); |                     tmp(env().getGrid()); | ||||||
|     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); |     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); | ||||||
|     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName); |     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName); | ||||||
|     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source); |     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source); | ||||||
| @@ -101,7 +128,7 @@ void TGaugeProp<FImpl>::execute(void) | |||||||
|     } |     } | ||||||
|      |      | ||||||
|     LOG(Message) << "Inverting using solver '" << par().solver |     LOG(Message) << "Inverting using solver '" << par().solver | ||||||
|     << "' on source '" << par().source << "'" << std::endl; |                  << "' on source '" << par().source << "'" << std::endl; | ||||||
|     for (unsigned int s = 0; s < Ns; ++s) |     for (unsigned int s = 0; s < Ns; ++s) | ||||||
|     for (unsigned int c = 0; c < Nc; ++c) |     for (unsigned int c = 0; c < Nc; ++c) | ||||||
|     { |     { | ||||||
| @@ -143,18 +170,16 @@ void TGaugeProp<FImpl>::execute(void) | |||||||
|         if (Ls_ > 1) |         if (Ls_ > 1) | ||||||
|         { |         { | ||||||
|             PropagatorField &p4d = |             PropagatorField &p4d = | ||||||
|             *env().template getObject<PropagatorField>(getName()); |                 *env().template getObject<PropagatorField>(getName()); | ||||||
|              |              | ||||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); |             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||||
|             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); |             axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); | ||||||
|             ExtractSlice(tmp, sol, 0, 0); |             ExtractSlice(tmp, sol, 0, 0); | ||||||
|             FermToProp(p4d, tmp, s, c); |             FermToProp(p4d, tmp, s, c); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| END_MODULE_NAMESPACE |  | ||||||
| 
 |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| #endif // Hadrons_MFermion_GaugeProp_hpp_
 | #endif // Hadrons_Quark_hpp_
 | ||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #ifndef Hadrons____FILEBASENAME____hpp_ | ||||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #define Hadrons____FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #endif // Hadrons____FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #ifndef Hadrons____FILEBASENAME____hpp_ | ||||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #define Hadrons____FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | #endif // Hadrons____FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,38 +1,19 @@ | |||||||
| modules_cc =\ | modules_cc =\ | ||||||
|   Modules/MContraction/WeakHamiltonianEye.cc \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianNonEye.cc \ |  | ||||||
|   Modules/MContraction/WeakNeutral4ptDisc.cc \ |  | ||||||
|   Modules/MGauge/Load.cc \ |   Modules/MGauge/Load.cc \ | ||||||
|   Modules/MGauge/Random.cc \ |   Modules/MGauge/Random.cc \ | ||||||
|   Modules/MGauge/StochEm.cc \ |   Modules/MGauge/Unit.cc | ||||||
|   Modules/MGauge/Unit.cc \ |  | ||||||
|   Modules/MScalar/ChargedProp.cc \ |  | ||||||
|   Modules/MScalar/FreeProp.cc |  | ||||||
|  |  | ||||||
| modules_hpp =\ | modules_hpp =\ | ||||||
|   Modules/MAction/DWF.hpp \ |   Modules/MAction/DWF.hpp \ | ||||||
|   Modules/MAction/Wilson.hpp \ |   Modules/MAction/Wilson.hpp \ | ||||||
|   Modules/MContraction/Baryon.hpp \ |   Modules/MContraction/Baryon.hpp \ | ||||||
|   Modules/MContraction/DiscLoop.hpp \ |  | ||||||
|   Modules/MContraction/Gamma3pt.hpp \ |  | ||||||
|   Modules/MContraction/Meson.hpp \ |   Modules/MContraction/Meson.hpp \ | ||||||
|   Modules/MContraction/WeakHamiltonian.hpp \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianEye.hpp \ |  | ||||||
|   Modules/MContraction/WeakHamiltonianNonEye.hpp \ |  | ||||||
|   Modules/MContraction/WeakNeutral4ptDisc.hpp \ |  | ||||||
|   Modules/MFermion/GaugeProp.hpp \ |  | ||||||
|   Modules/MGauge/Load.hpp \ |   Modules/MGauge/Load.hpp \ | ||||||
|   Modules/MGauge/Random.hpp \ |   Modules/MGauge/Random.hpp \ | ||||||
|   Modules/MGauge/StochEm.hpp \ |  | ||||||
|   Modules/MGauge/Unit.hpp \ |   Modules/MGauge/Unit.hpp \ | ||||||
|   Modules/MLoop/NoiseLoop.hpp \ |  | ||||||
|   Modules/MScalar/ChargedProp.hpp \ |  | ||||||
|   Modules/MScalar/FreeProp.hpp \ |  | ||||||
|   Modules/MScalar/Scalar.hpp \ |  | ||||||
|   Modules/MSink/Point.hpp \ |  | ||||||
|   Modules/MSolver/RBPrecCG.hpp \ |   Modules/MSolver/RBPrecCG.hpp \ | ||||||
|   Modules/MSource/Point.hpp \ |   Modules/MSource/Point.hpp \ | ||||||
|   Modules/MSource/SeqGamma.hpp \ |   Modules/MSource/SeqGamma.hpp \ | ||||||
|   Modules/MSource/Wall.hpp \ |   Modules/MSource/Z2.hpp \ | ||||||
|   Modules/MSource/Z2.hpp |   Modules/Quark.hpp | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,11 +0,0 @@ | |||||||
| #include <qed-fvol/Global.hpp> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace QCD; |  | ||||||
| using namespace QedFVol; |  | ||||||
|  |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); |  | ||||||
| QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); |  | ||||||
| @@ -1,42 +0,0 @@ | |||||||
| #ifndef QedFVol_Global_hpp_ |  | ||||||
| #define QedFVol_Global_hpp_ |  | ||||||
|  |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
| #define BEGIN_QEDFVOL_NAMESPACE \ |  | ||||||
| namespace Grid {\ |  | ||||||
| using namespace QCD;\ |  | ||||||
| namespace QedFVol {\ |  | ||||||
| using Grid::operator<<; |  | ||||||
| #define END_QEDFVOL_NAMESPACE }} |  | ||||||
|  |  | ||||||
| /* the 'using Grid::operator<<;' statement prevents a very nasty compilation |  | ||||||
|  * error with GCC (clang compiles fine without it). |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| BEGIN_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| class QedFVolLogger: public Logger |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|     QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, |  | ||||||
|                                                   GridLogColours, "BLACK"){}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| #define LOG(channel) std::cout << QedFVolLog##channel |  | ||||||
| #define QEDFVOL_ERROR(msg)\ |  | ||||||
| LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ |  | ||||||
|            << __LINE__ << ")" << std::endl;\ |  | ||||||
| abort(); |  | ||||||
|  |  | ||||||
| #define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; |  | ||||||
|  |  | ||||||
| extern QedFVolLogger QedFVolLogError; |  | ||||||
| extern QedFVolLogger QedFVolLogWarning; |  | ||||||
| extern QedFVolLogger QedFVolLogMessage; |  | ||||||
| extern QedFVolLogger QedFVolLogIterative; |  | ||||||
| extern QedFVolLogger QedFVolLogDebug; |  | ||||||
|  |  | ||||||
| END_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // QedFVol_Global_hpp_ |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| AM_CXXFLAGS += -I$(top_srcdir)/extras |  | ||||||
|  |  | ||||||
| bin_PROGRAMS = qed-fvol |  | ||||||
|  |  | ||||||
| qed_fvol_SOURCES =   \ |  | ||||||
|     qed-fvol.cc      \ |  | ||||||
|     Global.cc |  | ||||||
|  |  | ||||||
| qed_fvol_LDADD   = -lGrid |  | ||||||
| @@ -1,265 +0,0 @@ | |||||||
| #ifndef QEDFVOL_WILSONLOOPS_H |  | ||||||
| #define QEDFVOL_WILSONLOOPS_H |  | ||||||
|  |  | ||||||
| #include <Global.hpp> |  | ||||||
|  |  | ||||||
| BEGIN_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| template <class Gimpl> class NewWilsonLoops : public Gimpl { |  | ||||||
| public: |  | ||||||
|   INHERIT_GIMPL_TYPES(Gimpl); |  | ||||||
|  |  | ||||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; |  | ||||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // directed plaquette oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, |  | ||||||
|                            const int mu, const int nu) { |  | ||||||
|     // Annoyingly, must use either scope resolution to find dependent base |  | ||||||
|     // class, |  | ||||||
|     // or this-> ; there is no "this" in a static method. This forces explicit |  | ||||||
|     // Gimpl scope |  | ||||||
|     // resolution throughout the usage in this file, and rather defeats the |  | ||||||
|     // purpose of deriving |  | ||||||
|     // from Gimpl. |  | ||||||
|     plaq = Gimpl::CovShiftBackward( |  | ||||||
|         U[mu], mu, Gimpl::CovShiftBackward( |  | ||||||
|                        U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // trace of directed plaquette oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void traceDirPlaquette(LatticeComplex &plaq, |  | ||||||
|                                 const std::vector<GaugeMat> &U, const int mu, |  | ||||||
|                                 const int nu) { |  | ||||||
|     GaugeMat sp(U[0]._grid); |  | ||||||
|     dirPlaquette(sp, U, mu, nu); |  | ||||||
|     plaq = trace(sp); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void sitePlaquette(LatticeComplex &Plaq, |  | ||||||
|                             const std::vector<GaugeMat> &U) { |  | ||||||
|     LatticeComplex sitePlaq(U[0]._grid); |  | ||||||
|     Plaq = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceDirPlaquette(sitePlaq, U, mu, nu); |  | ||||||
|         Plaq = Plaq + sitePlaq; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumPlaquette(const GaugeLorentz &Umu) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Plaq(Umu._grid); |  | ||||||
|  |  | ||||||
|     sitePlaquette(Plaq, U); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Plaq); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of plaquette |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgPlaquette(const GaugeLorentz &Umu) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumplaq = sumPlaquette(Umu); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; |  | ||||||
|     return sumplaq / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // Wilson loop of size (R1, R2), oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, |  | ||||||
|                            const int Rmu, const int Rnu, |  | ||||||
|                            const int mu, const int nu) { |  | ||||||
|     wl = U[nu]; |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rnu-1; i++){ |  | ||||||
|       wl = Gimpl::CovShiftForward(U[nu], nu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rmu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftForward(U[mu], mu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rnu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int i = 0; i < Rmu; i++){ |  | ||||||
|       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // trace of Wilson Loop oriented in mu,nu plane |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void traceWilsonLoop(LatticeComplex &wl, |  | ||||||
|                                 const std::vector<GaugeMat> &U, |  | ||||||
|                                 const int Rmu, const int Rnu, |  | ||||||
|                                 const int mu, const int nu) { |  | ||||||
|     GaugeMat sp(U[0]._grid); |  | ||||||
|     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); |  | ||||||
|     wl = trace(sp); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over planes of Wilson loop with length R1 |  | ||||||
|   // in the time direction |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|  |  | ||||||
|     int ndim = U[0]._grid->_ndimension; |  | ||||||
|  |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int nu = 0; nu < ndim - 1; nu++) { |  | ||||||
|       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); |  | ||||||
|       Wl = Wl + siteWl; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum Wilson loop over all planes orthogonal to the time direction |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static void siteSpatialWilsonLoop(LatticeComplex &Wl, |  | ||||||
|                             const std::vector<GaugeMat> &U, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     LatticeComplex siteWl(U[0]._grid); |  | ||||||
|  |  | ||||||
|     Wl = zero; |  | ||||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { |  | ||||||
|       for (int nu = 0; nu < mu; nu++) { |  | ||||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); |  | ||||||
|         Wl = Wl + siteWl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of timelike Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteTimelikeWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // sum over all x,y,z,t and over all planes of spatial Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     std::vector<GaugeMat> U(4, Umu._grid); |  | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { |  | ||||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     LatticeComplex Wl(Umu._grid); |  | ||||||
|  |  | ||||||
|     siteSpatialWilsonLoop(Wl, U, R1, R2); |  | ||||||
|  |  | ||||||
|     TComplex Tp = sum(Wl); |  | ||||||
|     Complex p = TensorRemove(Tp); |  | ||||||
|     return p.real(); |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumWl = sumWilsonLoop(Umu, R1, R2); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = 1.0 * ndim * (ndim - 1); |  | ||||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of timelike Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = 1.0 * (ndim - 1); |  | ||||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   // average over all x,y,z,t and over all planes of spatial Wilson loop |  | ||||||
|   ////////////////////////////////////////////////// |  | ||||||
|   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, |  | ||||||
|                             const int R1, const int R2) { |  | ||||||
|     int ndim = Umu._grid->_ndimension; |  | ||||||
|     Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); |  | ||||||
|     Real vol = Umu._grid->gSites(); |  | ||||||
|     Real faces = 1.0 * (ndim - 1) * (ndim - 2); |  | ||||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| END_QEDFVOL_NAMESPACE |  | ||||||
|  |  | ||||||
| #endif // QEDFVOL_WILSONLOOPS_H |  | ||||||
| @@ -1,88 +0,0 @@ | |||||||
| #include <Global.hpp> |  | ||||||
| #include <WilsonLoops.h> |  | ||||||
|  |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace QCD; |  | ||||||
| using namespace QedFVol; |  | ||||||
|  |  | ||||||
| typedef PeriodicGaugeImpl<QedGimplR>    QedPeriodicGimplR; |  | ||||||
| typedef PhotonR::GaugeField             EmField; |  | ||||||
| typedef PhotonR::GaugeLinkField         EmComp; |  | ||||||
|  |  | ||||||
| const int NCONFIGS = 10; |  | ||||||
| const int NWILSON = 10; |  | ||||||
|  |  | ||||||
| int main(int argc, char *argv[]) |  | ||||||
| { |  | ||||||
|     // parse command line |  | ||||||
|     std::string parameterFileName; |  | ||||||
|      |  | ||||||
|     if (argc < 2) |  | ||||||
|     { |  | ||||||
|         std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]"; |  | ||||||
|         std::cerr << std::endl; |  | ||||||
|         std::exit(EXIT_FAILURE); |  | ||||||
|     } |  | ||||||
|     parameterFileName = argv[1]; |  | ||||||
|      |  | ||||||
|     // initialization |  | ||||||
|     Grid_init(&argc, &argv); |  | ||||||
|     QedFVolLogError.Active(GridLogError.isActive()); |  | ||||||
|     QedFVolLogWarning.Active(GridLogWarning.isActive()); |  | ||||||
|     QedFVolLogMessage.Active(GridLogMessage.isActive()); |  | ||||||
|     QedFVolLogIterative.Active(GridLogIterative.isActive()); |  | ||||||
|     QedFVolLogDebug.Active(GridLogDebug.isActive()); |  | ||||||
|     LOG(Message) << "Grid initialized" << std::endl; |  | ||||||
|      |  | ||||||
|     // QED stuff |  | ||||||
|     std::vector<int> latt_size   = GridDefaultLatt(); |  | ||||||
|     std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); |  | ||||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|     GridCartesian    grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|     GridParallelRNG  pRNG(&grid); |  | ||||||
|     PhotonR          photon(PhotonR::Gauge::feynman, |  | ||||||
|                             PhotonR::ZmScheme::qedL); |  | ||||||
|     EmField          a(&grid); |  | ||||||
|     EmField          expA(&grid); |  | ||||||
|  |  | ||||||
|     Complex imag_unit(0, 1); |  | ||||||
|  |  | ||||||
|     Real wlA; |  | ||||||
|     std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); |  | ||||||
|  |  | ||||||
|     pRNG.SeedRandomDevice(); |  | ||||||
|  |  | ||||||
|     LOG(Message) << "Wilson loop calculation beginning" << std::endl; |  | ||||||
|     for(int ic = 0; ic < NCONFIGS; ic++){ |  | ||||||
|         LOG(Message) << "Configuration " << ic <<std::endl; |  | ||||||
|         photon.StochasticField(a, pRNG); |  | ||||||
|  |  | ||||||
|         // Exponentiate photon field |  | ||||||
|         expA = exp(imag_unit*a); |  | ||||||
|  |  | ||||||
|         // Calculate Wilson loops |  | ||||||
|         for(int iw=1; iw<=NWILSON; iw++){ |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlAvg[iw-1] -= 2*log(wlA); |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlTime[iw-1] -= 2*log(wlA); |  | ||||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3; |  | ||||||
|             logWlSpace[iw-1] -= 2*log(wlA); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     LOG(Message) << "Wilson loop calculation completed" << std::endl; |  | ||||||
|      |  | ||||||
|     // Calculate Wilson loops |  | ||||||
|     for(int iw=1; iw<=10; iw++){ |  | ||||||
|         LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; |  | ||||||
|         LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // epilogue |  | ||||||
|     LOG(Message) << "Grid is finalizing now" << std::endl; |  | ||||||
|     Grid_finalize(); |  | ||||||
|      |  | ||||||
|     return EXIT_SUCCESS; |  | ||||||
| } |  | ||||||
| @@ -20,17 +20,4 @@ The simple testcase in this directory is the submitted bug report that encapsula | |||||||
| problem. The test case works with icpc and with clang++, but fails consistently on g++ | problem. The test case works with icpc and with clang++, but fails consistently on g++ | ||||||
| current variants. | current variants. | ||||||
|  |  | ||||||
| Peter | Peter | ||||||
|  |  | ||||||
|  |  | ||||||
| ************ |  | ||||||
|  |  | ||||||
| Second GCC bug reported, see Issue 100. |  | ||||||
|  |  | ||||||
| https://wandbox.org/permlink/tzssJza6R9XnqANw |  | ||||||
| https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652 |  | ||||||
|  |  | ||||||
| Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the |  | ||||||
| CI test suite. The limitations of Travis runtime limits & weak cores are being shown. |  | ||||||
|  |  | ||||||
| Travis uses 5.4.1 for g++-5. |  | ||||||
| @@ -1,86 +0,0 @@ | |||||||
| #! /bin/sh |  | ||||||
|  |  | ||||||
| prefix=@prefix@ |  | ||||||
| exec_prefix=@exec_prefix@ |  | ||||||
| includedir=@includedir@ |  | ||||||
|  |  | ||||||
| usage() |  | ||||||
| { |  | ||||||
|   cat <<EOF |  | ||||||
| Usage: grid-config [OPTION] |  | ||||||
|  |  | ||||||
| Known values for OPTION are: |  | ||||||
|  |  | ||||||
|   --prefix     show Grid installation prefix |  | ||||||
|   --cxxflags   print pre-processor and compiler flags |  | ||||||
|   --ldflags    print library linking flags |  | ||||||
|   --libs       print library linking information |  | ||||||
|   --summary    print full build summary |  | ||||||
|   --help       display this help and exit |  | ||||||
|   --version    output version information |  | ||||||
|   --git        print git revision |  | ||||||
|  |  | ||||||
| EOF |  | ||||||
|    |  | ||||||
|   exit $1 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| if test $# -eq 0; then |  | ||||||
|   usage 1 |  | ||||||
| fi |  | ||||||
|  |  | ||||||
| cflags=false |  | ||||||
| libs=false |  | ||||||
|  |  | ||||||
| while test $# -gt 0; do |  | ||||||
|   case "$1" in |  | ||||||
|     -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; |  | ||||||
|     *) optarg= ;; |  | ||||||
|   esac |  | ||||||
|    |  | ||||||
|   case "$1" in |  | ||||||
|     --prefix) |  | ||||||
|       echo $prefix |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --version) |  | ||||||
|       echo @VERSION@ |  | ||||||
|       exit 0 |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --git) |  | ||||||
|       echo "@GRID_BRANCH@ @GRID_SHA@" |  | ||||||
|       exit 0 |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --help) |  | ||||||
|       usage 0 |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --cxxflags) |  | ||||||
|       echo @GRID_CXXFLAGS@ |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --ldflags) |  | ||||||
|       echo @GRID_LDFLAGS@ |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --libs) |  | ||||||
|       echo @GRID_LIBS@ |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     --summary) |  | ||||||
|       echo "" |  | ||||||
|       echo "@GRID_SUMMARY@" |  | ||||||
|       echo "" |  | ||||||
|     ;; |  | ||||||
|      |  | ||||||
|     *) |  | ||||||
|       usage |  | ||||||
|       exit 1 |  | ||||||
|     ;; |  | ||||||
|   esac |  | ||||||
|   shift |  | ||||||
| done |  | ||||||
|  |  | ||||||
| exit 0 |  | ||||||
| @@ -1,37 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/DisableWarnings.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef DISABLE_WARNINGS_H |  | ||||||
| #define DISABLE_WARNINGS_H |  | ||||||
|  |  | ||||||
|  //disables and intel compiler specific warning (in json.hpp) |  | ||||||
| #pragma warning disable 488   |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -41,9 +41,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/GridQCDcore.h> | #include <Grid/GridQCDcore.h> | ||||||
| #include <Grid/qcd/action/Action.h> | #include <Grid/qcd/action/Action.h> | ||||||
| #include <Grid/qcd/utils/GaugeFix.h> |  | ||||||
| #include <Grid/qcd/smearing/Smearing.h> | #include <Grid/qcd/smearing/Smearing.h> | ||||||
| #include <Grid/parallelIO/MetaData.h> |  | ||||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,7 +38,28 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_BASE_H | #ifndef GRID_BASE_H | ||||||
| #define GRID_BASE_H | #define GRID_BASE_H | ||||||
|  |  | ||||||
| #include <Grid/GridStd.h> | /////////////////// | ||||||
|  | // Std C++ dependencies | ||||||
|  | /////////////////// | ||||||
|  | #include <cassert> | ||||||
|  | #include <complex> | ||||||
|  | #include <vector> | ||||||
|  | #include <iostream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <random> | ||||||
|  | #include <functional> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <signal.h> | ||||||
|  | #include <ctime> | ||||||
|  | #include <sys/time.h> | ||||||
|  | #include <chrono> | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Grid headers | ||||||
|  | /////////////////// | ||||||
|  | #include "Config.h" | ||||||
|  |  | ||||||
| #include <Grid/perfmon/Timer.h> | #include <Grid/perfmon/Timer.h> | ||||||
| #include <Grid/perfmon/PerfCount.h> | #include <Grid/perfmon/PerfCount.h> | ||||||
|   | |||||||
| @@ -1,29 +0,0 @@ | |||||||
| #ifndef GRID_STD_H |  | ||||||
| #define GRID_STD_H |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Std C++ dependencies |  | ||||||
| /////////////////// |  | ||||||
| #include <cassert> |  | ||||||
| #include <complex> |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <random> |  | ||||||
| #include <functional> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <signal.h> |  | ||||||
| #include <ctime> |  | ||||||
| #include <sys/time.h> |  | ||||||
| #include <chrono> |  | ||||||
| #include <zlib.h> |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Grid config |  | ||||||
| /////////////////// |  | ||||||
| #include "Config.h" |  | ||||||
|  |  | ||||||
| #endif /* GRID_STD_H */ |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| #pragma once |  | ||||||
| #if defined __GNUC__ |  | ||||||
| #pragma GCC diagnostic push |  | ||||||
| #pragma GCC diagnostic ignored "-Wdeprecated-declarations" |  | ||||||
| #endif |  | ||||||
| #include <Grid/Eigen/Dense> |  | ||||||
| #if defined __GNUC__ |  | ||||||
| #pragma GCC diagnostic pop |  | ||||||
| #endif |  | ||||||
| @@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3 | |||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
| if BUILD_COMMS_MPIT | if BUILD_COMMS_MPI3L | ||||||
|   extra_sources+=communicator/Communicator_mpit.cc |   extra_sources+=communicator/Communicator_mpi3_leader.cc | ||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
|     /************************************************************************************* |     /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./lib/Algorithms.h |     Source file: ./lib/Algorithms.h | ||||||
|  |  | ||||||
| @@ -37,7 +37,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/algorithms/approx/Chebyshev.h> | #include <Grid/algorithms/approx/Chebyshev.h> | ||||||
| #include <Grid/algorithms/approx/Remez.h> | #include <Grid/algorithms/approx/Remez.h> | ||||||
| #include <Grid/algorithms/approx/MultiShiftFunction.h> | #include <Grid/algorithms/approx/MultiShiftFunction.h> | ||||||
| #include <Grid/algorithms/approx/Forecast.h> |  | ||||||
|  |  | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradient.h> | #include <Grid/algorithms/iterative/ConjugateGradient.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateResidual.h> | #include <Grid/algorithms/iterative/ConjugateResidual.h> | ||||||
| @@ -45,16 +44,30 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> |  | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | // Lanczos support | ||||||
|  | //#include <Grid/algorithms/iterative/MatrixUtils.h> | ||||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | #include <Grid/algorithms/CoarsenedMatrix.h> | ||||||
| #include <Grid/algorithms/FFT.h> | #include <Grid/algorithms/FFT.h> | ||||||
|  |  | ||||||
|  | // Eigen/lanczos | ||||||
| // EigCg | // EigCg | ||||||
|  | // MCR | ||||||
| // Pcg | // Pcg | ||||||
|  | // Multishift CG | ||||||
| // Hdcg | // Hdcg | ||||||
| // GCR | // GCR | ||||||
| // etc.. | // etc.. | ||||||
|  |  | ||||||
|  | // integrator/Leapfrog | ||||||
|  | // integrator/Omelyan | ||||||
|  | // integrator/ForceGradient | ||||||
|  |  | ||||||
|  | // montecarlo/hmc | ||||||
|  | // montecarlo/rhmc | ||||||
|  | // montecarlo/metropolis | ||||||
|  | // etc... | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -103,32 +103,29 @@ namespace Grid { | |||||||
|     GridBase *CoarseGrid; |     GridBase *CoarseGrid; | ||||||
|     GridBase *FineGrid; |     GridBase *FineGrid; | ||||||
|     std::vector<Lattice<Fobj> > subspace; |     std::vector<Lattice<Fobj> > subspace; | ||||||
|     int checkerboard; |  | ||||||
|  |  | ||||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  |     Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :  | ||||||
|     CoarseGrid(_CoarseGrid), |       CoarseGrid(_CoarseGrid), | ||||||
|       FineGrid(_FineGrid), |       FineGrid(_FineGrid), | ||||||
|       subspace(nbasis,_FineGrid), |       subspace(nbasis,_FineGrid) | ||||||
|       checkerboard(_checkerboard) |  | ||||||
| 	{ | 	{ | ||||||
| 	}; | 	}; | ||||||
|    |    | ||||||
|     void Orthogonalise(void){ |     void Orthogonalise(void){ | ||||||
|       CoarseScalar InnerProd(CoarseGrid);  |       CoarseScalar InnerProd(CoarseGrid);  | ||||||
|       std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl; |  | ||||||
|       blockOrthogonalise(InnerProd,subspace); |       blockOrthogonalise(InnerProd,subspace); | ||||||
|       std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl; |  | ||||||
|       blockOrthogonalise(InnerProd,subspace); |  | ||||||
|       //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl; |  | ||||||
|       //      CheckOrthogonal(); |  | ||||||
|     }  |     }  | ||||||
|     void CheckOrthogonal(void){ |     void CheckOrthogonal(void){ | ||||||
|       CoarseVector iProj(CoarseGrid);  |       CoarseVector iProj(CoarseGrid);  | ||||||
|       CoarseVector eProj(CoarseGrid);  |       CoarseVector eProj(CoarseGrid);  | ||||||
|  |       Lattice<CComplex> pokey(CoarseGrid); | ||||||
|  |  | ||||||
|  |        | ||||||
|       for(int i=0;i<nbasis;i++){ |       for(int i=0;i<nbasis;i++){ | ||||||
| 	blockProject(iProj,subspace[i],subspace); | 	blockProject(iProj,subspace[i],subspace); | ||||||
|  |  | ||||||
| 	eProj=zero;  | 	eProj=zero;  | ||||||
| 	parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){ | 	for(int ss=0;ss<CoarseGrid->oSites();ss++){ | ||||||
| 	  eProj._odata[ss](i)=CComplex(1.0); | 	  eProj._odata[ss](i)=CComplex(1.0); | ||||||
| 	} | 	} | ||||||
| 	eProj=eProj - iProj; | 	eProj=eProj - iProj; | ||||||
| @@ -140,7 +137,6 @@ namespace Grid { | |||||||
|       blockProject(CoarseVec,FineVec,subspace); |       blockProject(CoarseVec,FineVec,subspace); | ||||||
|     } |     } | ||||||
|     void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ |     void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||||
|       FineVec.checkerboard = subspace[0].checkerboard; |  | ||||||
|       blockPromote(CoarseVec,FineVec,subspace); |       blockPromote(CoarseVec,FineVec,subspace); | ||||||
|     } |     } | ||||||
|     void CreateSubspaceRandom(GridParallelRNG &RNG){ |     void CreateSubspaceRandom(GridParallelRNG &RNG){ | ||||||
| @@ -151,7 +147,6 @@ namespace Grid { | |||||||
|       Orthogonalise(); |       Orthogonalise(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /* |  | ||||||
|     virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)  |     virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)  | ||||||
|     { |     { | ||||||
|       // Run a Lanczos with sloppy convergence |       // Run a Lanczos with sloppy convergence | ||||||
| @@ -200,7 +195,7 @@ namespace Grid { | |||||||
| 	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl; | 	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl; | ||||||
| 	} | 	} | ||||||
|     } |     } | ||||||
|     */ |  | ||||||
|     virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { |     virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { | ||||||
|  |  | ||||||
|       RealD scale; |       RealD scale; | ||||||
|   | |||||||
| @@ -230,7 +230,6 @@ namespace Grid { | |||||||
|       // Barrel shift and collect global pencil |       // Barrel shift and collect global pencil | ||||||
|       std::vector<int> lcoor(Nd), gcoor(Nd); |       std::vector<int> lcoor(Nd), gcoor(Nd); | ||||||
|       result = source; |       result = source; | ||||||
|       int pc = processor_coor[dim]; |  | ||||||
|       for(int p=0;p<processors[dim];p++) { |       for(int p=0;p<processors[dim];p++) { | ||||||
|         PARALLEL_REGION |         PARALLEL_REGION | ||||||
|         { |         { | ||||||
| @@ -241,8 +240,7 @@ namespace Grid { | |||||||
|           for(int idx=0;idx<sgrid->lSites();idx++) { |           for(int idx=0;idx<sgrid->lSites();idx++) { | ||||||
|             sgrid->LocalIndexToLocalCoor(idx,cbuf); |             sgrid->LocalIndexToLocalCoor(idx,cbuf); | ||||||
|             peekLocalSite(s,result,cbuf); |             peekLocalSite(s,result,cbuf); | ||||||
| 	    cbuf[dim]+=((pc+p) % processors[dim])*L; |             cbuf[dim]+=p*L; | ||||||
| 	    //            cbuf[dim]+=p*L; |  | ||||||
|             pokeLocalSite(s,pgbuf,cbuf); |             pokeLocalSite(s,pgbuf,cbuf); | ||||||
|           } |           } | ||||||
|         } |         } | ||||||
| @@ -280,6 +278,7 @@ namespace Grid { | |||||||
|       flops+= flops_call*NN; |       flops+= flops_call*NN; | ||||||
|        |        | ||||||
|       // writing out result |       // writing out result | ||||||
|  |       int pc = processor_coor[dim]; | ||||||
|       PARALLEL_REGION |       PARALLEL_REGION | ||||||
|       { |       { | ||||||
|         std::vector<int> clbuf(Nd), cgbuf(Nd); |         std::vector<int> clbuf(Nd), cgbuf(Nd); | ||||||
|   | |||||||
| @@ -162,10 +162,15 @@ namespace Grid { | |||||||
| 	_Mat.M(in,out); | 	_Mat.M(in,out); | ||||||
|       } |       } | ||||||
|       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ |       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||||
|  | 	ComplexD dot; | ||||||
|  |  | ||||||
| 	_Mat.M(in,out); | 	_Mat.M(in,out); | ||||||
| 	 | 	 | ||||||
| 	ComplexD dot= innerProduct(in,out); n1=real(dot); | 	dot= innerProduct(in,out); | ||||||
| 	n2=norm2(out); | 	n1=real(dot); | ||||||
|  |  | ||||||
|  | 	dot = innerProduct(out,out); | ||||||
|  | 	n2=real(dot); | ||||||
|       } |       } | ||||||
|       void HermOp(const Field &in, Field &out){ |       void HermOp(const Field &in, Field &out){ | ||||||
| 	_Mat.M(in,out); | 	_Mat.M(in,out); | ||||||
| @@ -187,10 +192,10 @@ namespace Grid { | |||||||
| 	ni=Mpc(in,tmp); | 	ni=Mpc(in,tmp); | ||||||
| 	no=MpcDag(tmp,out); | 	no=MpcDag(tmp,out); | ||||||
|       } |       } | ||||||
|       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ |       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||||
| 	MpcDagMpc(in,out,n1,n2); | 	MpcDagMpc(in,out,n1,n2); | ||||||
|       } |       } | ||||||
|       virtual void HermOp(const Field &in, Field &out){ |       void HermOp(const Field &in, Field &out){ | ||||||
| 	RealD n1,n2; | 	RealD n1,n2; | ||||||
| 	HermOpAndNorm(in,out,n1,n2); | 	HermOpAndNorm(in,out,n1,n2); | ||||||
|       } |       } | ||||||
| @@ -207,6 +212,7 @@ namespace Grid { | |||||||
|       void OpDir  (const Field &in, Field &out,int dir,int disp) { |       void OpDir  (const Field &in, Field &out,int dir,int disp) { | ||||||
| 	assert(0); | 	assert(0); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|     }; |     }; | ||||||
|     template<class Matrix,class Field> |     template<class Matrix,class Field> | ||||||
|       class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { |       class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { | ||||||
| @@ -229,7 +235,7 @@ namespace Grid { | |||||||
| 	Field tmp(in._grid); | 	Field tmp(in._grid); | ||||||
|  |  | ||||||
| 	_Mat.MeooeDag(in,tmp); | 	_Mat.MeooeDag(in,tmp); | ||||||
|         _Mat.MooeeInvDag(tmp,out); | 	_Mat.MooeeInvDag(tmp,out); | ||||||
| 	_Mat.MeooeDag(out,tmp); | 	_Mat.MeooeDag(out,tmp); | ||||||
|  |  | ||||||
| 	_Mat.MooeeDag(in,out); | 	_Mat.MooeeDag(in,out); | ||||||
| @@ -264,6 +270,7 @@ namespace Grid { | |||||||
| 	return axpy_norm(out,-1.0,tmp,in); | 	return axpy_norm(out,-1.0,tmp,in); | ||||||
|       } |       } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     template<class Matrix,class Field> |     template<class Matrix,class Field> | ||||||
|       class SchurDiagTwoOperator :  public SchurOperatorBase<Field> { |       class SchurDiagTwoOperator :  public SchurOperatorBase<Field> { | ||||||
|     protected: |     protected: | ||||||
| @@ -292,45 +299,6 @@ namespace Grid { | |||||||
| 	return axpy_norm(out,-1.0,tmp,in); | 	return axpy_norm(out,-1.0,tmp,in); | ||||||
|       } |       } | ||||||
|     }; |     }; | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta |  | ||||||
|     // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi |  | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ; |  | ||||||
|     template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ; |  | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     //  Staggered use |  | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     template<class Matrix,class Field> |  | ||||||
|       class SchurStaggeredOperator :  public SchurOperatorBase<Field> { |  | ||||||
|     protected: |  | ||||||
|       Matrix &_Mat; |  | ||||||
|     public: |  | ||||||
|       SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; |  | ||||||
|       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ |  | ||||||
| 	n2 = Mpc(in,out); |  | ||||||
| 	ComplexD dot= innerProduct(in,out); |  | ||||||
| 	n1 = real(dot); |  | ||||||
|       } |  | ||||||
|       virtual void HermOp(const Field &in, Field &out){ |  | ||||||
| 	Mpc(in,out); |  | ||||||
|       } |  | ||||||
|       virtual  RealD Mpc      (const Field &in, Field &out) { |  | ||||||
| 	Field tmp(in._grid); |  | ||||||
| 	_Mat.Meooe(in,tmp); |  | ||||||
| 	_Mat.MooeeInv(tmp,out); |  | ||||||
| 	_Mat.Meooe(out,tmp); |  | ||||||
| 	_Mat.Mooee(in,out); |  | ||||||
|         return axpy_norm(out,-1.0,tmp,out); |  | ||||||
|       } |  | ||||||
|       virtual  RealD MpcDag   (const Field &in, Field &out){ |  | ||||||
| 	return Mpc(in,out); |  | ||||||
|       } |  | ||||||
|       virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { |  | ||||||
| 	assert(0);// Never need with staggered |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|     template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
| @@ -346,14 +314,6 @@ namespace Grid { | |||||||
|       virtual void operator() (const Field &in, Field &out) = 0; |       virtual void operator() (const Field &in, Field &out) = 0; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { |  | ||||||
|     public: |  | ||||||
|       void operator() (const Field &in, Field &out){ |  | ||||||
| 	out = in; |  | ||||||
|       }; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
|     // Base classes for Multishift solvers for operators |     // Base classes for Multishift solvers for operators | ||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
| @@ -376,64 +336,6 @@ namespace Grid { | |||||||
|      }; |      }; | ||||||
|     */ |     */ | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Hermitian operator Linear function and operator function |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     template<class Field> |  | ||||||
|       class HermOpOperatorFunction : public OperatorFunction<Field> { |  | ||||||
|       void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |  | ||||||
| 	Linop.HermOp(in,out); |  | ||||||
|       }; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<typename Field> |  | ||||||
|       class PlainHermOp : public LinearFunction<Field> { |  | ||||||
|     public: |  | ||||||
|       LinearOperatorBase<Field> &_Linop; |  | ||||||
|        |  | ||||||
|       PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)  |  | ||||||
|       {} |  | ||||||
|        |  | ||||||
|       void operator()(const Field& in, Field& out) { |  | ||||||
| 	_Linop.HermOp(in,out); |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<typename Field> |  | ||||||
|     class FunctionHermOp : public LinearFunction<Field> { |  | ||||||
|     public: |  | ||||||
|       OperatorFunction<Field>   & _poly; |  | ||||||
|       LinearOperatorBase<Field> &_Linop; |  | ||||||
|        |  | ||||||
|       FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)  |  | ||||||
| 	: _poly(poly), _Linop(linop) {}; |  | ||||||
|        |  | ||||||
|       void operator()(const Field& in, Field& out) { |  | ||||||
| 	_poly(_Linop,in,out); |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|   template<class Field> |  | ||||||
|   class Polynomial : public OperatorFunction<Field> { |  | ||||||
|   private: |  | ||||||
|     std::vector<RealD> Coeffs; |  | ||||||
|   public: |  | ||||||
|     Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; |  | ||||||
|  |  | ||||||
|     // Implement the required interface |  | ||||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |  | ||||||
|  |  | ||||||
|       Field AtoN(in._grid); |  | ||||||
|       Field Mtmp(in._grid); |  | ||||||
|       AtoN = in; |  | ||||||
|       out = AtoN*Coeffs[0]; |  | ||||||
|       for(int n=1;n<Coeffs.size();n++){ |  | ||||||
| 	Mtmp = AtoN; |  | ||||||
| 	Linop.HermOp(Mtmp,AtoN); |  | ||||||
| 	out=out+AtoN*Coeffs[n]; |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -8,7 +8,6 @@ | |||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: Christoph Lehner <clehner@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -34,12 +33,41 @@ Author: Christoph Lehner <clehner@bnl.gov> | |||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| struct ChebyParams : Serializable { |   //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams, |   // Simple general polynomial with user supplied coefficients | ||||||
| 				  RealD, alpha,   |   //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| 				  RealD, beta,    |   template<class Field> | ||||||
| 				  int, Npoly); |   class HermOpOperatorFunction : public OperatorFunction<Field> { | ||||||
| }; |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |       Linop.HermOp(in,out); | ||||||
|  |     }; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   template<class Field> | ||||||
|  |   class Polynomial : public OperatorFunction<Field> { | ||||||
|  |   private: | ||||||
|  |     std::vector<RealD> Coeffs; | ||||||
|  |   public: | ||||||
|  |     Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; | ||||||
|  |  | ||||||
|  |     // Implement the required interface | ||||||
|  |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |  | ||||||
|  |       Field AtoN(in._grid); | ||||||
|  |       Field Mtmp(in._grid); | ||||||
|  |       AtoN = in; | ||||||
|  |       out = AtoN*Coeffs[0]; | ||||||
|  | //            std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl; | ||||||
|  | //            std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl; | ||||||
|  |       for(int n=1;n<Coeffs.size();n++){ | ||||||
|  | 	Mtmp = AtoN; | ||||||
|  | 	Linop.HermOp(Mtmp,AtoN); | ||||||
|  | 	out=out+AtoN*Coeffs[n]; | ||||||
|  | //            std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl; | ||||||
|  | //		std::cout << n<<" " <<norm2(out)<<std::endl; | ||||||
|  |       } | ||||||
|  |     }; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Generic Chebyshev approximations |   // Generic Chebyshev approximations | ||||||
| @@ -54,10 +82,8 @@ struct ChebyParams : Serializable { | |||||||
|  |  | ||||||
|   public: |   public: | ||||||
|     void csv(std::ostream &out){ |     void csv(std::ostream &out){ | ||||||
|       RealD diff = hi-lo; | 	RealD diff = hi-lo; | ||||||
|       RealD delta = (hi-lo)*1.0e-9; |       for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) { | ||||||
|       for (RealD x=lo; x<hi; x+=delta) { |  | ||||||
| 	delta*=1.1; |  | ||||||
| 	RealD f = approx(x); | 	RealD f = approx(x); | ||||||
| 	out<< x<<" "<<f<<std::endl; | 	out<< x<<" "<<f<<std::endl; | ||||||
|       } |       } | ||||||
| @@ -73,7 +99,6 @@ struct ChebyParams : Serializable { | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     Chebyshev(){}; |     Chebyshev(){}; | ||||||
|     Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);}; |  | ||||||
|     Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);}; |     Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);}; | ||||||
|     Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);}; |     Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);}; | ||||||
|  |  | ||||||
| @@ -168,54 +193,12 @@ struct ChebyParams : Serializable { | |||||||
|       return sum; |       return sum; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     RealD approxD(RealD x) |  | ||||||
|     { |  | ||||||
|       RealD Un; |  | ||||||
|       RealD Unm; |  | ||||||
|       RealD Unp; |  | ||||||
|        |  | ||||||
|       RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo)); |  | ||||||
|        |  | ||||||
|       RealD U0=1; |  | ||||||
|       RealD U1=2*y; |  | ||||||
|        |  | ||||||
|       RealD sum; |  | ||||||
|       sum = Coeffs[1]*U0; |  | ||||||
|       sum+= Coeffs[2]*U1*2.0; |  | ||||||
|        |  | ||||||
|       Un =U1; |  | ||||||
|       Unm=U0; |  | ||||||
|       for(int i=2;i<order-1;i++){ |  | ||||||
| 	Unp=2*y*Un-Unm; |  | ||||||
| 	Unm=Un; |  | ||||||
| 	Un =Unp; |  | ||||||
| 	sum+= Un*Coeffs[i+1]*(i+1.0); |  | ||||||
|       } |  | ||||||
|       return sum/(0.5*(hi-lo)); |  | ||||||
|     }; |  | ||||||
|      |  | ||||||
|     RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) { |  | ||||||
|       RealD x = x0; |  | ||||||
|       RealD eps; |  | ||||||
|        |  | ||||||
|       int i; |  | ||||||
|       for (i=0;i<maxiter;i++) { |  | ||||||
| 	eps = approx(x) - z; |  | ||||||
| 	if (fabs(eps / z) < resid) |  | ||||||
| 	  return x; |  | ||||||
| 	x = x - eps / approxD(x); |  | ||||||
|       } |  | ||||||
|        |  | ||||||
|       return std::numeric_limits<double>::quiet_NaN(); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     // Implement the required interface |     // Implement the required interface | ||||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |  | ||||||
|       GridBase *grid=in._grid; |       GridBase *grid=in._grid; | ||||||
|  | //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||||
|       // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||||
|       //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; |  | ||||||
|  |  | ||||||
|       int vol=grid->gSites(); |       int vol=grid->gSites(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,152 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/algorithms/approx/Forecast.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: David Murphy <dmurphy@phys.columbia.edu> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| #ifndef INCLUDED_FORECAST_H |  | ||||||
| #define INCLUDED_FORECAST_H |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|   // Abstract base class. |  | ||||||
|   // Takes a matrix (Mat), a source (phi), and a vector of Fields (chi) |  | ||||||
|   // and returns a forecasted solution to the system D*psi = phi (psi). |  | ||||||
|   template<class Matrix, class Field> |  | ||||||
|   class Forecast |  | ||||||
|   { |  | ||||||
|     public: |  | ||||||
|       virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012), |  | ||||||
|   // used to forecast solutions across poles of the EOFA heatbath. |  | ||||||
|   // |  | ||||||
|   // Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C) |  | ||||||
|   template<class Matrix, class Field> |  | ||||||
|   class ChronoForecast : public Forecast<Matrix,Field> |  | ||||||
|   { |  | ||||||
|     public: |  | ||||||
|       Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns) |  | ||||||
|       { |  | ||||||
|         int degree = prev_solns.size(); |  | ||||||
|         Field chi(phi); // forecasted solution |  | ||||||
|  |  | ||||||
|         // Trivial cases |  | ||||||
|         if(degree == 0){ chi = zero; return chi; } |  | ||||||
|         else if(degree == 1){ return prev_solns[0]; } |  | ||||||
|  |  | ||||||
|         RealD dot; |  | ||||||
|         ComplexD xp; |  | ||||||
|         Field r(phi); // residual |  | ||||||
|         Field Mv(phi); |  | ||||||
|         std::vector<Field> v(prev_solns); // orthonormalized previous solutions |  | ||||||
|         std::vector<Field> MdagMv(degree,phi); |  | ||||||
|  |  | ||||||
|         // Array to hold the matrix elements |  | ||||||
|         std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree)); |  | ||||||
|  |  | ||||||
|         // Solution and source vectors |  | ||||||
|         std::vector<ComplexD> a(degree); |  | ||||||
|         std::vector<ComplexD> b(degree); |  | ||||||
|  |  | ||||||
|         // Orthonormalize the vector basis |  | ||||||
|         for(int i=0; i<degree; i++){ |  | ||||||
|           v[i] *= 1.0/std::sqrt(norm2(v[i])); |  | ||||||
|           for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Perform sparse matrix multiplication and construct rhs |  | ||||||
|         for(int i=0; i<degree; i++){ |  | ||||||
|           b[i] = innerProduct(v[i],phi); |  | ||||||
|           Mat.M(v[i],Mv); |  | ||||||
|           Mat.Mdag(Mv,MdagMv[i]); |  | ||||||
|           G[i][i] = innerProduct(v[i],MdagMv[i]); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Construct the matrix |  | ||||||
|         for(int j=0; j<degree; j++){ |  | ||||||
|         for(int k=j+1; k<degree; k++){ |  | ||||||
|           G[j][k] = innerProduct(v[j],MdagMv[k]); |  | ||||||
|           G[k][j] = std::conj(G[j][k]); |  | ||||||
|         }} |  | ||||||
|  |  | ||||||
|         // Gauss-Jordan elimination with partial pivoting |  | ||||||
|         for(int i=0; i<degree; i++){ |  | ||||||
|  |  | ||||||
|           // Perform partial pivoting |  | ||||||
|           int k = i; |  | ||||||
|           for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } } |  | ||||||
|           if(k != i){ |  | ||||||
|             xp = b[k]; |  | ||||||
|             b[k] = b[i]; |  | ||||||
|             b[i] = xp; |  | ||||||
|             for(int j=0; j<degree; j++){ |  | ||||||
|               xp = G[k][j]; |  | ||||||
|               G[k][j] = G[i][j]; |  | ||||||
|               G[i][j] = xp; |  | ||||||
|             } |  | ||||||
|           } |  | ||||||
|  |  | ||||||
|           // Convert matrix to upper triangular form |  | ||||||
|           for(int j=i+1; j<degree; j++){ |  | ||||||
|             xp = G[j][i]/G[i][i]; |  | ||||||
|             b[j] -= xp * b[i]; |  | ||||||
|             for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; } |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Back-substitute through the upper-triangular system to get the coefficients and accumulate the initial guess |  | ||||||
|         chi = zero; |  | ||||||
|         r = phi; |  | ||||||
|         for(int i=degree-1; i>=0; i--){ |  | ||||||
|           a[i] = 0.0; |  | ||||||
|           for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; } |  | ||||||
|           a[i] = (b[i]-a[i])/G[i][i]; |  | ||||||
|           chi += a[i]*v[i]; |  | ||||||
|           r -= a[i]*MdagMv[i]; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         RealD true_r(0.0); |  | ||||||
|         ComplexD tmp; |  | ||||||
|         for(int i=0; i<degree; i++){ |  | ||||||
|           tmp = -b[i]; |  | ||||||
|           for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; } |  | ||||||
|           tmp = std::conj(tmp)*tmp; |  | ||||||
|           true_r += std::sqrt(tmp.real()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         RealD error = std::sqrt(norm2(r)/norm2(phi)); |  | ||||||
|         std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl; |  | ||||||
|  |  | ||||||
|         return chi; |  | ||||||
|       }; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
| @@ -16,7 +16,7 @@ | |||||||
| #define INCLUDED_ALG_REMEZ_H | #define INCLUDED_ALG_REMEZ_H | ||||||
|  |  | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <Grid/GridStd.h> | #include <Config.h> | ||||||
|  |  | ||||||
| #ifdef HAVE_LIBGMP | #ifdef HAVE_LIBGMP | ||||||
| #include "bigfloat.h" | #include "bigfloat.h" | ||||||
|   | |||||||
							
								
								
									
										137
									
								
								lib/algorithms/densematrix/DenseMatrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								lib/algorithms/densematrix/DenseMatrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/densematrix/DenseMatrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_DENSE_MATRIX_H | ||||||
|  | #define GRID_DENSE_MATRIX_H | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Matrix utils | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | template<class T> using DenseVector = std::vector<T>; | ||||||
|  | template<class T> using DenseMatrix = DenseVector<DenseVector<T> >; | ||||||
|  |  | ||||||
|  | template<class T> void Size(DenseVector<T> & vec, int &N)  | ||||||
|  | {  | ||||||
|  |   N= vec.size(); | ||||||
|  | } | ||||||
|  | template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)  | ||||||
|  | {  | ||||||
|  |   N= mat.size(); | ||||||
|  |   M= mat[0].size(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)  | ||||||
|  | {  | ||||||
|  |   int M; Size(mat,N,M); | ||||||
|  |   assert(N==M); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class T> void Resize(DenseVector<T > & mat, int N) {  | ||||||
|  |   mat.resize(N); | ||||||
|  | } | ||||||
|  | template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {  | ||||||
|  |   mat.resize(N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     mat[i].resize(M); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | template<class T> void Fill(DenseMatrix<T> & mat, T&val) {  | ||||||
|  |   int N,M; | ||||||
|  |   Size(mat,N,M); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |   for(int j=0;j<M;j++){ | ||||||
|  |     mat[i][j] = val; | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Transpose of a matrix **/ | ||||||
|  | template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){ | ||||||
|  |   int N,M; | ||||||
|  |   Size(mat,N,M); | ||||||
|  |   DenseMatrix<T> C; Resize(C,M,N); | ||||||
|  |   for(int i=0;i<M;i++){ | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     C[i][j] = mat[j][i]; | ||||||
|  |   }}  | ||||||
|  |   return C; | ||||||
|  | } | ||||||
|  | /** Set DenseMatrix to unit matrix **/ | ||||||
|  | template<class T> void Unity(DenseMatrix<T> &A){ | ||||||
|  |   int N;  SizeSquare(A,N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     for(int j=0;j<N;j++){ | ||||||
|  |       if ( i==j ) A[i][j] = 1; | ||||||
|  |       else        A[i][j] = 0; | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Add c * I to the matrix, i.e. add c to every diagonal element **/ | ||||||
|  | template<class T> | ||||||
|  | void PlusUnit(DenseMatrix<T> & A,T c){ | ||||||
|  |   int dim;  SizeSquare(A,dim); | ||||||
|  |   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** return the Hermitian conjugate of matrix **/ | ||||||
|  | template<class T> | ||||||
|  | DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){ | ||||||
|  |  | ||||||
|  |   int dim; SizeSquare(mat,dim); | ||||||
|  |  | ||||||
|  |   DenseMatrix<T> C; Resize(C,dim,dim); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<dim;i++){ | ||||||
|  |     for(int j=0;j<dim;j++){ | ||||||
|  |       C[i][j] = conj(mat[j][i]); | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  |   return C; | ||||||
|  | } | ||||||
|  | /** Get the submatrix of rows [row_st,row_end) and columns [col_st,col_end); it need not be square **/ | ||||||
|  | template <class T> | ||||||
|  | DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end) | ||||||
|  | { | ||||||
|  |   DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st); | ||||||
|  |  | ||||||
|  |   for(int i = row_st; i<row_end; i++){ | ||||||
|  |   for(int j = col_st; j<col_end; j++){ | ||||||
|  |     H[i-row_st][j-col_st]=A[i][j]; | ||||||
|  |   }} | ||||||
|  |   return H; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #include "Householder.h" | ||||||
|  | #include "Francis.h" | ||||||
|  |  | ||||||
|  | #endif | ||||||
|  |  | ||||||
							
								
								
									
										525
									
								
								lib/algorithms/densematrix/Francis.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										525
									
								
								lib/algorithms/densematrix/Francis.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,525 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/densematrix/Francis.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef FRANCIS_H | ||||||
|  | #define FRANCIS_H | ||||||
|  |  | ||||||
|  | #include <cstdlib> | ||||||
|  | #include <string> | ||||||
|  | #include <cmath> | ||||||
|  | #include <iostream> | ||||||
|  | #include <sstream> | ||||||
|  | #include <stdexcept> | ||||||
|  | #include <fstream> | ||||||
|  | #include <complex> | ||||||
|  | #include <algorithm> | ||||||
|  |  | ||||||
|  | //#include <timer.h> | ||||||
|  | //#include <lapacke.h> | ||||||
|  | //#include <Eigen/Dense> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||||
|  | template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. | ||||||
|  | H = | ||||||
|  |       x  x  x  x  x  x  x  x  x | ||||||
|  |       x  x  x  x  x  x  x  x  x | ||||||
|  |       0  x  x  x  x  x  x  x  x | ||||||
|  |       0  0  x  x  x  x  x  x  x | ||||||
|  |       0  0  0  x  x  x  x  x  x | ||||||
|  |       0  0  0  0  x  x  x  x  x | ||||||
|  |       0  0  0  0  0  x  x  x  x | ||||||
|  |       0  0  0  0  0  0  x  x  x | ||||||
|  |       0  0  0  0  0  0  0  x  x | ||||||
|  | Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. | ||||||
|  | **/ | ||||||
|  | template <class T> | ||||||
|  | int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||||
|  | { | ||||||
|  |   DenseMatrix<T> H = Hin;  | ||||||
|  |  | ||||||
|  |   int N ; SizeSquare(H,N); | ||||||
|  |   int M = N; | ||||||
|  |  | ||||||
|  |   Fill(evals,0); | ||||||
|  |   Fill(evecs,0); | ||||||
|  |  | ||||||
|  |   T s,t,x=0,y=0,z=0; | ||||||
|  |   T u,d; | ||||||
|  |   T apd,amd,bc; | ||||||
|  |   DenseVector<T> p(N,0); | ||||||
|  |   T nrm = Norm(H);    ///DenseMatrix Norm | ||||||
|  |   int n, m; | ||||||
|  |   int e = 0; | ||||||
|  |   int it = 0; | ||||||
|  |   int tot_it = 0; | ||||||
|  |   int l = 0; | ||||||
|  |   int r = 0; | ||||||
|  |   DenseMatrix<T> P; Resize(P,N,N); Unity(P); | ||||||
|  |   DenseVector<int> trows(N,0); | ||||||
|  |  | ||||||
|  |   /// Check if the matrix is really hessenberg, if not abort | ||||||
|  |   RealD sth = 0; | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     for(int i=j+2;i<N;i++){ | ||||||
|  |       sth = abs(H[i][j]); | ||||||
|  |       if(sth > small){ | ||||||
|  | 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; | ||||||
|  | 	exit(1); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   do{ | ||||||
|  |     std::cout << "Francis QR Step N = " << N << std::endl; | ||||||
|  |     /** Check for convergence | ||||||
|  |       x  x  x  x  x | ||||||
|  |       0  x  x  x  x | ||||||
|  |       0  0  x  x  x | ||||||
|  |       0  0  x  x  x | ||||||
|  |       0  0  0  0  x | ||||||
|  |       for this matrix l = 4 | ||||||
|  |      **/ | ||||||
|  |     do{ | ||||||
|  |       l = Chop_subdiag(H,nrm,e,small); | ||||||
|  |       r = 0;    ///May have converged on more than one eval | ||||||
|  |       ///Single eval | ||||||
|  |       if(l == N-1){ | ||||||
|  |         evals[e] = H[l][l]; | ||||||
|  |         N--; e++; r++; it = 0; | ||||||
|  |       } | ||||||
|  |       ///RealD eval | ||||||
|  |       if(l == N-2){ | ||||||
|  |         trows[l+1] = 1;    ///Needed for UTSolve | ||||||
|  |         apd = H[l][l] + H[l+1][l+1]; | ||||||
|  |         amd = H[l][l] - H[l+1][l+1]; | ||||||
|  |         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; | ||||||
|  |         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); | ||||||
|  |         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); | ||||||
|  |         N-=2; e+=2; r++; it = 0; | ||||||
|  |       } | ||||||
|  |     } while(r>0); | ||||||
|  |  | ||||||
|  |     if(N ==0) break; | ||||||
|  |  | ||||||
|  |     DenseVector<T > ck; Resize(ck,3); | ||||||
|  |     DenseVector<T> v;   Resize(v,3); | ||||||
|  |  | ||||||
|  |     for(int m = N-3; m >= l; m--){ | ||||||
|  |       ///Starting vector essentially random shift. | ||||||
|  |       if(it%10 == 0 && N >= 3 && it > 0){ | ||||||
|  |         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||||
|  |         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||||
|  |         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||||
|  |         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||||
|  |         z = H[m+1][m]*H[m+2][m+1]; | ||||||
|  |       } | ||||||
|  |       ///Starting vector implicit Q theorem | ||||||
|  |       else{ | ||||||
|  |         s = (H[N-2][N-2] + H[N-1][N-1]); | ||||||
|  |         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); | ||||||
|  |         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||||
|  |         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||||
|  |         z = H[m+1][m]*H[m+2][m+1]; | ||||||
|  |       } | ||||||
|  |       ck[0] = x; ck[1] = y; ck[2] = z; | ||||||
|  |  | ||||||
|  |       if(m == l) break; | ||||||
|  |  | ||||||
|  |       /** Some stupid thing from Numerical Recipes, seems to work **/ | ||||||
|  |       // PAB.. for heaven's sake quote page, purpose, evidence it works. | ||||||
|  |       //       what sort of comment is that!?!?!? | ||||||
|  |       u=abs(H[m][m-1])*(abs(y)+abs(z)); | ||||||
|  |       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); | ||||||
|  |       if ((T)abs(u+d) == (T)abs(d) ){ | ||||||
|  | 	l = m; break; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       //if (u < small){l = m; break;} | ||||||
|  |     } | ||||||
|  |     if(it > 100000){ | ||||||
|  |      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; | ||||||
|  |      std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||||
|  |       exit(1); | ||||||
|  |     } | ||||||
|  |     normalize(ck);    ///Normalization cancels in PHP anyway | ||||||
|  |     T beta; | ||||||
|  |     Householder_vector<T >(ck, 0, 2, v, beta); | ||||||
|  |     Householder_mult<T >(H,v,beta,0,l,l+2,0); | ||||||
|  |     Householder_mult<T >(H,v,beta,0,l,l+2,1); | ||||||
|  |     ///Accumulate eigenvector | ||||||
|  |     Householder_mult<T >(P,v,beta,0,l,l+2,1); | ||||||
|  |     int sw = 0;      ///Are we on the last row? | ||||||
|  |     for(int k=l;k<N-2;k++){ | ||||||
|  |       x = H[k+1][k]; | ||||||
|  |       y = H[k+2][k]; | ||||||
|  |       z = (T)0.0; | ||||||
|  |       if(k+3 <= N-1){ | ||||||
|  | 	z = H[k+3][k]; | ||||||
|  |       } else{ | ||||||
|  | 	sw = 1;  | ||||||
|  | 	v[2] = (T)0.0; | ||||||
|  |       } | ||||||
|  |       ck[0] = x; ck[1] = y; ck[2] = z; | ||||||
|  |       normalize(ck); | ||||||
|  |       Householder_vector<T >(ck, 0, 2-sw, v, beta); | ||||||
|  |       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); | ||||||
|  |       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); | ||||||
|  |       ///Accumulate eigenvector | ||||||
|  |       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); | ||||||
|  |     } | ||||||
|  |     it++; | ||||||
|  |     tot_it++; | ||||||
|  |   }while(N > 1); | ||||||
|  |   N = evals.size(); | ||||||
|  |   ///Annoying - UT solves in reverse order; | ||||||
|  |   DenseVector<T> tmp; Resize(tmp,N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     tmp[i] = evals[N-i-1]; | ||||||
|  |   }  | ||||||
|  |   evals = tmp; | ||||||
|  |   UTeigenvectors(H, trows, evals, evecs); | ||||||
|  |   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} | ||||||
|  |   return tot_it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||||
|  | { | ||||||
|  |   /** | ||||||
|  |   Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. | ||||||
|  |   H = | ||||||
|  |   x  x  0  0  0  0 | ||||||
|  |   x  x  x  0  0  0 | ||||||
|  |   0  x  x  x  0  0 | ||||||
|  |   0  0  x  x  x  0 | ||||||
|  |   0  0  0  x  x  x | ||||||
|  |   0  0  0  0  x  x | ||||||
|  |   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary.  **/ | ||||||
|  |   return my_Wilkinson(Hin, evals, evecs, small, small); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) | ||||||
|  | { | ||||||
|  |   int N; SizeSquare(Hin,N); | ||||||
|  |   int M = N; | ||||||
|  |  | ||||||
|  |   ///I don't want to modify the input but matrices must be passed by reference | ||||||
|  |   //Scale a matrix by its "norm" | ||||||
|  |   //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm); | ||||||
|  |   DenseMatrix<T> H;  H = Hin; | ||||||
|  |    | ||||||
|  |   RealD Hnorm = abs(Norm(Hin)); | ||||||
|  |   H = H * (1.0 / Hnorm); | ||||||
|  |  | ||||||
|  |   // TODO use openmp and memset | ||||||
|  |   Fill(evals,0); | ||||||
|  |   Fill(evecs,0); | ||||||
|  |  | ||||||
|  |   T s, t, x = 0, y = 0, z = 0; | ||||||
|  |   T u, d; | ||||||
|  |   T apd, amd, bc; | ||||||
|  |   DenseVector<T> p; Resize(p,N); Fill(p,0); | ||||||
|  |  | ||||||
|  |   T nrm = Norm(H);    ///DenseMatrix Norm | ||||||
|  |   int n, m; | ||||||
|  |   int e = 0; | ||||||
|  |   int it = 0; | ||||||
|  |   int tot_it = 0; | ||||||
|  |   int l = 0; | ||||||
|  |   int r = 0; | ||||||
|  |   DenseMatrix<T> P; Resize(P,N,N); | ||||||
|  |   Unity(P); | ||||||
|  |   DenseVector<int> trows(N, 0); | ||||||
|  |   /// Check if the matrix is really symm tridiag | ||||||
|  |   RealD sth = 0; | ||||||
|  |   for(int j = 0; j < N; ++j) | ||||||
|  |   { | ||||||
|  |     for(int i = j + 2; i < N; ++i) | ||||||
|  |     { | ||||||
|  |       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) | ||||||
|  |       { | ||||||
|  | 	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; | ||||||
|  | 	std::cout << "Warning tridiagonalize and call again" << std::endl; | ||||||
|  |         // exit(1); // see what is going on | ||||||
|  |         //return; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   do{ | ||||||
|  |     do{ | ||||||
|  |       //Jasper | ||||||
|  |       //Check if the subdiagonal term is small enough (<small) | ||||||
|  |       //if true then it is converged. | ||||||
|  |       //check start from H.dim - e - 1 | ||||||
|  |       //How to deal with more than 2 are converged? | ||||||
|  |       //What if Chop_symm_subdiag returns something in the middle? | ||||||
|  |       //-------------- | ||||||
|  |       l = Chop_symm_subdiag(H,nrm, e, small); | ||||||
|  |       r = 0;    ///May have converged on more than one eval | ||||||
|  |       //Jasper | ||||||
|  |       //In this case | ||||||
|  |       // x  x  0  0  0  0 | ||||||
|  |       // x  x  x  0  0  0 | ||||||
|  |       // 0  x  x  x  0  0 | ||||||
|  |       // 0  0  x  x  x  0 | ||||||
|  |       // 0  0  0  x  x  0 | ||||||
|  |       // 0  0  0  0  0  x  <- l | ||||||
|  |       //-------------- | ||||||
|  |       ///Single eval | ||||||
|  |       if(l == N - 1) | ||||||
|  |       { | ||||||
|  |         evals[e] = H[l][l]; | ||||||
|  |         N--; | ||||||
|  |         e++; | ||||||
|  |         r++; | ||||||
|  |         it = 0; | ||||||
|  |       } | ||||||
|  |       //Jasper | ||||||
|  |       // x  x  0  0  0  0 | ||||||
|  |       // x  x  x  0  0  0 | ||||||
|  |       // 0  x  x  x  0  0 | ||||||
|  |       // 0  0  x  x  0  0 | ||||||
|  |       // 0  0  0  0  x  x  <- l | ||||||
|  |       // 0  0  0  0  x  x | ||||||
|  |       //-------------- | ||||||
|  |       ///RealD eval | ||||||
|  |       if(l == N - 2) | ||||||
|  |       { | ||||||
|  |         trows[l + 1] = 1;    ///Needed for UTSolve | ||||||
|  |         apd = H[l][l] + H[l + 1][ l + 1]; | ||||||
|  |         amd = H[l][l] - H[l + 1][l + 1]; | ||||||
|  |         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; | ||||||
|  |         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); | ||||||
|  |         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); | ||||||
|  |         N -= 2; | ||||||
|  |         e += 2; | ||||||
|  |         r++; | ||||||
|  |         it = 0; | ||||||
|  |       } | ||||||
|  |     }while(r > 0); | ||||||
|  |     //Jasper | ||||||
|  |     //Already converged | ||||||
|  |     //-------------- | ||||||
|  |     if(N == 0) break; | ||||||
|  |  | ||||||
|  |     DenseVector<T> ck,v; Resize(ck,2); Resize(v,2); | ||||||
|  |  | ||||||
|  |     for(int m = N - 3; m >= l; m--) | ||||||
|  |     { | ||||||
|  |       ///Starting vector essentially random shift. | ||||||
|  |       if(it%10 == 0 && N >= 3 && it > 0) | ||||||
|  |       { | ||||||
|  |         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); | ||||||
|  |         x = H[m][m] - t; | ||||||
|  |         z = H[m + 1][m]; | ||||||
|  |       } else { | ||||||
|  |       ///Starting vector implicit Q theorem | ||||||
|  |         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; | ||||||
|  |         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]  | ||||||
|  | 	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); | ||||||
|  |         x = H[m][m] - t; | ||||||
|  |         z = H[m + 1][m]; | ||||||
|  |       } | ||||||
|  |       //Jasper | ||||||
|  |       //why it is here???? | ||||||
|  |       //----------------------- | ||||||
|  |       if(m == l) | ||||||
|  |         break; | ||||||
|  |  | ||||||
|  |       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); | ||||||
|  |       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); | ||||||
|  |       if ((T)abs(u + d) == (T)abs(d)) | ||||||
|  |       { | ||||||
|  |         l = m; | ||||||
|  |         break; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     //Jasper | ||||||
|  |     if(it > 1000000) | ||||||
|  |     { | ||||||
|  |       std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; | ||||||
|  |       std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||||
|  |       exit(1); | ||||||
|  |     } | ||||||
|  |     // | ||||||
|  |     T s, c; | ||||||
|  |     Givens_calc<T>(x, z, c, s); | ||||||
|  |     Givens_mult<T>(H, l, l + 1, c, -s, 0); | ||||||
|  |     Givens_mult<T>(H, l, l + 1, c,  s, 1); | ||||||
|  |     Givens_mult<T>(P, l, l + 1, c,  s, 1); | ||||||
|  |     // | ||||||
|  |     for(int k = l; k < N - 2; ++k) | ||||||
|  |     { | ||||||
|  |       x = H.A[k + 1][k]; | ||||||
|  |       z = H.A[k + 2][k]; | ||||||
|  |       Givens_calc<T>(x, z, c, s); | ||||||
|  |       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); | ||||||
|  |       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); | ||||||
|  |       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); | ||||||
|  |     } | ||||||
|  |     it++; | ||||||
|  |     tot_it++; | ||||||
|  |   }while(N > 1); | ||||||
|  |  | ||||||
|  |   N = evals.size(); | ||||||
|  |   ///Annoying - UT solves in reverse order; | ||||||
|  |   DenseVector<T> tmp(N); | ||||||
|  |   for(int i = 0; i < N; ++i) | ||||||
|  |     tmp[i] = evals[N-i-1]; | ||||||
|  |   evals = tmp; | ||||||
|  |   // | ||||||
|  |   UTeigenvectors(H, trows, evals, evecs); | ||||||
|  |   //UTSymmEigenvectors(H, trows, evals, evecs); | ||||||
|  |   for(int i = 0; i < evals.size(); ++i) | ||||||
|  |   { | ||||||
|  |     evecs[i] = P * evecs[i]; | ||||||
|  |     normalize(evecs[i]); | ||||||
|  |     evals[i] = evals[i] * Hnorm; | ||||||
|  |   } | ||||||
|  |   // // FIXME this is to test | ||||||
|  |   // Hin.write("evecs3", evecs); | ||||||
|  |   // Hin.write("evals3", evals); | ||||||
|  |   // // check rsd | ||||||
|  |   // for(int i = 0; i < M; i++) { | ||||||
|  |   //   vector<T> Aevec = Hin * evecs[i]; | ||||||
|  |   //   RealD norm2(0.); | ||||||
|  |   //   for(int j = 0; j < M; j++) { | ||||||
|  |   //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); | ||||||
|  |   //   } | ||||||
|  |   // } | ||||||
|  |   return tot_it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |   turn a matrix A = | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   into | ||||||
|  |   x  x  x  x  x | ||||||
|  |   x  x  x  x  x | ||||||
|  |   0  x  x  x  x | ||||||
|  |   0  0  x  x  x | ||||||
|  |   0  0  0  x  x | ||||||
|  |   with householder rotations | ||||||
|  |   Slow. | ||||||
|  |   */ | ||||||
|  |   int N ; SizeSquare(A,N); | ||||||
|  |   DenseVector<T > p; Resize(p,N); Fill(p,0); | ||||||
|  |  | ||||||
|  |   for(int k=start;k<N-2;k++){ | ||||||
|  |     //cerr << "hess" << k << std::endl; | ||||||
|  |     DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); | ||||||
|  |     for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column | ||||||
|  |     normalize(ck);    ///Normalization cancels in PHP anyway | ||||||
|  |     T beta; | ||||||
|  |     Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector | ||||||
|  |     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA | ||||||
|  |     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H | ||||||
|  |     ///Accumulate eigenvector | ||||||
|  |     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H | ||||||
|  |   } | ||||||
|  |   /*for(int l=0;l<N-2;l++){ | ||||||
|  |     for(int k=l+2;k<N;k++){ | ||||||
|  |     A(0,k,l); | ||||||
|  |     } | ||||||
|  |     }*/ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||||
|  | ///Tridiagonalize a matrix | ||||||
|  |   int N; SizeSquare(A,N); | ||||||
|  |   Hess(A,Q,start); | ||||||
|  |   /*for(int l=0;l<N-2;l++){ | ||||||
|  |     for(int k=l+2;k<N;k++){ | ||||||
|  |     A(0,l,k); | ||||||
|  |     } | ||||||
|  |     }*/ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | void ForceTridiagonal(DenseMatrix<T> &A){ | ||||||
|  | ///Force a matrix to tridiagonal form by zeroing every element more than one place off the diagonal | ||||||
|  |   int N ; SizeSquare(A,N); | ||||||
|  |   for(int l=0;l<N-2;l++){ | ||||||
|  |     for(int k=l+2;k<N;k++){ | ||||||
|  |       A[l][k]=0; | ||||||
|  |       A[k][l]=0; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  |   ///Solve a symmetric eigensystem, not necessarily in tridiagonal form | ||||||
|  |   int N; SizeSquare(Ain,N); | ||||||
|  |   DenseMatrix<T > A; A = Ain; | ||||||
|  |   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q); | ||||||
|  |   Tri(A,Q,0); | ||||||
|  |   int it = my_Wilkinson<T>(A, evals, evecs, small); | ||||||
|  |   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||||
|  |   return it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  |   return my_Wilkinson(Ain, evals, evecs, small); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  |   return my_SymmEigensystem(Ain, evals, evecs, small); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||||
|  | ///Solve a general eigensystem, not necessarily in Hessenberg form (it is reduced with Hess first) | ||||||
|  |   int N = Ain.dim; | ||||||
|  |   DenseMatrix<T > A(N); A = Ain; | ||||||
|  |   DenseMatrix<T > Q(N);Q.Unity(); | ||||||
|  |   Hess(A,Q,0); | ||||||
|  |   int it = QReigensystem<T>(A, evals, evecs, small); | ||||||
|  |   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||||
|  |   return it; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
							
								
								
									
										242
									
								
								lib/algorithms/densematrix/Householder.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								lib/algorithms/densematrix/Householder.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,242 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/densematrix/Householder.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef HOUSEHOLDER_H | ||||||
|  | #define HOUSEHOLDER_H | ||||||
|  |  | ||||||
// Trace macros: each prints the enclosing function name, the file and the line
// number to std::cout, prefixed with GridLogMessage.
//   TIMER(A) - plain location line (the argument A is ignored)
//   ENTER()  - location line tagged "ENTRY "
//   LEAVE()  - location line tagged "EXIT  "
// __func__ is the standard predefined identifier for the function name; the
// former spelling __FUNC__ is not provided by standard C++, so expanding these
// macros would not compile unless something else defined it.
// The trailing ';' inside each macro is kept so existing call sites are unaffected.
#define TIMER(A) std::cout << GridLogMessage << __func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
|  |  | ||||||
|  | #include <cstdlib> | ||||||
|  | #include <string> | ||||||
|  | #include <cmath> | ||||||
|  | #include <iostream> | ||||||
|  | #include <sstream> | ||||||
|  | #include <stdexcept> | ||||||
|  | #include <fstream> | ||||||
|  | #include <complex> | ||||||
|  | #include <algorithm> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
/**
 * Comparison function for finding the element of largest magnitude in a vector.
 * Returns true when |i| < |j|.
 *
 * std::abs is brought into scope so that floating-point arguments select the
 * floating-point overloads instead of possibly binding to the C int abs(int),
 * which would truncate the value. Types that supply their own abs are still
 * found through argument-dependent lookup.
 **/
template <class T> bool cf(T i, T j) {
  using std::abs;
  return abs(i) < abs(j);
}
|  |  | ||||||
/**
	Calculate a real Givens angle.

	Inputs y and z; outputs the rotation coefficients c and s.
	  - z == 0          : c = 1, s = 0 (identity rotation), and nothing else is done.
	                      Previously this case fell through to the branches below,
	                      so y == 0, z == 0 evaluated 0/0 and produced NaN.
	  - |z| >= |y|      : t = -y/z, s = 1/sqrt(1 + t*t), c = s*t
	  - otherwise       : t = -z/y, c = 1/sqrt(1 + t*t), s = c*t
	Dividing by the larger of the two magnitudes keeps |t| <= 1.
 **/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
  // Select the std overloads for arithmetic / std::complex types; other types
  // are still resolved through argument-dependent lookup.
  using std::abs;
  using std::sqrt;

  const auto mz = abs(z);

  if(mz == 0.0){
    c = 1; s = 0;
    return;
  }

  if(mz >= abs(y)){
    const T t = -y/z;
    s = static_cast<T>(1.0) / sqrt(static_cast<T>(1.0) + t * t);
    c = s * t;
  } else {
    const T t = -z/y;
    c = static_cast<T>(1.0) / sqrt(static_cast<T>(1.0) + t * t);
    s = c * t;
  }
}
|  |  | ||||||
|  | template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) | ||||||
|  | { | ||||||
|  |   int q ; SizeSquare(A,q); | ||||||
|  |  | ||||||
|  |   if(dir == 0){ | ||||||
|  |     for(int j=0;j<q;j++){ | ||||||
|  |       T nu = A[i][j]; | ||||||
|  |       T w  = A[k][j]; | ||||||
|  |       A[i][j] = (c*nu + s*w); | ||||||
|  |       A[k][j] = (-s*nu + c*w); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if(dir == 1){ | ||||||
|  |     for(int j=0;j<q;j++){ | ||||||
|  |       T nu = A[j][i]; | ||||||
|  |       T w  = A[j][k]; | ||||||
|  |       A[j][i] = (c*nu - s*w); | ||||||
|  |       A[j][k] = (s*nu + c*w); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	from input = x; | ||||||
|  | 	Compute the complex Householder vector, v, such that | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  |  | ||||||
|  | 	P | x |    | x | k = 0 | ||||||
|  | 	| x |    | 0 |  | ||||||
|  | 	| x | =  | 0 | | ||||||
|  | 	| x |    | 0 | j = 3 | ||||||
|  | 	| x |	   | x | | ||||||
|  |  | ||||||
|  | 	These are the "Unreduced" Householder vectors. | ||||||
|  |  | ||||||
|  |  **/ | ||||||
|  | template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) | ||||||
|  | { | ||||||
|  |   int N ; Size(input,N); | ||||||
|  |   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); | ||||||
|  |  | ||||||
|  |   if(abs(m) > 0.0){ | ||||||
|  |     T alpha = 0; | ||||||
|  |  | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = input[i]/m; | ||||||
|  |       alpha = alpha + v[i]*conj(v[i]); | ||||||
|  |     } | ||||||
|  |     alpha = sqrt(alpha); | ||||||
|  |     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); | ||||||
|  |  | ||||||
|  |     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; | ||||||
|  |     else                 v[k] = -alpha; | ||||||
|  |   } else{ | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = 0.0; | ||||||
|  |     }  | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	from input = x; | ||||||
|  | 	Compute the complex Householder vector, v, such that | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  |  | ||||||
|  | 	Px = alpha*e_dir | ||||||
|  |  | ||||||
|  | 	These are the "Unreduced" Householder vectors. | ||||||
|  |  | ||||||
|  |  **/ | ||||||
|  |  | ||||||
|  | template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) | ||||||
|  | { | ||||||
|  |   int N = input.size(); | ||||||
|  |   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); | ||||||
|  |    | ||||||
|  |   if(abs(m) > 0.0){ | ||||||
|  |     T alpha = 0; | ||||||
|  |  | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = input[i]/m; | ||||||
|  |       alpha = alpha + v[i]*conj(v[i]); | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     alpha = sqrt(alpha); | ||||||
|  |     beta = 1.0/(alpha*(alpha + abs(v[dir]) )); | ||||||
|  | 	 | ||||||
|  |     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; | ||||||
|  |     else                  v[dir] = -alpha; | ||||||
|  |   }else{ | ||||||
|  |     for(int i=k; i<j+1; i++){ | ||||||
|  |       v[i] = 0.0; | ||||||
|  |     }  | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	Compute the product PA if trans = 0 | ||||||
|  | 	AP if trans = 1 | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  | 	start at element l of matrix A | ||||||
|  | 	v is of length j - k + 1 of v are nonzero | ||||||
|  |  **/ | ||||||
|  |  | ||||||
|  | template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) | ||||||
|  | { | ||||||
|  |   int N ; SizeSquare(A,N); | ||||||
|  |  | ||||||
|  |   if(abs(beta) > 0.0){ | ||||||
|  |     for(int p=l; p<N; p++){ | ||||||
|  |       T s = 0; | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p]; | ||||||
|  | 	s *= beta; | ||||||
|  | 	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);} | ||||||
|  |       } else { | ||||||
|  | 	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];} | ||||||
|  | 	s *= beta; | ||||||
|  | 	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);} | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  | 	Compute the product PA if trans = 0 | ||||||
|  | 	AP if trans = 1 | ||||||
|  | 	P = (I - b v transpose(v) ) | ||||||
|  | 	b = 2/v.v | ||||||
|  | 	start at element l of matrix A | ||||||
|  | 	v is of length j - k + 1 of v are nonzero | ||||||
|  | 	A is tridiagonal | ||||||
|  |  **/ | ||||||
|  | template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) | ||||||
|  | { | ||||||
|  |   if(abs(beta) > 0.0){ | ||||||
|  |  | ||||||
|  |     int N ; SizeSquare(A,N); | ||||||
|  |  | ||||||
|  |     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);  | ||||||
|  |  | ||||||
|  |     T s; | ||||||
|  |     for(int p=l; p<M; p++){ | ||||||
|  |       s = 0; | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; | ||||||
|  |       }else{ | ||||||
|  | 	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; | ||||||
|  |       } | ||||||
|  |       s = beta*s; | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k]; | ||||||
|  |       }else{ | ||||||
|  | 	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     for(int p=l; p<M; p++){ | ||||||
|  |       if(trans==0){ | ||||||
|  | 	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; | ||||||
|  |       }else{ | ||||||
|  | 	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | } | ||||||
|  | #endif | ||||||
| @@ -33,8 +33,6 @@ directory | |||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| // Block conjugate gradient. Dimension zero should be the block direction | // Block conjugate gradient. Dimension zero should be the block direction | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -42,286 +40,25 @@ template <class Field> | |||||||
| class BlockConjugateGradient : public OperatorFunction<Field> { | class BlockConjugateGradient : public OperatorFunction<Field> { | ||||||
|  public: |  public: | ||||||
|  |  | ||||||
|  |  | ||||||
|   typedef typename Field::scalar_type scomplex; |   typedef typename Field::scalar_type scomplex; | ||||||
|  |  | ||||||
|   int blockDim ; |   const int blockDim = 0; | ||||||
|   int Nblock; |  | ||||||
|  |  | ||||||
|   BlockCGtype CGtype; |   int Nblock; | ||||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. |   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||||
|                            // Defaults true. |                            // Defaults true. | ||||||
|   RealD Tolerance; |   RealD Tolerance; | ||||||
|   Integer MaxIterations; |   Integer MaxIterations; | ||||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|    |    | ||||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) |   BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||||
|     : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) |     : Tolerance(tol), | ||||||
|   {}; |     MaxIterations(maxit), | ||||||
|  |     ErrorOnNoConverge(err_on_no_conv){}; | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Thin QR factorisation (google it) |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, |  | ||||||
| 		 Eigen::MatrixXcd &C, |  | ||||||
| 		 Eigen::MatrixXcd &Cinv, |  | ||||||
| 		 Field & Q, |  | ||||||
| 		 const Field & R) |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   //Dimensions |  | ||||||
|   // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock |  | ||||||
|   // |  | ||||||
|   // Rdag R = m_rr = Herm = L L^dag        <-- Cholesky decomposition (LLT routine in Eigen) |  | ||||||
|   // |  | ||||||
|   //   Q  C = R => Q = R C^{-1} |  | ||||||
|   // |  | ||||||
|   // Want  Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}  |  | ||||||
|   // |  | ||||||
|   // Set C = L^{dag}, and then Q^dag Q = ident  |  | ||||||
|   // |  | ||||||
|   // Checks: |  | ||||||
|   // Cdag C = Rdag R ; passes. |  | ||||||
|   // QdagQ  = 1      ; passes |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|  |  | ||||||
|   // Force manifest hermitian to avoid rounding related |  | ||||||
|   m_rr = 0.5*(m_rr+m_rr.adjoint()); |  | ||||||
|  |  | ||||||
| #if 0 |  | ||||||
|   std::cout << " Calling Cholesky  ldlt on m_rr "  << m_rr <<std::endl; |  | ||||||
|   Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();  |  | ||||||
|   std::cout << " Called Cholesky  ldlt on m_rr "  << L_ldlt <<std::endl; |  | ||||||
|   auto  D_ldlt = m_rr.ldlt().vectorD();  |  | ||||||
|   std::cout << " Called Cholesky  ldlt on m_rr "  << D_ldlt <<std::endl; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
|   //  std::cout << " Calling Cholesky  llt on m_rr "  <<std::endl; |  | ||||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  |  | ||||||
|   //  std::cout << " Called Cholesky  llt on m_rr "  << L <<std::endl; |  | ||||||
|   C    = L.adjoint(); |  | ||||||
|   Cinv = C.inverse(); |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Q = R C^{-1} |  | ||||||
|   // |  | ||||||
|   // Q_j  = R_i Cinv(i,j)  |  | ||||||
|   // |  | ||||||
|   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); |  | ||||||
| } |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Call one of several implementations |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
| { | { | ||||||
|   if ( CGtype == BlockCGrQ ) { |   int Orthog = 0; // First dimension is block dim | ||||||
|     BlockCGrQsolve(Linop,Src,Psi); |  | ||||||
|   } else if (CGtype == BlockCG ) { |  | ||||||
|     BlockCGsolve(Linop,Src,Psi); |  | ||||||
|   } else if (CGtype == CGmultiRHS ) { |  | ||||||
|     CGmultiRHSsolve(Linop,Src,Psi); |  | ||||||
|   } else { |  | ||||||
|     assert(0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////// |  | ||||||
| // BlockCGrQ implementation: |  | ||||||
| //-------------------------- |  | ||||||
| // X is guess/Solution |  | ||||||
| // B is RHS |  | ||||||
| // Solve A X_i = B_i    ;        i refers to Nblock index |  | ||||||
| //////////////////////////////////////////////////////////////////////////// |  | ||||||
| void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   Nblock = B._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   X.checkerboard = B.checkerboard; |  | ||||||
|   conformable(X, B); |  | ||||||
|  |  | ||||||
|   Field tmp(B); |  | ||||||
|   Field Q(B); |  | ||||||
|   Field D(B); |  | ||||||
|   Field Z(B); |  | ||||||
|   Field AD(B); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,B,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,B,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,X,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   /************************************************************************ |  | ||||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) |  | ||||||
|    ************************************************************************ |  | ||||||
|    * Dimensions: |  | ||||||
|    * |  | ||||||
|    *   X,B==(Nferm x Nblock) |  | ||||||
|    *   A==(Nferm x Nferm) |  | ||||||
|    *   |  | ||||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site |  | ||||||
|    *  |  | ||||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) |  | ||||||
|    * for k:  |  | ||||||
|    *   Z  = AD |  | ||||||
|    *   M  = [D^dag Z]^{-1} |  | ||||||
|    *   X  = X + D MC |  | ||||||
|    *   QS = Q - ZM |  | ||||||
|    *   D  = Q + D S^dag |  | ||||||
|    *   C  = S C |  | ||||||
|    */ |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   // Initial block: initial search dir is guess |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; |  | ||||||
|  |  | ||||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) |  | ||||||
|  |  | ||||||
|   Linop.HermOp(X, AD); |  | ||||||
|   tmp = B - AD;   |  | ||||||
|   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; |  | ||||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); |  | ||||||
|   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; |  | ||||||
|   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; |  | ||||||
|   D=Q; |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   // Timers |  | ||||||
|   /////////////////////////////////////// |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch QRTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|  |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     //3. Z  = AD |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(D, Z);       |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; |  | ||||||
|  |  | ||||||
|     //4. M  = [D^dag Z]^{-1} |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_M       = m_DZ.inverse(); |  | ||||||
|     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; |  | ||||||
|      |  | ||||||
|     //5. X  = X + D MC |  | ||||||
|     m_tmp     = m_M * m_C; |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     //6. QS = Q - ZM |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|     QRTimer.Start(); |  | ||||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); |  | ||||||
|     QRTimer.Stop(); |  | ||||||
|      |  | ||||||
|     //7. D  = Q + D S^dag |  | ||||||
|     m_tmp = m_S.adjoint(); |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     //8. C  = S C |  | ||||||
|     m_C = m_S*m_C; |  | ||||||
|      |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      */ |  | ||||||
|     m_rr = m_C.adjoint() * m_C; |  | ||||||
|  |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     RealD rr; |  | ||||||
|  |  | ||||||
|     for(int b=0;b<Nblock;b++) { |  | ||||||
|       rrsum+=real(m_rr(b,b)); |  | ||||||
|       rr = real(m_rr(b,b))/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; |  | ||||||
|  |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; |  | ||||||
|  |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " |  | ||||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(X, AD); |  | ||||||
|       AD = AD-B; |  | ||||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; |  | ||||||
| 	     |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption |  | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
| @@ -425,9 +162,8 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi | |||||||
|      ********************* |      ********************* | ||||||
|      */ |      */ | ||||||
|     RealD max_resid=0; |     RealD max_resid=0; | ||||||
|     RealD rr; |  | ||||||
|     for(int b=0;b<Nblock;b++){ |     for(int b=0;b<Nblock;b++){ | ||||||
|       rr = real(m_rr(b,b))/ssq[b]; |       RealD rr = real(m_rr(b,b))/ssq[b]; | ||||||
|       if ( rr > max_resid ) max_resid = rr; |       if ( rr > max_resid ) max_resid = rr; | ||||||
|     } |     } | ||||||
|      |      | ||||||
| @@ -437,14 +173,13 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi | |||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; |       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||||
|       for(int b=0;b<Nblock;b++){ |       for(int b=0;b<Nblock;b++){ | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |       } | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|       Linop.HermOp(Psi, AP); |       Linop.HermOp(Psi, AP); | ||||||
|       AP = AP-Src; |       AP = AP-Src; | ||||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; |       std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
| @@ -462,13 +197,35 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi | |||||||
|   if (ErrorOnNoConverge) assert(0); |   if (ErrorOnNoConverge) assert(0); | ||||||
|   IterationsToComplete = k; |   IterationsToComplete = k; | ||||||
| } | } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction | // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||||
| // Use this for spread out across nodes |  | ||||||
| ////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////// | ||||||
| void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | template <class Field> | ||||||
|  | class MultiRHSConjugateGradient : public OperatorFunction<Field> { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   typedef typename Field::scalar_type scomplex; | ||||||
|  |  | ||||||
|  |   const int blockDim = 0; | ||||||
|  |  | ||||||
|  |   int Nblock; | ||||||
|  |   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||||
|  |                            // Defaults true. | ||||||
|  |   RealD Tolerance; | ||||||
|  |   Integer MaxIterations; | ||||||
|  |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|  |    | ||||||
|  |    MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||||
|  |     : Tolerance(tol), | ||||||
|  |     MaxIterations(maxit), | ||||||
|  |     ErrorOnNoConverge(err_on_no_conv){}; | ||||||
|  |  | ||||||
|  | void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
| { | { | ||||||
|   int Orthog = blockDim; // First dimension is block dim |   int Orthog = 0; // First dimension is block dim | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
| @@ -528,10 +285,12 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | |||||||
|     MatrixTimer.Stop(); |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|     // Alpha |     // Alpha | ||||||
|  |     //    sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); | ||||||
|     sliceInnerTimer.Start(); |     sliceInnerTimer.Start(); | ||||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); |     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||||
|     sliceInnerTimer.Stop(); |     sliceInnerTimer.Stop(); | ||||||
|     for(int b=0;b<Nblock;b++){ |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       //      std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl; | ||||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); |       v_alpha[b] = v_rr[b]/real(v_pAp[b]); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -573,7 +332,7 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | |||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; |       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||||
|       for(int b=0;b<Nblock;b++){ |       for(int b=0;b<Nblock;b++){ | ||||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||||
|       } |       } | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
| @@ -599,8 +358,9 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field & | |||||||
|   if (ErrorOnNoConverge) assert(0); |   if (ErrorOnNoConverge) assert(0); | ||||||
|   IterationsToComplete = k; |   IterationsToComplete = k; | ||||||
| } | } | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|         MaxIterations(maxit), |         MaxIterations(maxit), | ||||||
|         ErrorOnNoConverge(err_on_no_conv){}; |         ErrorOnNoConverge(err_on_no_conv){}; | ||||||
|  |  | ||||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) { |   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, | ||||||
|  |                   Field &psi) { | ||||||
|     psi.checkerboard = src.checkerboard; |     psi.checkerboard = src.checkerboard; | ||||||
|     conformable(psi, src); |     conformable(psi, src); | ||||||
|  |  | ||||||
| @@ -78,12 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     cp = a; |     cp = a; | ||||||
|     ssq = norm2(src); |     ssq = norm2(src); | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   src " << ssq << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   src " << ssq << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:    mp " << d << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:    mp " << d << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   mmp " << b << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   mmp " << b << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:  cp,r " << cp << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:     p " << a << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:     p " << a << std::endl; | ||||||
|  |  | ||||||
|     RealD rsq = Tolerance * Tolerance * ssq; |     RealD rsq = Tolerance * Tolerance * ssq; | ||||||
|  |  | ||||||
| @@ -92,7 +92,7 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|       return; |       return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(8) |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; |               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; | ||||||
|  |  | ||||||
|     GridStopWatch LinalgTimer; |     GridStopWatch LinalgTimer; | ||||||
| @@ -123,11 +123,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|       p = p * b + r; |       p = p * b + r; | ||||||
|  |  | ||||||
|       LinalgTimer.Stop(); |       LinalgTimer.Stop(); | ||||||
|  |  | ||||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k |       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||||
|                 << " residual " << cp << " target " << rsq << std::endl; |                 << " residual " << cp << " target " << rsq << std::endl; | ||||||
|       std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; |  | ||||||
|       std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; |  | ||||||
|  |  | ||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if (cp <= rsq) { |       if (cp <= rsq) { | ||||||
| @@ -135,6 +132,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|         Linop.HermOpAndNorm(psi, mmp, d, qq); |         Linop.HermOpAndNorm(psi, mmp, d, qq); | ||||||
|         p = mmp - src; |         p = mmp - src; | ||||||
|  |  | ||||||
|  |         RealD mmpnorm = sqrt(norm2(mmp)); | ||||||
|  |         RealD psinorm = sqrt(norm2(psi)); | ||||||
|         RealD srcnorm = sqrt(norm2(src)); |         RealD srcnorm = sqrt(norm2(src)); | ||||||
|         RealD resnorm = sqrt(norm2(p)); |         RealD resnorm = sqrt(norm2(p)); | ||||||
|         RealD true_residual = resnorm / srcnorm; |         RealD true_residual = resnorm / srcnorm; | ||||||
| @@ -158,10 +157,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     } |     } | ||||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" |     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||||
|               << std::endl; |               << std::endl; | ||||||
|  |  | ||||||
|     if (ErrorOnNoConverge) assert(0); |     if (ErrorOnNoConverge) assert(0); | ||||||
|     IterationsToComplete = k; |     IterationsToComplete = k; | ||||||
|  |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,256 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Christopher Kelly <ckelly@phys.columbia.edu> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H |  | ||||||
| #define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|   template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  |  | ||||||
|   class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> { |  | ||||||
|   public: |  | ||||||
|     bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. |  | ||||||
|     // Defaults true. |  | ||||||
|     RealD Tolerance; |  | ||||||
|     Integer MaxIterations; |  | ||||||
|     Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |  | ||||||
|     Integer ReliableUpdatesPerformed; |  | ||||||
|  |  | ||||||
|     bool DoFinalCleanup; //Final DP cleanup, defaults to true |  | ||||||
|     Integer IterationsToCleanup; //Final DP cleanup step iterations |  | ||||||
|      |  | ||||||
|     LinearOperatorBase<FieldF> &Linop_f; |  | ||||||
|     LinearOperatorBase<FieldD> &Linop_d; |  | ||||||
|     GridBase* SinglePrecGrid; |  | ||||||
|     RealD Delta; //reliable update parameter |  | ||||||
|  |  | ||||||
|     //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single |  | ||||||
|     LinearOperatorBase<FieldF> *Linop_fallback; |  | ||||||
|     RealD fallback_transition_tol; |  | ||||||
|  |  | ||||||
|      |  | ||||||
|     ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true) |  | ||||||
|       : Tolerance(tol), |  | ||||||
|         MaxIterations(maxit), |  | ||||||
| 	Delta(_delta), |  | ||||||
| 	Linop_f(_Linop_f), |  | ||||||
| 	Linop_d(_Linop_d), |  | ||||||
| 	SinglePrecGrid(_sp_grid), |  | ||||||
|         ErrorOnNoConverge(err_on_no_conv), |  | ||||||
| 	DoFinalCleanup(true), |  | ||||||
| 	Linop_fallback(NULL) |  | ||||||
|     {}; |  | ||||||
|  |  | ||||||
|     void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){ |  | ||||||
|       Linop_fallback = &_Linop_fallback; |  | ||||||
|       fallback_transition_tol = _fallback_transition_tol;       |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     void operator()(const FieldD &src, FieldD &psi) { |  | ||||||
|       LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f; |  | ||||||
|       bool using_fallback = false; |  | ||||||
|        |  | ||||||
|       psi.checkerboard = src.checkerboard; |  | ||||||
|       conformable(psi, src); |  | ||||||
|  |  | ||||||
|       RealD cp, c, a, d, b, ssq, qq, b_pred; |  | ||||||
|  |  | ||||||
|       FieldD p(src); |  | ||||||
|       FieldD mmp(src); |  | ||||||
|       FieldD r(src); |  | ||||||
|  |  | ||||||
|       // Initial residual computation & set up |  | ||||||
|       RealD guess = norm2(psi); |  | ||||||
|       assert(std::isnan(guess) == 0); |  | ||||||
|      |  | ||||||
|       Linop_d.HermOpAndNorm(psi, mmp, d, b); |  | ||||||
|      |  | ||||||
|       r = src - mmp; |  | ||||||
|       p = r; |  | ||||||
|  |  | ||||||
|       a = norm2(p); |  | ||||||
|       cp = a; |  | ||||||
|       ssq = norm2(src); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl; |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   src " << ssq << std::endl; |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:    mp " << d << std::endl; |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   mmp " << b << std::endl; |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:  cp,r " << cp << std::endl; |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:     p " << a << std::endl; |  | ||||||
|  |  | ||||||
|       RealD rsq = Tolerance * Tolerance * ssq; |  | ||||||
|  |  | ||||||
|       // Check if guess is really REALLY good :) |  | ||||||
|       if (cp <= rsq) { |  | ||||||
| 	std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n"; |  | ||||||
| 	std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; |  | ||||||
| 	return; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       //Single prec initialization |  | ||||||
|       FieldF r_f(SinglePrecGrid); |  | ||||||
|       r_f.checkerboard = r.checkerboard; |  | ||||||
|       precisionChange(r_f, r); |  | ||||||
|  |  | ||||||
|       FieldF psi_f(r_f); |  | ||||||
|       psi_f = zero; |  | ||||||
|  |  | ||||||
|       FieldF p_f(r_f); |  | ||||||
|       FieldF mmp_f(r_f); |  | ||||||
|  |  | ||||||
|       RealD MaxResidSinceLastRelUp = cp; //initial residual     |  | ||||||
|      |  | ||||||
|       std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
| 		<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; |  | ||||||
|  |  | ||||||
|       GridStopWatch LinalgTimer; |  | ||||||
|       GridStopWatch MatrixTimer; |  | ||||||
|       GridStopWatch SolverTimer; |  | ||||||
|  |  | ||||||
|       SolverTimer.Start(); |  | ||||||
|       int k = 0; |  | ||||||
|       int l = 0; |  | ||||||
|      |  | ||||||
|       for (k = 1; k <= MaxIterations; k++) { |  | ||||||
| 	c = cp; |  | ||||||
|  |  | ||||||
| 	MatrixTimer.Start(); |  | ||||||
| 	Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq); |  | ||||||
| 	MatrixTimer.Stop(); |  | ||||||
|  |  | ||||||
| 	LinalgTimer.Start(); |  | ||||||
|  |  | ||||||
| 	a = c / d; |  | ||||||
| 	b_pred = a * (a * qq - d) / c; |  | ||||||
|  |  | ||||||
| 	cp = axpy_norm(r_f, -a, mmp_f, r_f); |  | ||||||
| 	b = cp / c; |  | ||||||
|  |  | ||||||
| 	// Fuse these loops ; should be really easy |  | ||||||
| 	psi_f = a * p_f + psi_f; |  | ||||||
| 	//p_f = p_f * b + r_f; |  | ||||||
|  |  | ||||||
| 	LinalgTimer.Stop(); |  | ||||||
|  |  | ||||||
| 	std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k |  | ||||||
| 		  << " residual " << cp << " target " << rsq << std::endl; |  | ||||||
| 	std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; |  | ||||||
| 	std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; |  | ||||||
|  |  | ||||||
| 	if(cp > MaxResidSinceLastRelUp){ |  | ||||||
| 	  std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl; |  | ||||||
| 	  MaxResidSinceLastRelUp = cp; |  | ||||||
| 	} |  | ||||||
| 	   |  | ||||||
| 	// Stopping condition |  | ||||||
| 	if (cp <= rsq) { |  | ||||||
| 	  //Although not written in the paper, I assume that I have to add on the final solution |  | ||||||
| 	  precisionChange(mmp, psi_f); |  | ||||||
| 	  psi = psi + mmp; |  | ||||||
| 	 |  | ||||||
| 	 |  | ||||||
| 	  SolverTimer.Stop(); |  | ||||||
| 	  Linop_d.HermOpAndNorm(psi, mmp, d, qq); |  | ||||||
| 	  p = mmp - src; |  | ||||||
|  |  | ||||||
| 	  RealD srcnorm = sqrt(norm2(src)); |  | ||||||
| 	  RealD resnorm = sqrt(norm2(p)); |  | ||||||
| 	  RealD true_residual = resnorm / srcnorm; |  | ||||||
|  |  | ||||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl; |  | ||||||
| 	  std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; |  | ||||||
| 	  std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; |  | ||||||
| 	  std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; |  | ||||||
|  |  | ||||||
| 	  std::cout << GridLogMessage << "Time breakdown "<<std::endl; |  | ||||||
| 	  std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; |  | ||||||
| 	  std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; |  | ||||||
| 	  std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; |  | ||||||
|  |  | ||||||
| 	  IterationsToComplete = k;	 |  | ||||||
| 	  ReliableUpdatesPerformed = l; |  | ||||||
| 	   |  | ||||||
| 	  if(DoFinalCleanup){ |  | ||||||
| 	    //Do a final CG to cleanup |  | ||||||
| 	    std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n"; |  | ||||||
| 	    ConjugateGradient<FieldD> CG(Tolerance,MaxIterations); |  | ||||||
| 	    CG.ErrorOnNoConverge = ErrorOnNoConverge; |  | ||||||
| 	    CG(Linop_d,src,psi); |  | ||||||
| 	    IterationsToCleanup = CG.IterationsToComplete; |  | ||||||
| 	  } |  | ||||||
| 	  else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); |  | ||||||
|  |  | ||||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n"; |  | ||||||
| 	  return; |  | ||||||
| 	} |  | ||||||
| 	else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update |  | ||||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate " |  | ||||||
| 		    << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n"; |  | ||||||
| 	  precisionChange(mmp, psi_f); |  | ||||||
| 	  psi = psi + mmp; |  | ||||||
|  |  | ||||||
| 	  Linop_d.HermOpAndNorm(psi, mmp, d, qq); |  | ||||||
| 	  r = src - mmp; |  | ||||||
|  |  | ||||||
| 	  psi_f = zero; |  | ||||||
| 	  precisionChange(r_f, r); |  | ||||||
| 	  cp = norm2(r); |  | ||||||
| 	  MaxResidSinceLastRelUp = cp; |  | ||||||
|  |  | ||||||
| 	  b = cp/c; |  | ||||||
| 	   |  | ||||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl; |  | ||||||
| 	   |  | ||||||
| 	  l = l+1; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence |  | ||||||
|  |  | ||||||
| 	if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){ |  | ||||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl; |  | ||||||
| 	  Linop_f_use = Linop_fallback; |  | ||||||
| 	  using_fallback = true; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	 |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge" |  | ||||||
| 		<< std::endl; |  | ||||||
|        |  | ||||||
|       if (ErrorOnNoConverge) assert(0); |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       ReliableUpdatesPerformed = l;       |  | ||||||
|     }     |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
							
								
								
									
										81
									
								
								lib/algorithms/iterative/EigenSort.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								lib/algorithms/iterative/EigenSort.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/EigenSort.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_EIGENSORT_H | ||||||
|  | #define GRID_EIGENSORT_H | ||||||
|  |  | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Eigen sorter to begin with | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | template<class Field> | ||||||
|  | class SortEigen { | ||||||
|  |  private: | ||||||
|  |    | ||||||
|  | //hacking for testing for now | ||||||
|  |  private: | ||||||
|  |   static bool less_lmd(RealD left,RealD right){ | ||||||
|  |     return left > right; | ||||||
|  |   }   | ||||||
|  |   static bool less_pair(std::pair<RealD,Field const*>& left, | ||||||
|  |                         std::pair<RealD,Field const*>& right){ | ||||||
|  |     return left.first > (right.first); | ||||||
|  |   }   | ||||||
|  |    | ||||||
|  |    | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   void push(DenseVector<RealD>& lmd, | ||||||
|  |             DenseVector<Field>& evec,int N) { | ||||||
|  |     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); | ||||||
|  |     for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; | ||||||
|  |      | ||||||
|  |     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());     | ||||||
|  |     for(int i=0;i<lmd.size();++i) | ||||||
|  |       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); | ||||||
|  |  | ||||||
|  |     partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); | ||||||
|  |  | ||||||
|  |     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); | ||||||
|  |     for(int i=0;i<N;++i){ | ||||||
|  |       lmd[i]=it->first; | ||||||
|  |       evec[i]=*(it->second); | ||||||
|  |       ++it; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   void push(DenseVector<RealD>& lmd,int N) { | ||||||
|  |     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); | ||||||
|  |   } | ||||||
|  |   bool saturated(RealD lmd, RealD thrs) { | ||||||
|  |     return fabs(lmd) > fabs(thrs); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,352 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Christoph Lehner <clehner@bnl.gov> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_LOCAL_COHERENCE_IRL_H |  | ||||||
| #define GRID_LOCAL_COHERENCE_IRL_H |  | ||||||
| namespace Grid {  |  | ||||||
| struct LanczosParams : Serializable { |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, |  | ||||||
| 				  ChebyParams, Cheby,/*Chebyshev*/ |  | ||||||
| 				  int, Nstop,    /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ |  | ||||||
| 				  int, Nk,       /*Vecs in Lanczos seek converge*/ |  | ||||||
| 				  int, Nm,       /*Total vecs in Lanczos include restart*/ |  | ||||||
| 				  RealD, resid,  /*residual*/ |  | ||||||
|  				  int, MaxIt,  |  | ||||||
| 				  RealD, betastp,  /* ? */ |  | ||||||
| 				  int, MinRes);    // Must restart |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| struct LocalCoherenceLanczosParams : Serializable { |  | ||||||
|  public: |  | ||||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, |  | ||||||
| 				  bool, doFine, |  | ||||||
| 				  bool, doFineRead, |  | ||||||
| 				  bool, doCoarse, |  | ||||||
| 	       			  bool, doCoarseRead, |  | ||||||
| 				  LanczosParams, FineParams, |  | ||||||
| 				  LanczosParams, CoarseParams, |  | ||||||
| 				  ChebyParams,   Smoother, |  | ||||||
| 				  RealD        , coarse_relax_tol, |  | ||||||
| 				  std::vector<int>, blockSize, |  | ||||||
| 				  std::string, config, |  | ||||||
| 				  std::vector < std::complex<double>  >, omega, |  | ||||||
| 				  RealD, mass, |  | ||||||
| 				  RealD, M5); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| // Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { |  | ||||||
| public: |  | ||||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; |  | ||||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; |  | ||||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj>          FineField; |  | ||||||
|  |  | ||||||
|   LinearOperatorBase<FineField> &_Linop; |  | ||||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; |  | ||||||
|  |  | ||||||
|   ProjectedHermOp(LinearOperatorBase<FineField>& linop,  Aggregation<Fobj,CComplex,nbasis> &aggregate) :  |  | ||||||
|     _Linop(linop), |  | ||||||
|     _Aggregate(aggregate)  {  }; |  | ||||||
|  |  | ||||||
|   void operator()(const CoarseField& in, CoarseField& out) { |  | ||||||
|  |  | ||||||
|     GridBase *FineGrid = _Aggregate.FineGrid; |  | ||||||
|     FineField fin(FineGrid); |  | ||||||
|     FineField fout(FineGrid); |  | ||||||
|  |  | ||||||
|     _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; |  | ||||||
|     _Linop.HermOp(fin,fout);                   std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; |  | ||||||
|     _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { |  | ||||||
| public: |  | ||||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; |  | ||||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; |  | ||||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj>          FineField; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   OperatorFunction<FineField>   & _poly; |  | ||||||
|   LinearOperatorBase<FineField> &_Linop; |  | ||||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; |  | ||||||
|  |  | ||||||
|   ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,  |  | ||||||
| 			  Aggregation<Fobj,CComplex,nbasis> &aggregate) :  |  | ||||||
|     _poly(poly), |  | ||||||
|     _Linop(linop), |  | ||||||
|     _Aggregate(aggregate)  {  }; |  | ||||||
|  |  | ||||||
|   void operator()(const CoarseField& in, CoarseField& out) { |  | ||||||
|  |  | ||||||
|     GridBase *FineGrid = _Aggregate.FineGrid; |  | ||||||
|  |  | ||||||
|     FineField fin(FineGrid) ;fin.checkerboard  =_Aggregate.checkerboard; |  | ||||||
|     FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; |  | ||||||
|      |  | ||||||
|     _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; |  | ||||||
|     _poly(_Linop,fin,fout);                    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; |  | ||||||
|     _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class ImplicitlyRestartedLanczosSmoothedTester  : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > > |  | ||||||
| { |  | ||||||
|  public: |  | ||||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; |  | ||||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; |  | ||||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<Fobj>          FineField; |  | ||||||
|  |  | ||||||
|   LinearFunction<CoarseField> & _Poly; |  | ||||||
|   OperatorFunction<FineField>   & _smoother; |  | ||||||
|   LinearOperatorBase<FineField> &_Linop; |  | ||||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; |  | ||||||
|   RealD                             _coarse_relax_tol; |  | ||||||
|   ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly, |  | ||||||
| 					   OperatorFunction<FineField>   &smoother, |  | ||||||
| 					   LinearOperatorBase<FineField> &Linop, |  | ||||||
| 					   Aggregation<Fobj,CComplex,nbasis> &Aggregate, |  | ||||||
| 					   RealD coarse_relax_tol=5.0e3)  |  | ||||||
|     : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol)  {    }; |  | ||||||
|  |  | ||||||
|   int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) |  | ||||||
|   { |  | ||||||
|     CoarseField v(B); |  | ||||||
|     RealD eval_poly = eval; |  | ||||||
|     // Apply operator |  | ||||||
|     _Poly(B,v); |  | ||||||
|  |  | ||||||
|     RealD vnum = real(innerProduct(B,v)); // HermOp. |  | ||||||
|     RealD vden = norm2(B); |  | ||||||
|     RealD vv0  = norm2(v); |  | ||||||
|     eval   = vnum/vden; |  | ||||||
|     v -= eval*B; |  | ||||||
|  |  | ||||||
|     RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); |  | ||||||
|  |  | ||||||
|     std::cout.precision(13); |  | ||||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " |  | ||||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" |  | ||||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv |  | ||||||
| 	     <<std::endl; |  | ||||||
|  |  | ||||||
|     int conv=0; |  | ||||||
|     if( (vv<eresid*eresid) ) conv = 1; |  | ||||||
|     return conv; |  | ||||||
|   } |  | ||||||
|   int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) |  | ||||||
|   { |  | ||||||
|     GridBase *FineGrid = _Aggregate.FineGrid; |  | ||||||
|  |  | ||||||
|     int checkerboard   = _Aggregate.checkerboard; |  | ||||||
|  |  | ||||||
|     FineField fB(FineGrid);fB.checkerboard =checkerboard; |  | ||||||
|     FineField fv(FineGrid);fv.checkerboard =checkerboard; |  | ||||||
|  |  | ||||||
|     _Aggregate.PromoteFromSubspace(B,fv); |  | ||||||
|     _smoother(_Linop,fv,fB);  |  | ||||||
|  |  | ||||||
|     RealD eval_poly = eval; |  | ||||||
|     _Linop.HermOp(fB,fv); |  | ||||||
|  |  | ||||||
|     RealD vnum = real(innerProduct(fB,fv)); // HermOp. |  | ||||||
|     RealD vden = norm2(fB); |  | ||||||
|     RealD vv0  = norm2(fv); |  | ||||||
|     eval   = vnum/vden; |  | ||||||
|     fv -= eval*fB; |  | ||||||
|     RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0); |  | ||||||
|  |  | ||||||
|     std::cout.precision(13); |  | ||||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " |  | ||||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" |  | ||||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv |  | ||||||
| 	     <<std::endl; |  | ||||||
|     if ( j > nbasis ) eresid = eresid*_coarse_relax_tol; |  | ||||||
|     if( (vv<eresid*eresid) ) return 1; |  | ||||||
|     return 0; |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////// |  | ||||||
| // Make serializable Lanczos params |  | ||||||
| //////////////////////////////////////////// |  | ||||||
| template<class Fobj,class CComplex,int nbasis> |  | ||||||
| class LocalCoherenceLanczos  |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; |  | ||||||
|   typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field |  | ||||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; |  | ||||||
|   typedef Lattice<Fobj>                       FineField; |  | ||||||
|  |  | ||||||
| protected: |  | ||||||
|   GridBase *_CoarseGrid; |  | ||||||
|   GridBase *_FineGrid; |  | ||||||
|   int _checkerboard; |  | ||||||
|   LinearOperatorBase<FineField>                 & _FineOp; |  | ||||||
|    |  | ||||||
|   // FIXME replace Aggregation with vector of fine; the code reuse is too small for |  | ||||||
|   // the hassle and complexity of cross coupling. |  | ||||||
|   Aggregation<Fobj,CComplex,nbasis>               _Aggregate;   |  | ||||||
|   std::vector<RealD>                              evals_fine; |  | ||||||
|   std::vector<RealD>                              evals_coarse;  |  | ||||||
|   std::vector<CoarseField>                        evec_coarse; |  | ||||||
| public: |  | ||||||
|   LocalCoherenceLanczos(GridBase *FineGrid, |  | ||||||
| 		GridBase *CoarseGrid, |  | ||||||
| 		LinearOperatorBase<FineField> &FineOp, |  | ||||||
| 		int checkerboard) : |  | ||||||
|     _CoarseGrid(CoarseGrid), |  | ||||||
|     _FineGrid(FineGrid), |  | ||||||
|     _Aggregate(CoarseGrid,FineGrid,checkerboard), |  | ||||||
|     _FineOp(FineOp), |  | ||||||
|     _checkerboard(checkerboard) |  | ||||||
|   { |  | ||||||
|     evals_fine.resize(0); |  | ||||||
|     evals_coarse.resize(0); |  | ||||||
|   }; |  | ||||||
|   void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } |  | ||||||
|  |  | ||||||
|   template<typename T>  static RealD normalise(T& v)  |  | ||||||
|   { |  | ||||||
|     RealD nn = norm2(v); |  | ||||||
|     nn = ::sqrt(nn); |  | ||||||
|     v = v * (1.0/nn); |  | ||||||
|     return nn; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void fakeFine(void) |  | ||||||
|   { |  | ||||||
|     int Nk = nbasis; |  | ||||||
|     _Aggregate.subspace.resize(Nk,_FineGrid); |  | ||||||
|     _Aggregate.subspace[0]=1.0; |  | ||||||
|     _Aggregate.subspace[0].checkerboard=_checkerboard; |  | ||||||
|     normalise(_Aggregate.subspace[0]); |  | ||||||
|     PlainHermOp<FineField>    Op(_FineOp); |  | ||||||
|     for(int k=1;k<Nk;k++){ |  | ||||||
|       _Aggregate.subspace[k].checkerboard=_checkerboard; |  | ||||||
|       Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); |  | ||||||
|       normalise(_Aggregate.subspace[k]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void testFine(RealD resid)  |  | ||||||
|   { |  | ||||||
|     assert(evals_fine.size() == nbasis); |  | ||||||
|     assert(_Aggregate.subspace.size() == nbasis); |  | ||||||
|     PlainHermOp<FineField>    Op(_FineOp); |  | ||||||
|     ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); |  | ||||||
|     for(int k=0;k<nbasis;k++){ |  | ||||||
|       assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)  |  | ||||||
|   { |  | ||||||
|     assert(evals_fine.size() == nbasis); |  | ||||||
|     assert(_Aggregate.subspace.size() == nbasis); |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     Chebyshev<FineField>                          ChebySmooth(cheby_smooth); |  | ||||||
|     ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); |  | ||||||
|     ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); |  | ||||||
|  |  | ||||||
|     for(int k=0;k<evec_coarse.size();k++){ |  | ||||||
|       if ( k < nbasis ) {  |  | ||||||
| 	assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); |  | ||||||
|       } else {  |  | ||||||
| 	assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,  |  | ||||||
| 		RealD MaxIt, RealD betastp, int MinRes) |  | ||||||
|   { |  | ||||||
|     assert(nbasis<=Nm); |  | ||||||
|     Chebyshev<FineField>      Cheby(cheby_parms); |  | ||||||
|     FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp); |  | ||||||
|     PlainHermOp<FineField>    Op(_FineOp); |  | ||||||
|  |  | ||||||
|     evals_fine.resize(Nm); |  | ||||||
|     _Aggregate.subspace.resize(Nm,_FineGrid); |  | ||||||
|  |  | ||||||
|     ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); |  | ||||||
|  |  | ||||||
|     FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; |  | ||||||
|  |  | ||||||
|     int Nconv; |  | ||||||
|     IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); |  | ||||||
|      |  | ||||||
|     // Shrink down to number saved |  | ||||||
|     assert(Nstop>=nbasis); |  | ||||||
|     assert(Nconv>=nbasis); |  | ||||||
|     evals_fine.resize(nbasis); |  | ||||||
|     _Aggregate.subspace.resize(nbasis,_FineGrid); |  | ||||||
|   } |  | ||||||
|   void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, |  | ||||||
| 		  int Nstop, int Nk, int Nm,RealD resid,  |  | ||||||
| 		  RealD MaxIt, RealD betastp, int MinRes) |  | ||||||
|   { |  | ||||||
|     Chebyshev<FineField>                          Cheby(cheby_op); |  | ||||||
|     ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,_Aggregate); |  | ||||||
|     ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     Chebyshev<FineField>                                           ChebySmooth(cheby_smooth); |  | ||||||
|     ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); |  | ||||||
|  |  | ||||||
|     evals_coarse.resize(Nm); |  | ||||||
|     evec_coarse.resize(Nm,_CoarseGrid); |  | ||||||
|  |  | ||||||
|     CoarseField src(_CoarseGrid);     src=1.0;  |  | ||||||
|  |  | ||||||
|     ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); |  | ||||||
|     int Nconv=0; |  | ||||||
|     IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); |  | ||||||
|     assert(Nconv>=Nstop); |  | ||||||
|     evals_coarse.resize(Nstop); |  | ||||||
|     evec_coarse.resize (Nstop,_CoarseGrid); |  | ||||||
|     for (int i=0;i<Nstop;i++){ |  | ||||||
|       std::cout << i << " Coarse eval = " << evals_coarse[i]  << std::endl; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -53,119 +53,16 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    *     M psi = eta |    *     M psi = eta | ||||||
|    *********************** |    *********************** | ||||||
|    *Odd |    *Odd | ||||||
|    * i)                 D_oo psi_o =  L^{-1}  eta_o |    * i)   (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o | ||||||
|    *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) |    *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||||
|    * |  | ||||||
|    * Wilson: |  | ||||||
|    *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o |  | ||||||
|    * Stag: |  | ||||||
|    *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e) |  | ||||||
|    * |  | ||||||
|    * L^-1 eta_o= (1              0 ) (e |  | ||||||
|    *             (-MoeMee^{-1}   1 )    |  | ||||||
|    * |  | ||||||
|    *Even |    *Even | ||||||
|    * ii)  Mee psi_e + Meo psi_o = src_e |    * ii)  Mee psi_e + Meo psi_o = src_e | ||||||
|    * |    * | ||||||
|    *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )... |    *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||||
|    * |    * | ||||||
|    *  |  | ||||||
|    * TODO: Other options: |  | ||||||
|    *  |  | ||||||
|    * a) change checkerboards for Schur e<->o |  | ||||||
|    * |  | ||||||
|    * Left precon by Moo^-1 |  | ||||||
|    * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 =  (D_oo)^dag M_oo^-dag Moo^-1 L^{-1}  eta_o |  | ||||||
|    *                              eta_o'     = (D_oo)^dag  M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e) |  | ||||||
|    * |  | ||||||
|    * Right precon by Moo^-1 |  | ||||||
|    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o |  | ||||||
|    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) |  | ||||||
|    *                              psi_o = M_oo^-1 phi_o |  | ||||||
|    * TODO: Deflation  |  | ||||||
|    */ |    */ | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Take a matrix and form a Red Black solver calling a Herm solver |  | ||||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|   template<class Field> class SchurRedBlackStaggeredSolve { |  | ||||||
|   private: |  | ||||||
|     OperatorFunction<Field> & _HermitianRBSolver; |  | ||||||
|     int CBfactorise; |  | ||||||
|   public: |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|     // Wrap the usual normal equations Schur trick |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|   SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver)  : |  | ||||||
|      _HermitianRBSolver(HermitianRBSolver)  |  | ||||||
|     {  |  | ||||||
|       CBfactorise=0; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class Matrix> |  | ||||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ |  | ||||||
|  |  | ||||||
|       // FIXME CGdiagonalMee not implemented virtual function |  | ||||||
|       // FIXME use CBfactorise to control schur decomp |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       GridBase *fgrid= _Matrix.Grid(); |  | ||||||
|  |  | ||||||
|       SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|   |  | ||||||
|       Field src_e(grid); |  | ||||||
|       Field src_o(grid); |  | ||||||
|       Field sol_e(grid); |  | ||||||
|       Field sol_o(grid); |  | ||||||
|       Field   tmp(grid); |  | ||||||
|       Field  Mtmp(grid); |  | ||||||
|       Field resid(fgrid); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,in); |  | ||||||
|       pickCheckerboard(Odd ,src_o,in); |  | ||||||
|       pickCheckerboard(Even,sol_e,out); |  | ||||||
|       pickCheckerboard(Odd ,sol_o,out); |  | ||||||
|      |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       // src_o = (source_o - Moe MeeInv source_e) |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); |  | ||||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      |  | ||||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      |  | ||||||
|  |  | ||||||
|       src_o = tmp;     assert(src_o.checkerboard ==Odd); |  | ||||||
|       //  _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source |  | ||||||
|  |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       // Call the red-black solver |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl; |  | ||||||
|       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); |  | ||||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); |  | ||||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); |  | ||||||
|       |  | ||||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); |  | ||||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); |  | ||||||
|  |  | ||||||
|       // Verify the unprec residual |  | ||||||
|       _Matrix.M(out,resid);  |  | ||||||
|       resid = resid-in; |  | ||||||
|       RealD ns = norm2(in); |  | ||||||
|       RealD nr = norm2(resid); |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; |  | ||||||
|     }      |  | ||||||
|   }; |  | ||||||
|   template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Take a matrix and form a Red Black solver calling a Herm solver |   // Take a matrix and form a Red Black solver calling a Herm solver | ||||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface |   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||||
| @@ -179,10 +76,12 @@ namespace Grid { | |||||||
|     ///////////////////////////////////////////////////// |     ///////////////////////////////////////////////////// | ||||||
|     // Wrap the usual normal equations Schur trick |     // Wrap the usual normal equations Schur trick | ||||||
|     ///////////////////////////////////////////////////// |     ///////////////////////////////////////////////////// | ||||||
|   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0)  :  _HermitianRBSolver(HermitianRBSolver)  |   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||||
|   {  |      _HermitianRBSolver(HermitianRBSolver)  | ||||||
|     CBfactorise=cb; |     {  | ||||||
|   }; |       CBfactorise=0; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     template<class Matrix> |     template<class Matrix> | ||||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ |       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||||
|  |  | ||||||
| @@ -242,166 +141,5 @@ namespace Grid { | |||||||
|     }      |     }      | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Take a matrix and form a Red Black solver calling a Herm solver |  | ||||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   template<class Field> class SchurRedBlackDiagTwoSolve { |  | ||||||
|   private: |  | ||||||
|     OperatorFunction<Field> & _HermitianRBSolver; |  | ||||||
|     int CBfactorise; |  | ||||||
|   public: |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|     // Wrap the usual normal equations Schur trick |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  : |  | ||||||
|      _HermitianRBSolver(HermitianRBSolver)  |  | ||||||
|     {  |  | ||||||
|       CBfactorise=0; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class Matrix> |  | ||||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ |  | ||||||
|  |  | ||||||
|       // FIXME CGdiagonalMee not implemented virtual function |  | ||||||
|       // FIXME use CBfactorise to control schur decomp |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       GridBase *fgrid= _Matrix.Grid(); |  | ||||||
|  |  | ||||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|   |  | ||||||
|       Field src_e(grid); |  | ||||||
|       Field src_o(grid); |  | ||||||
|       Field sol_e(grid); |  | ||||||
|       Field sol_o(grid); |  | ||||||
|       Field   tmp(grid); |  | ||||||
|       Field  Mtmp(grid); |  | ||||||
|       Field resid(fgrid); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,in); |  | ||||||
|       pickCheckerboard(Odd ,src_o,in); |  | ||||||
|       pickCheckerboard(Even,sol_e,out); |  | ||||||
|       pickCheckerboard(Odd ,sol_o,out); |  | ||||||
|      |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); |  | ||||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      |  | ||||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      |  | ||||||
|  |  | ||||||
|       // get the right MpcDag |  | ||||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        |  | ||||||
|  |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       // Call the red-black solver |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; |  | ||||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); |  | ||||||
|       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); |  | ||||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); |  | ||||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); |  | ||||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); |  | ||||||
|       |  | ||||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); |  | ||||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); |  | ||||||
|  |  | ||||||
|       // Verify the unprec residual |  | ||||||
|       _Matrix.M(out,resid);  |  | ||||||
|       resid = resid-in; |  | ||||||
|       RealD ns = norm2(in); |  | ||||||
|       RealD nr = norm2(resid); |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; |  | ||||||
|     }      |  | ||||||
|   }; |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Take a matrix and form a Red Black solver calling a Herm solver |  | ||||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   template<class Field> class SchurRedBlackDiagTwoMixed { |  | ||||||
|   private: |  | ||||||
|     LinearFunction<Field> & _HermitianRBSolver; |  | ||||||
|     int CBfactorise; |  | ||||||
|   public: |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|     // Wrap the usual normal equations Schur trick |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|   SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver)  : |  | ||||||
|      _HermitianRBSolver(HermitianRBSolver)  |  | ||||||
|     {  |  | ||||||
|       CBfactorise=0; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class Matrix> |  | ||||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ |  | ||||||
|  |  | ||||||
|       // FIXME CGdiagonalMee not implemented virtual function |  | ||||||
|       // FIXME use CBfactorise to control schur decomp |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       GridBase *fgrid= _Matrix.Grid(); |  | ||||||
|  |  | ||||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|   |  | ||||||
|       Field src_e(grid); |  | ||||||
|       Field src_o(grid); |  | ||||||
|       Field sol_e(grid); |  | ||||||
|       Field sol_o(grid); |  | ||||||
|       Field   tmp(grid); |  | ||||||
|       Field  Mtmp(grid); |  | ||||||
|       Field resid(fgrid); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,in); |  | ||||||
|       pickCheckerboard(Odd ,src_o,in); |  | ||||||
|       pickCheckerboard(Even,sol_e,out); |  | ||||||
|       pickCheckerboard(Odd ,sol_o,out); |  | ||||||
|      |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); |  | ||||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      |  | ||||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      |  | ||||||
|  |  | ||||||
|       // get the right MpcDag |  | ||||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        |  | ||||||
|  |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       // Call the red-black solver |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; |  | ||||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); |  | ||||||
| //      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); |  | ||||||
|       _HermitianRBSolver(src_o,tmp);  assert(tmp.checkerboard==Odd); |  | ||||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); |  | ||||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); |  | ||||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); |  | ||||||
|       |  | ||||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); |  | ||||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); |  | ||||||
|  |  | ||||||
|       // Verify the unprec residual |  | ||||||
|       _Matrix.M(out,resid);  |  | ||||||
|       resid = resid-in; |  | ||||||
|       RealD ns = norm2(in); |  | ||||||
|       RealD nr = norm2(resid); |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; |  | ||||||
|     }      |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,5 +1,7 @@ | |||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <fcntl.h> |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -9,7 +11,7 @@ int PointerCache::victim; | |||||||
|  |  | ||||||
| void *PointerCache::Insert(void *ptr,size_t bytes) { | void *PointerCache::Insert(void *ptr,size_t bytes) { | ||||||
|  |  | ||||||
|   if (bytes < 4096 ) return ptr; |   if (bytes < 4096 ) return NULL; | ||||||
|  |  | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
|   assert(omp_in_parallel()==0); |   assert(omp_in_parallel()==0); | ||||||
| @@ -61,37 +63,4 @@ void *PointerCache::Lookup(size_t bytes) { | |||||||
|   return NULL; |   return NULL; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| void check_huge_pages(void *Buf,uint64_t BYTES) |  | ||||||
| { |  | ||||||
| #ifdef __linux__ |  | ||||||
|   int fd = open("/proc/self/pagemap", O_RDONLY); |  | ||||||
|   assert(fd >= 0); |  | ||||||
|   const int page_size = 4096; |  | ||||||
|   uint64_t virt_pfn = (uint64_t)Buf / page_size; |  | ||||||
|   off_t offset = sizeof(uint64_t) * virt_pfn; |  | ||||||
|   uint64_t npages = (BYTES + page_size-1) / page_size; |  | ||||||
|   uint64_t pagedata[npages]; |  | ||||||
|   uint64_t ret = lseek(fd, offset, SEEK_SET); |  | ||||||
|   assert(ret == offset); |  | ||||||
|   ret = ::read(fd, pagedata, sizeof(uint64_t)*npages); |  | ||||||
|   assert(ret == sizeof(uint64_t) * npages); |  | ||||||
|   int nhugepages = npages / 512; |  | ||||||
|   int n4ktotal, nnothuge; |  | ||||||
|   n4ktotal = 0; |  | ||||||
|   nnothuge = 0; |  | ||||||
|   for (int i = 0; i < nhugepages; ++i) { |  | ||||||
|     uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; |  | ||||||
|     for (int j = 0; j < 512; ++j) { |  | ||||||
|       uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; |  | ||||||
|       ++n4ktotal; |  | ||||||
|       if (pageaddr != baseaddr + j * page_size) |  | ||||||
| 	++nnothuge; |  | ||||||
|       } |  | ||||||
|   } |  | ||||||
|   int rank = CartesianCommunicator::RankWorld(); |  | ||||||
|   printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -64,8 +64,6 @@ namespace Grid { | |||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   void check_huge_pages(void *Buf,uint64_t BYTES); |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////// | ||||||
| // A lattice of something, but assume the something is SIMDized. | // A lattice of something, but assume the something is SIMDized. | ||||||
| //////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////// | ||||||
| @@ -94,34 +92,18 @@ public: | |||||||
|     size_type bytes = __n*sizeof(_Tp); |     size_type bytes = __n*sizeof(_Tp); | ||||||
|  |  | ||||||
|     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); |     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); | ||||||
|     //    if ( ptr != NULL )  |      | ||||||
|     //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl; |  | ||||||
|  |  | ||||||
|     ////////////////// |  | ||||||
|     // Hack 2MB align; could make option probably doesn't need configurability |  | ||||||
|     ////////////////// |  | ||||||
| //define GRID_ALLOC_ALIGN (128) |  | ||||||
| #define GRID_ALLOC_ALIGN (2*1024*1024) |  | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); | ||||||
| #else | #else | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); | ||||||
| #endif | #endif | ||||||
|     //    std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl; |  | ||||||
|     // First touch optimise in threaded loop |  | ||||||
|     uint8_t *cp = (uint8_t *)ptr; |  | ||||||
| #ifdef GRID_OMP |  | ||||||
| #pragma omp parallel for |  | ||||||
| #endif |  | ||||||
|     for(size_type n=0;n<bytes;n+=4096){ |  | ||||||
|       cp[n]=0; |  | ||||||
|     } |  | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void deallocate(pointer __p, size_type __n) {  |   void deallocate(pointer __p, size_type __n) {  | ||||||
|     size_type bytes = __n * sizeof(_Tp); |     size_type bytes = __n * sizeof(_Tp); | ||||||
|  |  | ||||||
|     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); |     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); | ||||||
|  |  | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
| @@ -200,19 +182,10 @@ public: | |||||||
|   pointer allocate(size_type __n, const void* _p= 0)  |   pointer allocate(size_type __n, const void* _p= 0)  | ||||||
|   { |   { | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); |     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); | ||||||
| #else | #else | ||||||
|     _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); |     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); | ||||||
| #endif | #endif | ||||||
|     size_type bytes = __n*sizeof(_Tp); |  | ||||||
|     uint8_t *cp = (uint8_t *)ptr; |  | ||||||
|     if ( ptr ) {  |  | ||||||
|     // One touch per 4k page, static OMP loop to catch same loop order |  | ||||||
| #pragma omp parallel for schedule(static) |  | ||||||
|       for(size_type n=0;n<bytes;n+=4096){ |  | ||||||
| 	cp[n]=0; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|   void deallocate(pointer __p, size_type) {  |   void deallocate(pointer __p, size_type) {  | ||||||
|   | |||||||
| @@ -6,9 +6,8 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|     Author: Guido Cossu <guido.cossu@ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -44,20 +43,12 @@ namespace Grid{ | |||||||
|   class GridBase : public CartesianCommunicator , public GridThread { |   class GridBase : public CartesianCommunicator , public GridThread { | ||||||
|  |  | ||||||
| public: | public: | ||||||
|     int dummy; |  | ||||||
|     // Give Lattice access |     // Give Lattice access | ||||||
|     template<class object> friend class Lattice; |     template<class object> friend class Lattice; | ||||||
|  |  | ||||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; |     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||||
|     GridBase(const std::vector<int> & processor_grid, |  | ||||||
| 	     const CartesianCommunicator &parent, |  | ||||||
| 	     int &split_rank)  |  | ||||||
|       : CartesianCommunicator(processor_grid,parent,split_rank) {}; |  | ||||||
|     GridBase(const std::vector<int> & processor_grid, |  | ||||||
| 	     const CartesianCommunicator &parent)  |  | ||||||
|       : CartesianCommunicator(processor_grid,parent,dummy) {}; |  | ||||||
|  |  | ||||||
|     virtual ~GridBase() = default; |  | ||||||
|  |  | ||||||
|     // Physics Grid information. |     // Physics Grid information. | ||||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. |     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||||
| @@ -71,12 +62,13 @@ public: | |||||||
|     int _isites; |     int _isites; | ||||||
|     int _fsites;                  // _isites*_osites = product(dimensions). |     int _fsites;                  // _isites*_osites = product(dimensions). | ||||||
|     int _gsites; |     int _gsites; | ||||||
|     std::vector<int> _slice_block;// subslice information |     std::vector<int> _slice_block;   // subslice information | ||||||
|     std::vector<int> _slice_stride; |     std::vector<int> _slice_stride; | ||||||
|     std::vector<int> _slice_nblock; |     std::vector<int> _slice_nblock; | ||||||
|  |  | ||||||
|     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] |     // Might need these at some point | ||||||
|     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 |     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] | ||||||
|  |     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
| @@ -107,7 +99,7 @@ public: | |||||||
|     virtual int oIndex(std::vector<int> &coor) |     virtual int oIndex(std::vector<int> &coor) | ||||||
|     { |     { | ||||||
|         int idx=0; |         int idx=0; | ||||||
|         // Works with either global or local coordinates | 	// Works with either global or local coordinates | ||||||
|         for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); |         for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||||
|         return idx; |         return idx; | ||||||
|     } |     } | ||||||
| @@ -129,12 +121,6 @@ public: | |||||||
|       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); |       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) { |  | ||||||
|       lcoor.resize(_ndimension); |  | ||||||
|       for (int d = 0; d < _ndimension; d++) |  | ||||||
|         lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
|     // SIMD lane addressing |     // SIMD lane addressing | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
| @@ -142,7 +128,6 @@ public: | |||||||
|     { |     { | ||||||
|       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); |       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline int PermuteDim(int dimension){ |     inline int PermuteDim(int dimension){ | ||||||
|       return _simd_layout[dimension]>1; |       return _simd_layout[dimension]>1; | ||||||
|     } |     } | ||||||
| @@ -160,15 +145,15 @@ public: | |||||||
|       // Distance should be either 0,1,2.. |       // Distance should be either 0,1,2.. | ||||||
|       // |       // | ||||||
|       if ( _simd_layout[dimension] > 2 ) {  |       if ( _simd_layout[dimension] > 2 ) {  | ||||||
|         for(int d=0;d<_ndimension;d++){ | 	for(int d=0;d<_ndimension;d++){ | ||||||
|           if ( d != dimension ) assert ( (_simd_layout[d]==1)  ); | 	  if ( d != dimension ) assert ( (_simd_layout[d]==1)  ); | ||||||
|         } | 	} | ||||||
|         permute_type = RotateBit; // How to specify distance; this is not just direction. | 	permute_type = RotateBit; // How to specify distance; this is not just direction. | ||||||
|         return permute_type; | 	return permute_type; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       for(int d=_ndimension-1;d>dimension;d--){ |       for(int d=_ndimension-1;d>dimension;d--){ | ||||||
|         if (_simd_layout[d]>1 ) permute_type++; | 	if (_simd_layout[d]>1 ) permute_type++; | ||||||
|       } |       } | ||||||
|       return permute_type; |       return permute_type; | ||||||
|     } |     } | ||||||
| @@ -183,31 +168,11 @@ public: | |||||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  |     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||||
|     inline int Nd    (void) const { return _ndimension;}; |     inline int Nd    (void) const { return _ndimension;}; | ||||||
|  |  | ||||||
|     inline const std::vector<int> LocalStarts(void)             { return _lstart;    }; |  | ||||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; |     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; |     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; |     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||||
|     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; |     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////// |  | ||||||
|     // Utility to print the full decomposition details  |  | ||||||
|     //////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|     void show_decomposition(){ |  | ||||||
|       std::cout << GridLogMessage << "\tFull Dimensions    : " << _fdimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tSIMD layout        : " << _simd_layout << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tGlobal Dimensions  : " << _gdimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tLocal Dimensions   : " << _ldimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tOuter strides      : " << _ostride << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInner strides      : " << _istride << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tiSites             : " << _isites << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\toSites             : " << _osites << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tlSites             : " << lSites() << std::endl;         |  | ||||||
|       std::cout << GridLogMessage << "\tgSites             : " << gSites() << std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tNd                 : " << _ndimension << std::endl;              |  | ||||||
|     }  |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
|     // Global addressing |     // Global addressing | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
| @@ -223,8 +188,8 @@ public: | |||||||
|       gidx=0; |       gidx=0; | ||||||
|       int mult=1; |       int mult=1; | ||||||
|       for(int mu=0;mu<_ndimension;mu++) { |       for(int mu=0;mu<_ndimension;mu++) { | ||||||
|         gidx+=mult*gcoor[mu]; | 	gidx+=mult*gcoor[mu]; | ||||||
|         mult*=_gdimensions[mu]; | 	mult*=_gdimensions[mu]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor) |     void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor) | ||||||
| @@ -232,9 +197,9 @@ public: | |||||||
|       pcoor.resize(_ndimension); |       pcoor.resize(_ndimension); | ||||||
|       lcoor.resize(_ndimension); |       lcoor.resize(_ndimension); | ||||||
|       for(int mu=0;mu<_ndimension;mu++){ |       for(int mu=0;mu<_ndimension;mu++){ | ||||||
|         int _fld  = _fdimensions[mu]/_processors[mu]; | 	int _fld  = _fdimensions[mu]/_processors[mu]; | ||||||
|         pcoor[mu] = gcoor[mu]/_fld; | 	pcoor[mu] = gcoor[mu]/_fld; | ||||||
|         lcoor[mu] = gcoor[mu]%_fld; | 	lcoor[mu] = gcoor[mu]%_fld; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor) |     void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor) | ||||||
| @@ -246,9 +211,9 @@ public: | |||||||
|       /* |       /* | ||||||
|       std::vector<int> cblcoor(lcoor); |       std::vector<int> cblcoor(lcoor); | ||||||
|       for(int d=0;d<cblcoor.size();d++){ |       for(int d=0;d<cblcoor.size();d++){ | ||||||
|         if( this->CheckerBoarded(d) ) { | 	if( this->CheckerBoarded(d) ) { | ||||||
|           cblcoor[d] = lcoor[d]/2; | 	  cblcoor[d] = lcoor[d]/2; | ||||||
|         } | 	} | ||||||
|       } |       } | ||||||
|       */ |       */ | ||||||
|       i_idx= iIndex(lcoor); |       i_idx= iIndex(lcoor); | ||||||
| @@ -274,7 +239,7 @@ public: | |||||||
|     { |     { | ||||||
|       RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); |       RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); | ||||||
|       if(CheckerBoarded(0)){ |       if(CheckerBoarded(0)){ | ||||||
|         fcoor[0] = fcoor[0]*2+cb; | 	fcoor[0] = fcoor[0]*2+cb; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor) |     void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor) | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ namespace Grid{ | |||||||
| class GridCartesian: public GridBase { | class GridCartesian: public GridBase { | ||||||
|  |  | ||||||
| public: | public: | ||||||
|     int dummy; |  | ||||||
|     virtual int  CheckerBoardFromOindexTable (int Oindex) { |     virtual int  CheckerBoardFromOindexTable (int Oindex) { | ||||||
|       return 0; |       return 0; | ||||||
|     } |     } | ||||||
| @@ -61,111 +61,74 @@ public: | |||||||
|     virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ |     virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ | ||||||
|       return shift; |       return shift; | ||||||
|     } |     } | ||||||
|     ///////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Constructor takes a parent grid and possibly subdivides communicator. |  | ||||||
|     ///////////////////////////////////////////////////////////////////////// |  | ||||||
|     GridCartesian(const std::vector<int> &dimensions, |     GridCartesian(const std::vector<int> &dimensions, | ||||||
| 		  const std::vector<int> &simd_layout, | 		  const std::vector<int> &simd_layout, | ||||||
| 		  const std::vector<int> &processor_grid, | 		  const std::vector<int> &processor_grid | ||||||
| 		  const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) | 		  ) : GridBase(processor_grid) | ||||||
|     { |     { | ||||||
|       Init(dimensions,simd_layout,processor_grid); |         /////////////////////// | ||||||
|     } |         // Grid information | ||||||
|     GridCartesian(const std::vector<int> &dimensions, |         /////////////////////// | ||||||
| 		  const std::vector<int> &simd_layout, |         _ndimension = dimensions.size(); | ||||||
| 		  const std::vector<int> &processor_grid, |              | ||||||
| 		  const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) |         _fdimensions.resize(_ndimension); | ||||||
|     { |         _gdimensions.resize(_ndimension); | ||||||
|       Init(dimensions,simd_layout,processor_grid); |         _ldimensions.resize(_ndimension); | ||||||
|     } |         _rdimensions.resize(_ndimension); | ||||||
|     ///////////////////////////////////////////////////////////////////////// |         _simd_layout.resize(_ndimension); | ||||||
|     // Construct from comm world |              | ||||||
|     ///////////////////////////////////////////////////////////////////////// |         _ostride.resize(_ndimension); | ||||||
|     GridCartesian(const std::vector<int> &dimensions, |         _istride.resize(_ndimension); | ||||||
| 		  const std::vector<int> &simd_layout, |              | ||||||
| 		  const std::vector<int> &processor_grid) : GridBase(processor_grid) |         _fsites = _gsites = _osites = _isites = 1; | ||||||
|     { |  | ||||||
|       Init(dimensions,simd_layout,processor_grid); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     virtual ~GridCartesian() = default; |         for(int d=0;d<_ndimension;d++){ | ||||||
|  | 	  _fdimensions[d] = dimensions[d]; // Global dimensions | ||||||
|  | 	  _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||||
|  | 	  _simd_layout[d] = simd_layout[d]; | ||||||
|  | 	  _fsites = _fsites * _fdimensions[d]; | ||||||
|  | 	  _gsites = _gsites * _gdimensions[d]; | ||||||
|  |  | ||||||
|     void Init(const std::vector<int> &dimensions, | 	  //FIXME check for exact division | ||||||
| 	      const std::vector<int> &simd_layout, |  | ||||||
| 	      const std::vector<int> &processor_grid) |  | ||||||
|     { |  | ||||||
|       /////////////////////// |  | ||||||
|       // Grid information |  | ||||||
|       /////////////////////// |  | ||||||
|       _ndimension = dimensions.size(); |  | ||||||
|  |  | ||||||
|       _fdimensions.resize(_ndimension); | 	  // Use a reduced simd grid | ||||||
|       _gdimensions.resize(_ndimension); | 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | ||||||
|       _ldimensions.resize(_ndimension); | 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | ||||||
|       _rdimensions.resize(_ndimension); | 	  _osites *= _rdimensions[d]; | ||||||
|       _simd_layout.resize(_ndimension); | 	  _isites *= _simd_layout[d]; | ||||||
|       _lstart.resize(_ndimension); |                  | ||||||
|       _lend.resize(_ndimension); | 	  // Addressing support | ||||||
|  | 	  if ( d==0 ) { | ||||||
|       _ostride.resize(_ndimension); | 	    _ostride[d] = 1; | ||||||
|       _istride.resize(_ndimension); | 	    _istride[d] = 1; | ||||||
|  | 	  } else { | ||||||
|       _fsites = _gsites = _osites = _isites = 1; | 	    _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||||
|  | 	    _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||||
|       for (int d = 0; d < _ndimension; d++) | 	  } | ||||||
|       { |  | ||||||
|         _fdimensions[d] = dimensions[d];   // Global dimensions |  | ||||||
|         _gdimensions[d] = _fdimensions[d]; // Global dimensions |  | ||||||
|         _simd_layout[d] = simd_layout[d]; |  | ||||||
|         _fsites = _fsites * _fdimensions[d]; |  | ||||||
|         _gsites = _gsites * _gdimensions[d]; |  | ||||||
|  |  | ||||||
|         // Use a reduced simd grid |  | ||||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions |  | ||||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); |  | ||||||
|  |  | ||||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition |  | ||||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); |  | ||||||
|  |  | ||||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; |  | ||||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; |  | ||||||
|         _osites *= _rdimensions[d]; |  | ||||||
|         _isites *= _simd_layout[d]; |  | ||||||
|  |  | ||||||
|         // Addressing support |  | ||||||
|         if (d == 0) |  | ||||||
|         { |  | ||||||
|           _ostride[d] = 1; |  | ||||||
|           _istride[d] = 1; |  | ||||||
|         } |         } | ||||||
|         else |          | ||||||
|         { |         /////////////////////// | ||||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; |         // subplane information | ||||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; |         /////////////////////// | ||||||
|  |         _slice_block.resize(_ndimension); | ||||||
|  |         _slice_stride.resize(_ndimension); | ||||||
|  |         _slice_nblock.resize(_ndimension); | ||||||
|  |              | ||||||
|  |         int block =1; | ||||||
|  |         int nblock=1; | ||||||
|  |         for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||||
|  |              | ||||||
|  |         for(int d=0;d<_ndimension;d++){ | ||||||
|  |             nblock/=_rdimensions[d]; | ||||||
|  |             _slice_block[d] =block; | ||||||
|  |             _slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||||
|  |             _slice_nblock[d]=nblock; | ||||||
|  |             block = block*_rdimensions[d]; | ||||||
|         } |         } | ||||||
|       } |  | ||||||
|  |  | ||||||
|       /////////////////////// |  | ||||||
|       // subplane information |  | ||||||
|       /////////////////////// |  | ||||||
|       _slice_block.resize(_ndimension); |  | ||||||
|       _slice_stride.resize(_ndimension); |  | ||||||
|       _slice_nblock.resize(_ndimension); |  | ||||||
|  |  | ||||||
|       int block = 1; |  | ||||||
|       int nblock = 1; |  | ||||||
|       for (int d = 0; d < _ndimension; d++) |  | ||||||
|         nblock *= _rdimensions[d]; |  | ||||||
|  |  | ||||||
|       for (int d = 0; d < _ndimension; d++) |  | ||||||
|       { |  | ||||||
|         nblock /= _rdimensions[d]; |  | ||||||
|         _slice_block[d] = block; |  | ||||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; |  | ||||||
|         _slice_nblock[d] = nblock; |  | ||||||
|         block = block * _rdimensions[d]; |  | ||||||
|       } |  | ||||||
|     }; |     }; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -112,210 +112,151 @@ public: | |||||||
|       } |       } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////// |     GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors)  {}; | ||||||
|     // Create Redblack from original grid; require full grid pointer ? |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base) |  | ||||||
|     { |  | ||||||
|       int dims = base->_ndimension; |  | ||||||
|       std::vector<int> checker_dim_mask(dims,1); |  | ||||||
|       int checker_dim = 0; |  | ||||||
|       Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim); |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////// |     GridRedBlackCartesian(const std::vector<int> &dimensions, | ||||||
|     // Create redblack from original grid, with non-trivial checker dim mask |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     GridRedBlackCartesian(const GridBase *base, |  | ||||||
| 			  const std::vector<int> &checker_dim_mask, |  | ||||||
| 			  int checker_dim |  | ||||||
| 			  ) :  GridBase(base->_processors,*base)  |  | ||||||
|     { |  | ||||||
|       Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim)  ; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     virtual ~GridRedBlackCartesian() = default; |  | ||||||
| #if 0 |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     // Create redblack grid ;; deprecate these. Should not |  | ||||||
|     // need direct creation of redblack without a full grid to base on |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     GridRedBlackCartesian(const GridBase *base, |  | ||||||
| 			  const std::vector<int> &dimensions, |  | ||||||
| 			  const std::vector<int> &simd_layout, | 			  const std::vector<int> &simd_layout, | ||||||
| 			  const std::vector<int> &processor_grid, | 			  const std::vector<int> &processor_grid, | ||||||
| 			  const std::vector<int> &checker_dim_mask, | 			  const std::vector<int> &checker_dim_mask, | ||||||
| 			  int checker_dim | 			  int checker_dim | ||||||
| 			  ) :  GridBase(processor_grid,*base)  | 			  ) :  GridBase(processor_grid)  | ||||||
|     { |     { | ||||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); |       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); | ||||||
|     } |     } | ||||||
|  |     GridRedBlackCartesian(const std::vector<int> &dimensions, | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     // Create redblack grid |  | ||||||
|     //////////////////////////////////////////////////////////// |  | ||||||
|     GridRedBlackCartesian(const GridBase *base, |  | ||||||
| 			  const std::vector<int> &dimensions, |  | ||||||
| 			  const std::vector<int> &simd_layout, | 			  const std::vector<int> &simd_layout, | ||||||
| 			  const std::vector<int> &processor_grid) : GridBase(processor_grid,*base)  | 			  const std::vector<int> &processor_grid) : GridBase(processor_grid)  | ||||||
|     { |     { | ||||||
|       std::vector<int> checker_dim_mask(dimensions.size(),1); |       std::vector<int> checker_dim_mask(dimensions.size(),1); | ||||||
|       int checker_dim = 0; |       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); | ||||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); |  | ||||||
|     } |     } | ||||||
| #endif |  | ||||||
|  |  | ||||||
|     void Init(const std::vector<int> &dimensions, |     void Init(const std::vector<int> &dimensions, | ||||||
|               const std::vector<int> &simd_layout, | 	      const std::vector<int> &simd_layout, | ||||||
|               const std::vector<int> &processor_grid, | 	      const std::vector<int> &processor_grid, | ||||||
|               const std::vector<int> &checker_dim_mask, | 	      const std::vector<int> &checker_dim_mask, | ||||||
|               int checker_dim) | 	      int checker_dim) | ||||||
|     { |     { | ||||||
|       /////////////////////// |     /////////////////////// | ||||||
|       // Grid information |     // Grid information | ||||||
|       /////////////////////// |     /////////////////////// | ||||||
|       _checker_dim = checker_dim; |       _checker_dim = checker_dim; | ||||||
|       assert(checker_dim_mask[checker_dim] == 1); |       assert(checker_dim_mask[checker_dim]==1); | ||||||
|       _ndimension = dimensions.size(); |       _ndimension = dimensions.size(); | ||||||
|       assert(checker_dim_mask.size() == _ndimension); |       assert(checker_dim_mask.size()==_ndimension); | ||||||
|       assert(processor_grid.size() == _ndimension); |       assert(processor_grid.size()==_ndimension); | ||||||
|       assert(simd_layout.size() == _ndimension); |       assert(simd_layout.size()==_ndimension); | ||||||
|  |        | ||||||
|       _fdimensions.resize(_ndimension); |       _fdimensions.resize(_ndimension); | ||||||
|       _gdimensions.resize(_ndimension); |       _gdimensions.resize(_ndimension); | ||||||
|       _ldimensions.resize(_ndimension); |       _ldimensions.resize(_ndimension); | ||||||
|       _rdimensions.resize(_ndimension); |       _rdimensions.resize(_ndimension); | ||||||
|       _simd_layout.resize(_ndimension); |       _simd_layout.resize(_ndimension); | ||||||
|       _lstart.resize(_ndimension); |        | ||||||
|       _lend.resize(_ndimension); |  | ||||||
|  |  | ||||||
|       _ostride.resize(_ndimension); |       _ostride.resize(_ndimension); | ||||||
|       _istride.resize(_ndimension); |       _istride.resize(_ndimension); | ||||||
|  |        | ||||||
|       _fsites = _gsites = _osites = _isites = 1; |       _fsites = _gsites = _osites = _isites = 1; | ||||||
|  | 	 | ||||||
|  |       _checker_dim_mask=checker_dim_mask; | ||||||
|  |  | ||||||
|       _checker_dim_mask = checker_dim_mask; |       for(int d=0;d<_ndimension;d++){ | ||||||
|  | 	_fdimensions[d] = dimensions[d]; | ||||||
|  | 	_gdimensions[d] = _fdimensions[d]; | ||||||
|  | 	_fsites = _fsites * _fdimensions[d]; | ||||||
|  | 	_gsites = _gsites * _gdimensions[d]; | ||||||
|  |          | ||||||
|  | 	if (d==_checker_dim) { | ||||||
|  | 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | ||||||
|  | 	} | ||||||
|  | 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | ||||||
|  |  | ||||||
|       for (int d = 0; d < _ndimension; d++) | 	// Use a reduced simd grid | ||||||
|       { | 	_simd_layout[d] = simd_layout[d]; | ||||||
|         _fdimensions[d] = dimensions[d]; | 	_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; | ||||||
|         _gdimensions[d] = _fdimensions[d]; | 	assert(_rdimensions[d]>0); | ||||||
|         _fsites = _fsites * _fdimensions[d]; |  | ||||||
|         _gsites = _gsites * _gdimensions[d]; |  | ||||||
|  |  | ||||||
|         if (d == _checker_dim) | 	// all elements of a simd vector must have same checkerboard. | ||||||
|         { | 	// If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||||
|           assert((_gdimensions[d] & 0x1) == 0); | 	if ( _simd_layout[d]>1 ) { | ||||||
|           _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard | 	  if ( checker_dim_mask[d] ) {  | ||||||
| 	  _gsites /= 2; | 	    assert( (_rdimensions[d]&0x1) == 0 ); | ||||||
|         } | 	  } | ||||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; | 	} | ||||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); |  | ||||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; |  | ||||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; |  | ||||||
|  |  | ||||||
|         // Use a reduced simd grid | 	_osites *= _rdimensions[d]; | ||||||
|         _simd_layout[d] = simd_layout[d]; | 	_isites *= _simd_layout[d]; | ||||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer |          | ||||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | 	// Addressing support | ||||||
|         assert(_rdimensions[d] > 0); | 	if ( d==0 ) { | ||||||
|  | 	  _ostride[d] = 1; | ||||||
|  | 	  _istride[d] = 1; | ||||||
|  | 	} else { | ||||||
|  | 	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||||
|  | 	  _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|         // all elements of a simd vector must have same checkerboard. |  | ||||||
|         // If Ls vectorised, this must still be the case; e.g. dwf rb5d |  | ||||||
|         if (_simd_layout[d] > 1) |  | ||||||
|         { |  | ||||||
|           if (checker_dim_mask[d]) |  | ||||||
|           { |  | ||||||
|             assert((_rdimensions[d] & 0x1) == 0); |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         _osites *= _rdimensions[d]; |  | ||||||
|         _isites *= _simd_layout[d]; |  | ||||||
|  |  | ||||||
|         // Addressing support |  | ||||||
|         if (d == 0) |  | ||||||
|         { |  | ||||||
|           _ostride[d] = 1; |  | ||||||
|           _istride[d] = 1; |  | ||||||
|         } |  | ||||||
|         else |  | ||||||
|         { |  | ||||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; |  | ||||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; |  | ||||||
|         } |  | ||||||
|       } |       } | ||||||
|  |              | ||||||
|       //////////////////////////////////////////////////////////////////////////////////////////// |       //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|       // subplane information |       // subplane information | ||||||
|       //////////////////////////////////////////////////////////////////////////////////////////// |       //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|       _slice_block.resize(_ndimension); |       _slice_block.resize(_ndimension); | ||||||
|       _slice_stride.resize(_ndimension); |       _slice_stride.resize(_ndimension); | ||||||
|       _slice_nblock.resize(_ndimension); |       _slice_nblock.resize(_ndimension); | ||||||
|  |          | ||||||
|       int block = 1; |       int block =1; | ||||||
|       int nblock = 1; |       int nblock=1; | ||||||
|       for (int d = 0; d < _ndimension; d++) |       for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||||
|         nblock *= _rdimensions[d]; |        | ||||||
|  |       for(int d=0;d<_ndimension;d++){ | ||||||
|       for (int d = 0; d < _ndimension; d++) | 	nblock/=_rdimensions[d]; | ||||||
|       { | 	_slice_block[d] =block; | ||||||
|         nblock /= _rdimensions[d]; | 	_slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||||
|         _slice_block[d] = block; | 	_slice_nblock[d]=nblock; | ||||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | 	block = block*_rdimensions[d]; | ||||||
|         _slice_nblock[d] = nblock; |  | ||||||
|         block = block * _rdimensions[d]; |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Create a checkerboard lookup table |       // Create a checkerboard lookup table | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       int rvol = 1; |       int rvol = 1; | ||||||
|       for (int d = 0; d < _ndimension; d++) |       for(int d=0;d<_ndimension;d++){ | ||||||
|       { | 	rvol=rvol * _rdimensions[d]; | ||||||
|         rvol = rvol * _rdimensions[d]; |  | ||||||
|       } |       } | ||||||
|       _checker_board.resize(rvol); |       _checker_board.resize(rvol); | ||||||
|       for (int osite = 0; osite < _osites; osite++) |       for(int osite=0;osite<_osites;osite++){ | ||||||
|       { | 	_checker_board[osite] = CheckerBoardFromOindex (osite); | ||||||
|         _checker_board[osite] = CheckerBoardFromOindex(osite); |  | ||||||
|       } |       } | ||||||
|  |        | ||||||
|     }; |     }; | ||||||
|  | protected: | ||||||
|   protected: |  | ||||||
|     virtual int oIndex(std::vector<int> &coor) |     virtual int oIndex(std::vector<int> &coor) | ||||||
|     { |     { | ||||||
|       int idx = 0; |       int idx=0; | ||||||
|       for (int d = 0; d < _ndimension; d++) |       for(int d=0;d<_ndimension;d++) { | ||||||
|       { | 	if( d==_checker_dim ) { | ||||||
|         if (d == _checker_dim) | 	  idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]); | ||||||
|         { | 	} else { | ||||||
|           idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); | 	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||||
|         } | 	} | ||||||
|         else |  | ||||||
|         { |  | ||||||
|           idx += _ostride[d] * (coor[d] % _rdimensions[d]); |  | ||||||
|         } |  | ||||||
|       } |       } | ||||||
|       return idx; |       return idx; | ||||||
|     }; |     }; | ||||||
|  |          | ||||||
|     virtual int iIndex(std::vector<int> &lcoor) |     virtual int iIndex(std::vector<int> &lcoor) | ||||||
|     { |     { | ||||||
|       int idx = 0; |         int idx=0; | ||||||
|       for (int d = 0; d < _ndimension; d++) |         for(int d=0;d<_ndimension;d++) { | ||||||
|       { | 	  if( d==_checker_dim ) { | ||||||
|         if (d == _checker_dim) | 	    idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); | ||||||
|         { | 	  } else {  | ||||||
|           idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); | 	    idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); | ||||||
|         } | 	  } | ||||||
|         else | 	} | ||||||
|         { |         return idx; | ||||||
|           idx += _istride[d] * (lcoor[d] / _rdimensions[d]); |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|       return idx; |  | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -26,10 +26,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <fcntl.h> |  | ||||||
| #include <unistd.h> |  | ||||||
| #include <limits.h> |  | ||||||
| #include <sys/mman.h> |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -37,11 +33,8 @@ namespace Grid { | |||||||
| // Info that is setup once and indept of cartesian layout | // Info that is setup once and indept of cartesian layout | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
| void *              CartesianCommunicator::ShmCommBuf; | void *              CartesianCommunicator::ShmCommBuf; | ||||||
| uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 1024LL*1024LL*1024LL;  | uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;  | ||||||
| CartesianCommunicator::CommunicatorPolicy_t   | CartesianCommunicator::CommunicatorPolicy_t  CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; | ||||||
| CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; |  | ||||||
| int CartesianCommunicator::nCommThreads = -1; |  | ||||||
| int CartesianCommunicator::Hugepages = 0; |  | ||||||
|  |  | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Alloc, free shmem region | // Alloc, free shmem region | ||||||
| @@ -67,7 +60,6 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { | |||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Grid information queries | // Grid information queries | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| int                      CartesianCommunicator::Dimensions(void)        { return _ndimension; }; |  | ||||||
| int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | ||||||
| int                      CartesianCommunicator::BossRank(void)          { return 0; }; | int                      CartesianCommunicator::BossRank(void)          { return 0; }; | ||||||
| int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | ||||||
| @@ -96,175 +88,24 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | |||||||
|   GlobalSumVector((double *)c,2*N); |   GlobalSumVector((double *)c,2*N); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||||
| #if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) |  | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  |  | ||||||
| { |  | ||||||
|   _ndimension = processors.size(); |  | ||||||
|   assert(_ndimension = parent._ndimension); |  | ||||||
|    |  | ||||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // split the communicator |  | ||||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   int Nparent; |  | ||||||
|   MPI_Comm_size(parent.communicator,&Nparent); |  | ||||||
|  |  | ||||||
|   int childsize=1; |  | ||||||
|   for(int d=0;d<processors.size();d++) { |  | ||||||
|     childsize *= processors[d]; |  | ||||||
|   } |  | ||||||
|   int Nchild = Nparent/childsize; |  | ||||||
|   assert (childsize * Nchild == Nparent); |  | ||||||
|  |  | ||||||
|   std::vector<int> ccoor(_ndimension); // coor within subcommunicator |  | ||||||
|   std::vector<int> scoor(_ndimension); // coor of split within parent |  | ||||||
|   std::vector<int> ssize(_ndimension); // coor of split within parent |  | ||||||
|  |  | ||||||
|   for(int d=0;d<_ndimension;d++){ |  | ||||||
|     ccoor[d] = parent._processor_coor[d] % processors[d]; |  | ||||||
|     scoor[d] = parent._processor_coor[d] / processors[d]; |  | ||||||
|     ssize[d] = parent._processors[d]     / processors[d]; |  | ||||||
|   } |  | ||||||
|   int crank;  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms |  | ||||||
|   // Mpi uses the reverse Lexico convention to us |  | ||||||
|   Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); |  | ||||||
|   Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); |  | ||||||
|  |  | ||||||
|   MPI_Comm comm_split; |  | ||||||
|   if ( Nchild > 1 ) {  |  | ||||||
|  |  | ||||||
|     /* |  | ||||||
|     std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; |  | ||||||
|     std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    "; |  | ||||||
|     for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " "; |  | ||||||
|     std::cout<<std::endl; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    "; |  | ||||||
|     for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " "; |  | ||||||
|     std::cout<<std::endl; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"]    "; |  | ||||||
|     for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " "; |  | ||||||
|     std::cout<<std::endl; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"]    "; |  | ||||||
|     for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " "; |  | ||||||
|     std::cout<<std::endl; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"]    "; |  | ||||||
|     for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " "; |  | ||||||
|     std::cout<<std::endl; |  | ||||||
|     */ |  | ||||||
|  |  | ||||||
|     int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split); |  | ||||||
|     assert(ierr==0); |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Declare victory |  | ||||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|     /* |  | ||||||
|     std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into " |  | ||||||
| 	      << Nchild <<" communicators with " << childsize << " ranks"<<std::endl; |  | ||||||
|     */ |  | ||||||
|   } else { |  | ||||||
|     comm_split=parent.communicator; |  | ||||||
|     srank = 0; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Set up from the new split communicator |  | ||||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   InitFromMPICommunicator(processors,comm_split); |  | ||||||
| } |  | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Take an MPI_Comm and self assemble |  | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base) |  | ||||||
| { |  | ||||||
|   _ndimension = processors.size(); |  | ||||||
|   _processor_coor.resize(_ndimension); |  | ||||||
|  |  | ||||||
|   ///////////////////////////////// |  | ||||||
|   // Count the requested nodes |  | ||||||
|   ///////////////////////////////// |  | ||||||
|   _Nprocessors=1; |  | ||||||
|   _processors = processors; |  | ||||||
|   for(int i=0;i<_ndimension;i++){ |  | ||||||
|     _Nprocessors*=_processors[i]; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::vector<int> periodic(_ndimension,1); |  | ||||||
|   MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); |  | ||||||
|   MPI_Comm_rank(communicator,&_processor); |  | ||||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); |  | ||||||
|  |  | ||||||
|   if ( communicator_base != communicator_world ) { |  | ||||||
|     std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl; |  | ||||||
|      |  | ||||||
|     std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] "; |  | ||||||
|     for(int d=0;d<_processors.size();d++){ |  | ||||||
|       std::cout << _processor_coor[d]<<" "; |  | ||||||
|     } |  | ||||||
|     std::cout << std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int Size; |  | ||||||
|   MPI_Comm_size(communicator,&Size); |  | ||||||
|  |  | ||||||
| #ifdef GRID_COMMS_MPIT |  | ||||||
|   communicator_halo.resize (2*_ndimension); |  | ||||||
|   for(int i=0;i<_ndimension*2;i++){ |  | ||||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); |  | ||||||
|   } |  | ||||||
| #endif |  | ||||||
|    |  | ||||||
|   assert(Size==_Nprocessors); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)  |  | ||||||
| { |  | ||||||
|   InitFromMPICommunicator(processors,communicator_world); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #if !defined( GRID_COMMS_MPI3)  |  | ||||||
|  |  | ||||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||||
| int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; |  | ||||||
| #endif |  | ||||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, |  | ||||||
| 						     int xmit_to_rank, |  | ||||||
| 						     void *recv, |  | ||||||
| 						     int recv_from_rank, |  | ||||||
| 						     int bytes, int dir) |  | ||||||
| { |  | ||||||
|   std::vector<CommsRequest_t> list; |  | ||||||
|   // Discard the "dir" |  | ||||||
|   SendToRecvFromBegin   (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); |  | ||||||
|   SendToRecvFromComplete(list); |  | ||||||
|   return 2.0*bytes; |  | ||||||
| } |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 							 void *xmit, | 						       void *xmit, | ||||||
| 							 int xmit_to_rank, | 						       int xmit_to_rank, | ||||||
| 							 void *recv, | 						       void *recv, | ||||||
| 							 int recv_from_rank, | 						       int recv_from_rank, | ||||||
| 							 int bytes, int dir) | 						       int bytes) | ||||||
| { | { | ||||||
|   // Discard the "dir" |  | ||||||
|   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); |   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||||
|   return 2.0*bytes; |   return 2.0*bytes; | ||||||
| } | } | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | ||||||
| { | { | ||||||
|   SendToRecvFromComplete(waitall); |   SendToRecvFromComplete(waitall); | ||||||
| } | } | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #if !defined( GRID_COMMS_MPI3)  |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::StencilBarrier(void){}; | void CartesianCommunicator::StencilBarrier(void){}; | ||||||
|  |  | ||||||
| commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | ||||||
| @@ -278,30 +119,8 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { | |||||||
|   return NULL; |   return NULL; | ||||||
| } | } | ||||||
| void CartesianCommunicator::ShmInitGeneric(void){ | void CartesianCommunicator::ShmInitGeneric(void){ | ||||||
| #if 1 |  | ||||||
|   int mmap_flag =0; |  | ||||||
| #ifdef MAP_ANONYMOUS |  | ||||||
|   mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; |  | ||||||
| #endif |  | ||||||
| #ifdef MAP_ANON |  | ||||||
|   mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; |  | ||||||
| #endif |  | ||||||
| #ifdef MAP_HUGETLB |  | ||||||
|   if ( Hugepages ) mmap_flag |= MAP_HUGETLB; |  | ||||||
| #endif |  | ||||||
|   ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);  |  | ||||||
|   if (ShmCommBuf == (void *)MAP_FAILED) { |  | ||||||
|     perror("mmap failed "); |  | ||||||
|     exit(EXIT_FAILURE);   |  | ||||||
|   } |  | ||||||
| #ifdef MADV_HUGEPAGE |  | ||||||
|   if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE); |  | ||||||
| #endif |  | ||||||
| #else  |  | ||||||
|   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); |   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); | ||||||
|   ShmCommBuf=(void *)&ShmBufStorageVector[0]; |   ShmCommBuf=(void *)&ShmBufStorageVector[0]; | ||||||
| #endif |  | ||||||
|   bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifdef GRID_COMMS_MPI3 | #ifdef GRID_COMMS_MPI3 | ||||||
| #include <mpi.h> | #include <mpi.h> | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_MPIT | #ifdef GRID_COMMS_MPI3L | ||||||
| #include <mpi.h> | #include <mpi.h> | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_SHMEM | #ifdef GRID_COMMS_SHMEM | ||||||
| @@ -50,24 +50,12 @@ namespace Grid { | |||||||
| class CartesianCommunicator { | class CartesianCommunicator { | ||||||
|   public:     |   public:     | ||||||
|  |  | ||||||
|  |   // 65536 ranks per node adequate for now | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   // Isend/Irecv/Wait, or Sendrecv blocking |  | ||||||
|   //////////////////////////////////////////// |  | ||||||
|   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; |  | ||||||
|   static CommunicatorPolicy_t CommunicatorPolicy; |  | ||||||
|   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////// |  | ||||||
|   // Up to 65536 ranks per node adequate for now |  | ||||||
|   // 128MB shared memory for comms enought for 48^4 local vol comms |   // 128MB shared memory for comms enought for 48^4 local vol comms | ||||||
|   // Give external control (command line override?) of this |   // Give external control (command line override?) of this | ||||||
|   /////////////////////////////////////////// |  | ||||||
|   static const int MAXLOG2RANKSPERNODE = 16;             |   static const int      MAXLOG2RANKSPERNODE = 16;             | ||||||
|   static uint64_t  MAX_MPI_SHM_BYTES; |   static uint64_t MAX_MPI_SHM_BYTES; | ||||||
|   static int       nCommThreads; |  | ||||||
|   // use explicit huge pages |  | ||||||
|   static int       Hugepages; |  | ||||||
|  |  | ||||||
|   // Communicator should know nothing of the physics grid, only processor grid. |   // Communicator should know nothing of the physics grid, only processor grid. | ||||||
|   int              _Nprocessors;     // How many in all |   int              _Nprocessors;     // How many in all | ||||||
| @@ -76,19 +64,14 @@ class CartesianCommunicator { | |||||||
|   std::vector<int> _processor_coor;  // linear processor coordinate |   std::vector<int> _processor_coor;  // linear processor coordinate | ||||||
|   unsigned long _ndimension; |   unsigned long _ndimension; | ||||||
|  |  | ||||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) | #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L) | ||||||
|   static MPI_Comm communicator_world; |   static MPI_Comm communicator_world; | ||||||
|  |          MPI_Comm communicator; | ||||||
|   MPI_Comm              communicator; |  | ||||||
|   std::vector<MPI_Comm> communicator_halo; |  | ||||||
|  |  | ||||||
|   typedef MPI_Request CommsRequest_t; |   typedef MPI_Request CommsRequest_t; | ||||||
|  |  | ||||||
| #else  | #else  | ||||||
|   typedef int CommsRequest_t; |   typedef int CommsRequest_t; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////// | ||||||
|   // Helper functionality for SHM Windows common to all other impls |   // Helper functionality for SHM Windows common to all other impls | ||||||
|   //////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////// | ||||||
| @@ -134,7 +117,11 @@ class CartesianCommunicator { | |||||||
|   ///////////////////////////////// |   ///////////////////////////////// | ||||||
|   static void * ShmCommBuf; |   static void * ShmCommBuf; | ||||||
|  |  | ||||||
|    |   // Isend/Irecv/Wait, or Sendrecv blocking | ||||||
|  |   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; | ||||||
|  |   static CommunicatorPolicy_t CommunicatorPolicy; | ||||||
|  |   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } | ||||||
|  |  | ||||||
|   size_t heap_top; |   size_t heap_top; | ||||||
|   size_t heap_bytes; |   size_t heap_bytes; | ||||||
|  |  | ||||||
| @@ -148,24 +135,11 @@ class CartesianCommunicator { | |||||||
|   // Must call in Grid startup |   // Must call in Grid startup | ||||||
|   //////////////////////////////////////////////// |   //////////////////////////////////////////////// | ||||||
|   static void Init(int *argc, char ***argv); |   static void Init(int *argc, char ***argv); | ||||||
|  |    | ||||||
|   //////////////////////////////////////////////// |   //////////////////////////////////////////////// | ||||||
|   // Constructors to sub-divide a parent communicator |   // Constructor of any given grid | ||||||
|   // and default to comm world |  | ||||||
|   //////////////////////////////////////////////// |   //////////////////////////////////////////////// | ||||||
|   CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank); |  | ||||||
|   CartesianCommunicator(const std::vector<int> &pdimensions_in); |   CartesianCommunicator(const std::vector<int> &pdimensions_in); | ||||||
|   virtual ~CartesianCommunicator(); |  | ||||||
|  |  | ||||||
|  private: |  | ||||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)  |  | ||||||
|   //////////////////////////////////////////////// |  | ||||||
|   // Private initialise from an MPI communicator |  | ||||||
|   // Can use after an MPI_Comm_split, but hidden from user so private |  | ||||||
|   //////////////////////////////////////////////// |  | ||||||
|   void InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base); |  | ||||||
| #endif |  | ||||||
|  public: |  | ||||||
|    |    | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Wraps MPI_Cart routines, or implements equivalent on other impls |   // Wraps MPI_Cart routines, or implements equivalent on other impls | ||||||
| @@ -174,7 +148,6 @@ class CartesianCommunicator { | |||||||
|   int  RankFromProcessorCoor(std::vector<int> &coor); |   int  RankFromProcessorCoor(std::vector<int> &coor); | ||||||
|   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); |   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); | ||||||
|    |    | ||||||
|   int                      Dimensions(void)        ; |  | ||||||
|   int                      IsBoss(void)            ; |   int                      IsBoss(void)            ; | ||||||
|   int                      BossRank(void)          ; |   int                      BossRank(void)          ; | ||||||
|   int                      ThisRank(void)          ; |   int                      ThisRank(void)          ; | ||||||
| @@ -182,7 +155,6 @@ class CartesianCommunicator { | |||||||
|   const std::vector<int> & ProcessorGrid(void)     ; |   const std::vector<int> & ProcessorGrid(void)     ; | ||||||
|   int                      ProcessorCount(void)    ; |   int                      ProcessorCount(void)    ; | ||||||
|   int                      NodeCount(void)    ; |   int                      NodeCount(void)    ; | ||||||
|   int                      RankCount(void)    ; |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid |   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||||
| @@ -203,8 +175,6 @@ class CartesianCommunicator { | |||||||
|   void GlobalSumVector(ComplexF *c,int N); |   void GlobalSumVector(ComplexF *c,int N); | ||||||
|   void GlobalSum(ComplexD &c); |   void GlobalSum(ComplexD &c); | ||||||
|   void GlobalSumVector(ComplexD *c,int N); |   void GlobalSumVector(ComplexD *c,int N); | ||||||
|   void GlobalXOR(uint32_t &); |  | ||||||
|   void GlobalXOR(uint64_t &); |  | ||||||
|    |    | ||||||
|   template<class obj> void GlobalSum(obj &o){ |   template<class obj> void GlobalSum(obj &o){ | ||||||
|     typedef typename obj::scalar_type scalar_type; |     typedef typename obj::scalar_type scalar_type; | ||||||
| @@ -237,21 +207,14 @@ class CartesianCommunicator { | |||||||
|    |    | ||||||
|   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); |   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||||
|  |  | ||||||
|   double StencilSendToRecvFrom(void *xmit, |  | ||||||
| 			       int xmit_to_rank, |  | ||||||
| 			       void *recv, |  | ||||||
| 			       int recv_from_rank, |  | ||||||
| 			       int bytes,int dir); |  | ||||||
|  |  | ||||||
|   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, |   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 				    void *xmit, | 				  void *xmit, | ||||||
| 				    int xmit_to_rank, | 				  int xmit_to_rank, | ||||||
| 				    void *recv, | 				  void *recv, | ||||||
| 				    int recv_from_rank, | 				  int recv_from_rank, | ||||||
| 				    int bytes,int dir); | 				  int bytes); | ||||||
|    |    | ||||||
|    |   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||||
|   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i); |  | ||||||
|   void StencilBarrier(void); |   void StencilBarrier(void); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////// | ||||||
| @@ -263,27 +226,6 @@ class CartesianCommunicator { | |||||||
|   // Broadcast a buffer and composite larger |   // Broadcast a buffer and composite larger | ||||||
|   //////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////// | ||||||
|   void Broadcast(int root,void* data, int bytes); |   void Broadcast(int root,void* data, int bytes); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   // All2All down one dimension |  | ||||||
|   //////////////////////////////////////////////////////////// |  | ||||||
|   template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){ |  | ||||||
|     assert(dim>=0); |  | ||||||
|     assert(dim<_ndimension); |  | ||||||
|     int numnode = _processors[dim]; |  | ||||||
|     //    std::cerr << " AllToAll in.size()  "<<in.size()<<std::endl; |  | ||||||
|     //    std::cerr << " AllToAll out.size() "<<out.size()<<std::endl; |  | ||||||
|     assert(in.size()==out.size()); |  | ||||||
|     uint64_t bytes=sizeof(T); |  | ||||||
|     uint64_t words=in.size()/numnode; |  | ||||||
|  |  | ||||||
|     assert(numnode * words == in.size()); |  | ||||||
|     assert(words < (1ULL<<32)); |  | ||||||
|  |  | ||||||
|     AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes); |  | ||||||
|   } |  | ||||||
|   void AllToAll(int dim  ,void *in,void *out,uint64_t words,uint64_t bytes); |  | ||||||
|   void AllToAll(void  *in,void *out,uint64_t words         ,uint64_t bytes); |  | ||||||
|    |    | ||||||
|   template<class obj> void Broadcast(int root,obj &data) |   template<class obj> void Broadcast(int root,obj &data) | ||||||
|     { |     { | ||||||
|   | |||||||
| @@ -53,14 +53,28 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|   ShmInitGeneric(); |   ShmInitGeneric(); | ||||||
| } | } | ||||||
|  |  | ||||||
| CartesianCommunicator::~CartesianCommunicator() | CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||||
| { | { | ||||||
|   int MPI_is_finalised; |   _ndimension = processors.size(); | ||||||
|   MPI_Finalized(&MPI_is_finalised); |   std::vector<int> periodic(_ndimension,1); | ||||||
|   if (communicator && MPI_is_finalised) |  | ||||||
|     MPI_Comm_free(&communicator); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |   _Nprocessors=1; | ||||||
|  |   _processors = processors; | ||||||
|  |   _processor_coor.resize(_ndimension); | ||||||
|  |    | ||||||
|  |   MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); | ||||||
|  |   MPI_Comm_rank(communicator,&_processor); | ||||||
|  |   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<_ndimension;i++){ | ||||||
|  |     _Nprocessors*=_processors[i]; | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   int Size;  | ||||||
|  |   MPI_Comm_size(communicator,&Size); | ||||||
|  |    | ||||||
|  |   assert(Size==_Nprocessors); | ||||||
|  | } | ||||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ | void CartesianCommunicator::GlobalSum(uint32_t &u){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| @@ -69,14 +83,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| @@ -196,36 +202,6 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) | |||||||
| 		     root, | 		     root, | ||||||
| 		     communicator); | 		     communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } |  | ||||||
| void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes) |  | ||||||
| { |  | ||||||
|   std::vector<int> row(_ndimension,1); |  | ||||||
|   assert(dim>=0 && dim<_ndimension); |  | ||||||
|  |  | ||||||
|   //  Split the communicator |  | ||||||
|   row[dim] = _processors[dim]; |  | ||||||
|  |  | ||||||
|   int me; |  | ||||||
|   CartesianCommunicator Comm(row,*this,me); |  | ||||||
|   Comm.AllToAll(in,out,words,bytes); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes) |  | ||||||
| { |  | ||||||
|   // MPI is a pain and uses "int" arguments |  | ||||||
|   // 64*64*64*128*16 == 500Million elements of data. |  | ||||||
|   // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. |  | ||||||
|   // (Turns up on 32^3 x 64 Gparity too) |  | ||||||
|   MPI_Datatype object; |  | ||||||
|   int iwords;  |  | ||||||
|   int ibytes; |  | ||||||
|   iwords = words; |  | ||||||
|   ibytes = bytes; |  | ||||||
|   assert(words == iwords); // safe to cast to int ? |  | ||||||
|   assert(bytes == ibytes); // safe to cast to int ? |  | ||||||
|   MPI_Type_contiguous(ibytes,MPI_BYTE,&object); |  | ||||||
|   MPI_Type_commit(&object); |  | ||||||
|   MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); |  | ||||||
|   MPI_Type_free(&object); |  | ||||||
| } | } | ||||||
|   /////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////// | ||||||
|   // Should only be used prior to Grid Init finished. |   // Should only be used prior to Grid Init finished. | ||||||
| @@ -246,7 +222,5 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | |||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -37,12 +37,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <sys/ipc.h> | #include <sys/ipc.h> | ||||||
| #include <sys/shm.h> | #include <sys/shm.h> | ||||||
| #include <sys/mman.h> | #include <sys/mman.h> | ||||||
| #include <zlib.h> | //#include <zlib.h> | ||||||
| #ifdef HAVE_NUMAIF_H | #ifndef SHM_HUGETLB | ||||||
| #include <numaif.h> | #define SHM_HUGETLB 04000 | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -66,7 +65,6 @@ std::vector<int> CartesianCommunicator::MyGroup; | |||||||
| std::vector<void *> CartesianCommunicator::ShmCommBufs; | std::vector<void *> CartesianCommunicator::ShmCommBufs; | ||||||
|  |  | ||||||
| int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | ||||||
| int CartesianCommunicator::RankCount(void)    { return WorldSize;}; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef FORCE_COMMS | #undef FORCE_COMMS | ||||||
| @@ -198,46 +196,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|   ShmCommBuf = 0; |   ShmCommBuf = 0; | ||||||
|   ShmCommBufs.resize(ShmSize); |   ShmCommBufs.resize(ShmSize); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | #if 1 | ||||||
|   // Hugetlbf and others map filesystems as mappable huge pages |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| #ifdef GRID_MPI3_SHMMMAP |  | ||||||
|   char shm_name [NAME_MAX]; |  | ||||||
|   for(int r=0;r<ShmSize;r++){ |  | ||||||
|      |  | ||||||
|     size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; |  | ||||||
|     sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r); |  | ||||||
|     //sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r); |  | ||||||
|     //    printf("Opening file %s \n",shm_name); |  | ||||||
|     int fd=open(shm_name,O_RDWR|O_CREAT,0666); |  | ||||||
|     if ( fd == -1) {  |  | ||||||
|       printf("open %s failed\n",shm_name); |  | ||||||
|       perror("open hugetlbfs"); |  | ||||||
|       exit(0); |  | ||||||
|     } |  | ||||||
|     int mmap_flag = MAP_SHARED ; |  | ||||||
| #ifdef MAP_POPULATE     |  | ||||||
|     mmap_flag|=MAP_POPULATE; |  | ||||||
| #endif |  | ||||||
| #ifdef MAP_HUGETLB |  | ||||||
|     if ( Hugepages ) mmap_flag |= MAP_HUGETLB; |  | ||||||
| #endif |  | ||||||
|     void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);  |  | ||||||
|     if ( ptr == (void *)MAP_FAILED ) {     |  | ||||||
|       printf("mmap %s failed\n",shm_name); |  | ||||||
|       perror("failed mmap");      assert(0);     |  | ||||||
|     } |  | ||||||
|     assert(((uint64_t)ptr&0x3F)==0); |  | ||||||
|     ShmCommBufs[r] =ptr; |  | ||||||
|      |  | ||||||
|   } |  | ||||||
| #endif |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case |  | ||||||
|   // tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for  |  | ||||||
|   // the posix shm virtual file system |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| #ifdef GRID_MPI3_SHMOPEN |  | ||||||
|   char shm_name [NAME_MAX]; |   char shm_name [NAME_MAX]; | ||||||
|   if ( ShmRank == 0 ) { |   if ( ShmRank == 0 ) { | ||||||
|     for(int r=0;r<ShmSize;r++){ |     for(int r=0;r<ShmSize;r++){ | ||||||
| @@ -250,39 +209,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); |       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); | ||||||
|       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } |       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } | ||||||
|       ftruncate(fd, size); |       ftruncate(fd, size); | ||||||
|        |  | ||||||
|       int mmap_flag = MAP_SHARED; |  | ||||||
| #ifdef MAP_POPULATE  |  | ||||||
|       mmap_flag |= MAP_POPULATE; |  | ||||||
| #endif |  | ||||||
| #ifdef MAP_HUGETLB |  | ||||||
|       if (Hugepages) mmap_flag |= MAP_HUGETLB; |  | ||||||
| #endif |  | ||||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); |  | ||||||
|  |  | ||||||
|       if ( ptr == (void * )MAP_FAILED ) {       perror("failed mmap");      assert(0);    } |       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||||||
|  |       if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    } | ||||||
|       assert(((uint64_t)ptr&0x3F)==0); |       assert(((uint64_t)ptr&0x3F)==0); | ||||||
|  |       ShmCommBufs[r] =ptr; | ||||||
| // Experiments; Experiments; Try to force numa domain on the shm segment if we have numaif.h |  | ||||||
| #if 0 |  | ||||||
| //#ifdef HAVE_NUMAIF_H |  | ||||||
| 	int status; |  | ||||||
| 	int flags=MPOL_MF_MOVE; |  | ||||||
| #ifdef KNL |  | ||||||
| 	int nodes=1; // numa domain == MCDRAM |  | ||||||
| 	// Find out if in SNC2,SNC4 mode ? |  | ||||||
| #else |  | ||||||
| 	int nodes=r; // numa domain == MPI ID |  | ||||||
| #endif |  | ||||||
| 	unsigned long count=1; |  | ||||||
| 	for(uint64_t page=0;page<size;page+=4096){ |  | ||||||
| 	  void *pages = (void *) ( page + (uint64_t)ptr ); |  | ||||||
| 	  uint64_t *cow_it = (uint64_t *)pages;	*cow_it = 1; |  | ||||||
| 	  ierr= move_pages(0,count, &pages,&nodes,&status,flags); |  | ||||||
| 	  if (ierr && (page==0)) perror("numa relocate command failed"); |  | ||||||
| 	} |  | ||||||
| #endif |  | ||||||
| 	ShmCommBufs[r] =ptr; |  | ||||||
|        |        | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -304,32 +235,21 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|       ShmCommBufs[r] =ptr; |       ShmCommBufs[r] =ptr; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| #endif |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | #else | ||||||
|   // SHMGET SHMAT and SHM_HUGETLB flag |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| #ifdef GRID_MPI3_SHMGET |  | ||||||
|   std::vector<int> shmids(ShmSize); |   std::vector<int> shmids(ShmSize); | ||||||
|  |  | ||||||
|   if ( ShmRank == 0 ) { |   if ( ShmRank == 0 ) { | ||||||
|     for(int r=0;r<ShmSize;r++){ |     for(int r=0;r<ShmSize;r++){ | ||||||
|       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; |       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; | ||||||
|       key_t key   = IPC_PRIVATE; |       key_t key   = 0x4545 + r; | ||||||
|       int flags = IPC_CREAT | SHM_R | SHM_W; |       if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { | ||||||
| #ifdef SHM_HUGETLB |  | ||||||
|       if (Hugepages) flags|=SHM_HUGETLB; |  | ||||||
| #endif |  | ||||||
|       if ((shmids[r]= shmget(key,size, flags)) ==-1) { |  | ||||||
| 	int errsv = errno; | 	int errsv = errno; | ||||||
| 	printf("Errno %d\n",errsv); | 	printf("Errno %d\n",errsv); | ||||||
| 	printf("key   %d\n",key); |  | ||||||
| 	printf("size  %lld\n",size); |  | ||||||
| 	printf("flags %d\n",flags); |  | ||||||
| 	perror("shmget"); | 	perror("shmget"); | ||||||
| 	exit(1); | 	exit(1); | ||||||
|       } else {  |  | ||||||
| 	printf("shmid: 0x%x\n", shmids[r]); |  | ||||||
|       } |       } | ||||||
|  |       printf("shmid: 0x%x\n", shmids[r]); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   MPI_Barrier(ShmComm); |   MPI_Barrier(ShmComm); | ||||||
| @@ -450,27 +370,12 @@ void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c | |||||||
|   assert(lr!=-1); |   assert(lr!=-1); | ||||||
|   Lexicographic::CoorFromIndex(coor,lr,_processors); |   Lexicographic::CoorFromIndex(coor,lr,_processors); | ||||||
| } | } | ||||||
|  |  | ||||||
| ////////////////////////////////// |  | ||||||
| // Try to subdivide communicator |  | ||||||
| ////////////////////////////////// |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  |  | ||||||
|   : CartesianCommunicator(processors)  |  | ||||||
| { |  | ||||||
|   std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl; |  | ||||||
| } |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||||
| {  | {  | ||||||
|   int ierr; |   int ierr; | ||||||
|   communicator=communicator_world; |   communicator=communicator_world; | ||||||
|  |  | ||||||
|   _ndimension = processors.size(); |   _ndimension = processors.size(); | ||||||
|  |  | ||||||
|   communicator_halo.resize (2*_ndimension); |  | ||||||
|   for(int i=0;i<_ndimension*2;i++){ |  | ||||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   // Assert power of two shm_size. |   // Assert power of two shm_size. | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
| @@ -604,14 +509,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| @@ -693,28 +590,13 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, |  | ||||||
| 						     int dest, |  | ||||||
| 						     void *recv, |  | ||||||
| 						     int from, |  | ||||||
| 						     int bytes,int dir) |  | ||||||
| { |  | ||||||
|   std::vector<CommsRequest_t> list; |  | ||||||
|   double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir); |  | ||||||
|   StencilSendToRecvFromComplete(list,dir); |  | ||||||
|   return offbytes; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 							 void *xmit, | 						       void *xmit, | ||||||
| 							 int dest, | 						       int dest, | ||||||
| 							 void *recv, | 						       void *recv, | ||||||
| 							 int from, | 						       int from, | ||||||
| 							 int bytes,int dir) | 						       int bytes) | ||||||
| { | { | ||||||
|   int ncomm  =communicator_halo.size();  |  | ||||||
|   int commdir=dir%ncomm; |  | ||||||
|  |  | ||||||
|   MPI_Request xrq; |   MPI_Request xrq; | ||||||
|   MPI_Request rrq; |   MPI_Request rrq; | ||||||
|  |  | ||||||
| @@ -733,26 +615,26 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|   gfrom = MPI_UNDEFINED; |   gfrom = MPI_UNDEFINED; | ||||||
| #endif | #endif | ||||||
|   if ( gfrom ==MPI_UNDEFINED) { |   if ( gfrom ==MPI_UNDEFINED) { | ||||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq); |     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(rrq); |     list.push_back(rrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( gdest == MPI_UNDEFINED ) { |   if ( gdest == MPI_UNDEFINED ) { | ||||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq); |     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(xrq); |     list.push_back(xrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  |   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  | ||||||
|     this->StencilSendToRecvFromComplete(list,dir); |     this->StencilSendToRecvFromComplete(list); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return off_node_bytes; |   return off_node_bytes; | ||||||
| } | } | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | ||||||
| { | { | ||||||
|   SendToRecvFromComplete(waitall); |   SendToRecvFromComplete(waitall); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,268 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/communicator/Communicator_mpi.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/GridCore.h> |  | ||||||
| #include <Grid/GridQCDcore.h> |  | ||||||
| #include <Grid/qcd/action/ActionCore.h> |  | ||||||
| #include <mpi.h> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Info that is setup once and indept of cartesian layout |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| MPI_Comm CartesianCommunicator::communicator_world; |  | ||||||
|  |  | ||||||
| // Should error check all MPI calls. |  | ||||||
| void CartesianCommunicator::Init(int *argc, char ***argv) { |  | ||||||
|   int flag; |  | ||||||
|   int provided; |  | ||||||
|   MPI_Initialized(&flag); // needed to coexist with other libs apparently |  | ||||||
|   if ( !flag ) { |  | ||||||
|     MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); |  | ||||||
|     if ( provided != MPI_THREAD_MULTIPLE ) { |  | ||||||
|       QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); |  | ||||||
|   ShmInitGeneric(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| CartesianCommunicator::~CartesianCommunicator() |  | ||||||
| { |  | ||||||
|   if (communicator && !MPI::Is_finalized()) |  | ||||||
|     MPI_Comm_free(&communicator); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSumVector(float *f,int N) |  | ||||||
| { |  | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSum(double &d) |  | ||||||
| { |  | ||||||
|   int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::GlobalSumVector(double *d,int N) |  | ||||||
| { |  | ||||||
|   int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) |  | ||||||
| { |  | ||||||
|   int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
| int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) |  | ||||||
| { |  | ||||||
|   int rank; |  | ||||||
|   int ierr=MPI_Cart_rank  (communicator, &coor[0], &rank); |  | ||||||
|   assert(ierr==0); |  | ||||||
|   return rank; |  | ||||||
| } |  | ||||||
| void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor) |  | ||||||
| { |  | ||||||
|   coor.resize(_ndimension); |  | ||||||
|   int ierr=MPI_Cart_coords  (communicator, rank, _ndimension,&coor[0]); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Basic Halo comms primitive |  | ||||||
| void CartesianCommunicator::SendToRecvFrom(void *xmit, |  | ||||||
| 					   int dest, |  | ||||||
| 					   void *recv, |  | ||||||
| 					   int from, |  | ||||||
| 					   int bytes) |  | ||||||
| { |  | ||||||
|   std::vector<CommsRequest_t> reqs(0); |  | ||||||
|   SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); |  | ||||||
|   SendToRecvFromComplete(reqs); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, |  | ||||||
| 					   void *recv, |  | ||||||
| 					   int sender, |  | ||||||
| 					   int receiver, |  | ||||||
| 					   int bytes) |  | ||||||
| { |  | ||||||
|   MPI_Status stat; |  | ||||||
|   assert(sender != receiver); |  | ||||||
|   int tag = sender; |  | ||||||
|   if ( _processor == sender ) { |  | ||||||
|     MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); |  | ||||||
|   } |  | ||||||
|   if ( _processor == receiver ) {  |  | ||||||
|     MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Basic Halo comms primitive |  | ||||||
| void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, |  | ||||||
| 						void *xmit, |  | ||||||
| 						int dest, |  | ||||||
| 						void *recv, |  | ||||||
| 						int from, |  | ||||||
| 						int bytes) |  | ||||||
| { |  | ||||||
|   int myrank = _processor; |  | ||||||
|   int ierr; |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {  |  | ||||||
|     MPI_Request xrq; |  | ||||||
|     MPI_Request rrq; |  | ||||||
|  |  | ||||||
|     ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); |  | ||||||
|     ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); |  | ||||||
|      |  | ||||||
|     assert(ierr==0); |  | ||||||
|     list.push_back(xrq); |  | ||||||
|     list.push_back(rrq); |  | ||||||
|   } else {  |  | ||||||
|     // Give the CPU to MPI immediately; can use threads to overlap optionally |  | ||||||
|     ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, |  | ||||||
| 		      recv,bytes,MPI_CHAR,from, from, |  | ||||||
| 		      communicator,MPI_STATUS_IGNORE); |  | ||||||
|     assert(ierr==0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) |  | ||||||
| { |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {  |  | ||||||
|     int nreq=list.size(); |  | ||||||
|     std::vector<MPI_Status> status(nreq); |  | ||||||
|     int ierr = MPI_Waitall(nreq,&list[0],&status[0]); |  | ||||||
|     assert(ierr==0); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::Barrier(void) |  | ||||||
| { |  | ||||||
|   int ierr = MPI_Barrier(communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::Broadcast(int root,void* data, int bytes) |  | ||||||
| { |  | ||||||
|   int ierr=MPI_Bcast(data, |  | ||||||
| 		     bytes, |  | ||||||
| 		     MPI_BYTE, |  | ||||||
| 		     root, |  | ||||||
| 		     communicator); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   // Should only be used prior to Grid Init finished. |  | ||||||
|   // Check for this? |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
| int CartesianCommunicator::RankWorld(void){  |  | ||||||
|   int r;  |  | ||||||
|   MPI_Comm_rank(communicator_world,&r); |  | ||||||
|   return r; |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) |  | ||||||
| { |  | ||||||
|   int ierr= MPI_Bcast(data, |  | ||||||
| 		      bytes, |  | ||||||
| 		      MPI_BYTE, |  | ||||||
| 		      root, |  | ||||||
| 		      communicator_world); |  | ||||||
|   assert(ierr==0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, |  | ||||||
| 							 void *xmit, |  | ||||||
| 							 int xmit_to_rank, |  | ||||||
| 							 void *recv, |  | ||||||
| 							 int recv_from_rank, |  | ||||||
| 							 int bytes,int dir) |  | ||||||
| { |  | ||||||
|   int myrank = _processor; |  | ||||||
|   int ierr; |  | ||||||
|   int ncomm  =communicator_halo.size();  |  | ||||||
|   int commdir=dir%ncomm; |  | ||||||
|    |  | ||||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; |  | ||||||
|   // Give the CPU to MPI immediately; can use threads to overlap optionally |  | ||||||
|   MPI_Request req[2]; |  | ||||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]); |  | ||||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[commdir],&req[0]); |  | ||||||
|  |  | ||||||
|   list.push_back(req[0]); |  | ||||||
|   list.push_back(req[1]); |  | ||||||
|   return 2.0*bytes; |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) |  | ||||||
| {  |  | ||||||
|   int nreq=waitall.size(); |  | ||||||
|   MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); |  | ||||||
| }; |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, |  | ||||||
| 						    int xmit_to_rank, |  | ||||||
| 						    void *recv, |  | ||||||
| 						    int recv_from_rank, |  | ||||||
| 						    int bytes,int dir) |  | ||||||
| { |  | ||||||
|   int myrank = _processor; |  | ||||||
|   int ierr; |  | ||||||
|   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo.size()<< <std::endl; |  | ||||||
|  |  | ||||||
|   int ncomm  =communicator_halo.size();  |  | ||||||
|   int commdir=dir%ncomm; |  | ||||||
|   // Give the CPU to MPI immediately; can use threads to overlap optionally |  | ||||||
|   MPI_Request req[2]; |  | ||||||
|   MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]); |  | ||||||
|   MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[commdir],&req[0]); |  | ||||||
|   MPI_Waitall(2, req, MPI_STATUSES_IGNORE); |  | ||||||
|   return 2.0*bytes; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| @@ -38,9 +38,6 @@ void CartesianCommunicator::Init(int *argc, char *** arv) | |||||||
|   ShmInitGeneric(); |   ShmInitGeneric(); | ||||||
| } | } | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)  |  | ||||||
|   : CartesianCommunicator(processors) { srank=0;} |  | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||||
| { | { | ||||||
|   _processors = processors; |   _processors = processors; | ||||||
| @@ -56,16 +53,12 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| CartesianCommunicator::~CartesianCommunicator(){} |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::GlobalSum(float &){} | void CartesianCommunicator::GlobalSum(float &){} | ||||||
| void CartesianCommunicator::GlobalSumVector(float *,int N){} | void CartesianCommunicator::GlobalSumVector(float *,int N){} | ||||||
| void CartesianCommunicator::GlobalSum(double &){} | void CartesianCommunicator::GlobalSum(double &){} | ||||||
| void CartesianCommunicator::GlobalSum(uint32_t &){} | void CartesianCommunicator::GlobalSum(uint32_t &){} | ||||||
| void CartesianCommunicator::GlobalSum(uint64_t &){} | void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||||
| void CartesianCommunicator::GlobalXOR(uint32_t &){} |  | ||||||
| void CartesianCommunicator::GlobalXOR(uint64_t &){} |  | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||||
| 					   void *recv, | 					   void *recv, | ||||||
| @@ -100,14 +93,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> & | |||||||
| { | { | ||||||
|   assert(0); |   assert(0); | ||||||
| } | } | ||||||
| void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes) |  | ||||||
| { |  | ||||||
|   bcopy(in,out,bytes*words); |  | ||||||
| } |  | ||||||
| void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes) |  | ||||||
| { |  | ||||||
|   bcopy(in,out,bytes*words); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int  CartesianCommunicator::RankWorld(void){return 0;} | int  CartesianCommunicator::RankWorld(void){return 0;} | ||||||
| void CartesianCommunicator::Barrier(void){} | void CartesianCommunicator::Barrier(void){} | ||||||
|   | |||||||
| @@ -75,11 +75,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|   ShmInitGeneric(); |   ShmInitGeneric(); | ||||||
| } | } | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)  |  | ||||||
|   : CartesianCommunicator(processors)  |  | ||||||
| { |  | ||||||
|   std::cout << "Attempts to split SHMEM communicators will fail " <<std::endl; |  | ||||||
| } |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||||
| { | { | ||||||
|   _ndimension = processors.size(); |   _ndimension = processors.size(); | ||||||
|   | |||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user