mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-24 17:54:47 +01:00 
			
		
		
		
	Compare commits
	
		
			276 Commits
		
	
	
		
			release/v0
			...
			dirac-ITT-
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | c709883f3f | ||
|  | aed5de4d50 | ||
|  | ba27cc6571 | ||
|  | d75369cb56 | ||
|  | bf973d0d56 | ||
|  | 837bf8a5be | ||
|  | c05b2199f6 | ||
|  | b331be9101 | ||
|  | 49c20a9fa8 | ||
|  | 7359df3501 | ||
|  | 5b9267e88d | ||
|  | 15fd4003ef | ||
|  | 73aeca7dea | ||
|  | ad89abb018 | ||
|  | 80c5bce5bb | ||
|  | f68b5de9c8 | ||
|  | d0f3d525d5 | ||
|  | 3a58217405 | ||
|  | c289699d9a | ||
|  | c3b1263e75 | ||
| 102ea9ae66 | |||
|  | 5fa386ddc9 | ||
|  | d9cd4f0273 | ||
|  | b49bec0cec | ||
|  | ae56e556c6 | ||
|  | 1cdf999668 | ||
|  | 11062fb686 | ||
|  | 383ca7d392 | ||
|  | a446d95c33 | ||
|  | be66e7dd95 | ||
|  | 6d0d064a6c | ||
|  | bfef525ed2 | ||
|  | 0b0cf62193 | ||
|  | 7d88198387 | ||
|  | 2f619482b8 | ||
|  | d6472eda8d | ||
|  | 9e658de238 | ||
|  | bcefdd7c4e | ||
|  | fd367d8bfd | ||
|  | 8a3fe60a27 | ||
|  | 44051aecd1 | ||
|  | 06e6f8de00 | ||
|  | dbe4d7850c | ||
|  | 4fe182e5a7 | ||
|  | 175f393f9d | ||
|  | 14d53e1c9e | ||
|  | 8bd869da37 | ||
|  | c7036f6717 | ||
|  | c0485d799d | ||
|  | 7abc5613bd | ||
|  | 237cfd11ab | ||
|  | a4b7dddb67 | ||
|  | 5696781862 | ||
| c3f0889eda | |||
|  | 0f214ad427 | ||
|  | fe4912880d | ||
|  | f038c6babe | ||
|  | 169f4b2711 | ||
|  | 2d8aff36fe | ||
|  | 659d7d1a40 | ||
|  | dc6f078246 | ||
|  | 8a4714a4a6 | ||
|  | 40e119c61c | ||
|  | 7b0237b081 | ||
|  | b68ad0cc0b | ||
|  | 37263fd9b1 | ||
|  | 3d09e3e9e0 | ||
|  | 1354b46338 | ||
|  | 251a97fe1b | ||
|  | e18929eaa0 | ||
|  | f3b0a92e71 | ||
|  | a0be3f7330 | ||
|  | b5a6e4f1fd | ||
|  | 7a788db3dc | ||
|  | f20eceb6cd | ||
|  | 38325ebbc6 | ||
|  | b73bd151bb | ||
|  | 694b305cab | ||
|  | 2d3737a133 | ||
|  | ac1f1838bc | ||
|  | 09d09d0fe5 | ||
|  | bf630a6821 | ||
|  | 8859a151cc | ||
|  | 688a39cfd9 | ||
|  | 6f5a5cd9b3 | ||
|  | 0933aeefd4 | ||
|  | 322f61acee | ||
|  | 08e04b9676 | ||
| feaa2ac947 | |||
| 07de925127 | |||
|  | a9c816a268 | ||
|  | e43a8b6b8a | ||
|  | bf729766dd | ||
|  | dafb351d38 | ||
| 0b707b861c | |||
| 15e87a4607 | |||
| 7d7220cbd7 | |||
|  | 54e94360ad | ||
| 0af740dc15 | |||
| d2e8372df3 | |||
|  | 869b99ec1e | ||
| 4372d04ad4 | |||
|  | 56abbdf4c2 | ||
|  | af71c63f4c | ||
|  | 0440d4ce66 | ||
| b22eab8c8b | |||
|  | a7d56523ab | ||
|  | 9e56c65730 | ||
|  | ef4f2b8c41 | ||
|  | e8b95bd35b | ||
|  | 7e35286860 | ||
|  | 0486ff8e79 | ||
| 1e8a2e1621 | |||
| 7587df831a | |||
|  | e9cc21900f | ||
|  | 0a8faac271 | ||
|  | abc4de0fd2 | ||
|  | cfe3cd76d1 | ||
|  | 3fa5e3109f | ||
|  | 8b7049f737 | ||
|  | c85024683e | ||
|  | 1300b0b04b | ||
|  | e6d984b484 | ||
|  | 1d18d95d4f | ||
|  | ae39ec85a3 | ||
|  | b96daf53a0 | ||
|  | 46879e1658 | ||
|  | ae4de94798 | ||
|  | 0ab555b4f5 | ||
|  | 8e9be9f84f | ||
|  | d572170170 | ||
| 81b18f843a | |||
|  | a833f88c32 | ||
|  | 07b2c1b253 | ||
|  | 735cbdb983 | ||
|  | 2ad54c5a02 | ||
|  | 12ccc73cf5 | ||
|  | 3d04dc33c6 | ||
|  | e7564f8330 | ||
|  | 91199a8ea0 | ||
|  | 0494feec98 | ||
|  | a16b1e134e | ||
|  | 769ad578f5 | ||
|  | eaac0044b5 | ||
|  | 56042f002c | ||
|  | 3bfd1f13e6 | ||
|  | 70ab598c96 | ||
|  | 1d0ca65e28 | ||
|  | 2bc4d0a20e | ||
| 2490816297 | |||
| 5f55bca378 | |||
| f6aa82b7f2 | |||
| 22749699a3 | |||
| 0503c028be | |||
|  | 092dcd4e04 | ||
|  | 4a8c4ccfba | ||
|  | 9b44189d5a | ||
|  | 7da4856e8e | ||
|  | aaf1e33a77 | ||
|  | 094c3d091a | ||
|  | 4b98e524a0 | ||
|  | 1a1f6d55f9 | ||
|  | 21421656ab | ||
|  | 6f687a67cd | ||
|  | b30754e762 | ||
|  | 1e429a0d57 | ||
|  | d38a4de36c | ||
|  | ef1b7db374 | ||
|  | 53a9aeb965 | ||
|  | e30fa9f4b8 | ||
|  | 58e8d0a10d | ||
|  | 62cf9cf638 | ||
|  | 0fb458879d | ||
|  | 725c513d94 | ||
| d8648307ff | |||
| 064315c00b | |||
|  | 7c6cc85df6 | ||
|  | a6691ef87c | ||
|  | 8e0ced627a | ||
|  | 0de314870d | ||
|  | ffb91e53d2 | ||
|  | f4e8bf2858 | ||
| a74c34315c | |||
|  | 69470ccc10 | ||
|  | b8b5934193 | ||
|  | 75856f2945 | ||
|  | 3c112a7a25 | ||
|  | ab3596d4d3 | ||
|  | a8c10b1933 | ||
|  | 15e801af3f | ||
|  | 0ffc235741 | ||
|  | 8e19c99c7d | ||
|  | a0bc0ad06f | ||
|  | a8fb2835ca | ||
|  | bc862ce3ab | ||
| 22f4feee7b | |||
| 3f858d6755 | |||
|  | 3267683e22 | ||
|  | f46a67ffb3 | ||
|  | f7b8383ef5 | ||
|  | 10f2872aae | ||
| 35fa3d1dfd | |||
|  | cd73897b8d | ||
|  | c4435e6beb | ||
| d1ece74137 | |||
| 43c817cc67 | |||
|  | 51bf1501fc | ||
|  | 741bc836f6 | ||
|  | 8546d01a4c | ||
| 1407418755 | |||
| a6a0da873f | |||
|  | 7b03d8d087 | ||
|  | 4b759b8f2a | ||
|  | 038b6ee9cd | ||
|  | 38806343a8 | ||
|  | 831ca4e3bf | ||
| eedcaf6470 | |||
| b39f0d1fb6 | |||
| 9f1267dfe6 | |||
| 2e90285232 | |||
| e254de982e | |||
| 28d99b5297 | |||
|  | ee93f0218b | ||
| 161ed102a5 | |||
|  | f65a585236 | ||
|  | ae99e99da2 | ||
| f3ca29af6c | |||
| 37988221a8 | |||
| 7a327a3f28 | |||
| 92f8950a56 | |||
| 65987a8a58 | |||
| 889d828bc2 | |||
| ad98b6193d | |||
| fc760016b3 | |||
| 2da86f7dae | |||
| 97843e2b58 | |||
| 82b3f54697 | |||
| 673994b281 | |||
| bbc0eff078 | |||
| 4c60e31070 | |||
| afbf7d4c37 | |||
| 8c3cc32364 | |||
| 4c3fd9fa3f | |||
| 17b3a10d46 | |||
| 149a46b92c | |||
| db9c28a773 | |||
| 9ac3ac41df | |||
| 2af9ab9034 | |||
| 6f1ea96293 | |||
| 2e3c5890b6 | |||
| bc6678732f | |||
| b10ae00c8a | |||
|  | 6ad73145bc | ||
| f7293f2ddb | |||
|  | 6b8ee7bae0 | ||
|  | 739c2308b5 | ||
|  | a71b69389b | ||
|  | d49e502f53 | ||
|  | 92ec3404f8 | ||
|  | f4ebea3381 | ||
|  | cf167d0cd1 | ||
|  | c363bdd784 | ||
|  | c30d96ea50 | ||
|  | 7ffe17ada1 | ||
| 330a9b3f4c | |||
|  | 28ff66a381 | ||
|  | 78c7bcee36 | ||
| 00a7b95631 | |||
| 94d8321d01 | |||
|  | ac24cc9f99 | ||
|  | 3ab4c8c0bb | ||
| 26d124283e | |||
| 0d889b7041 | |||
| ab31ad006a | |||
| 6e4a06e180 | |||
|  | 446c768cd3 | 
							
								
								
									
										68
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										68
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -9,68 +9,6 @@ matrix: | ||||
|     - os:        osx | ||||
|       osx_image: xcode8.3 | ||||
|       compiler: clang | ||||
|     - compiler: gcc | ||||
|       dist: trusty | ||||
|       sudo: required | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-4.9 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-4.9 | ||||
|     - compiler: gcc | ||||
|       dist: trusty | ||||
|       sudo: required | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-5 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-5 | ||||
|     - compiler: clang | ||||
|       dist: trusty | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-4.8 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||
|     - compiler: clang | ||||
|       dist: trusty | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-4.8 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||
|        | ||||
| before_install: | ||||
|     - export GRIDDIR=`pwd` | ||||
| @@ -106,9 +44,3 @@ script: | ||||
|     - make -j4 | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||
|     - make check | ||||
|     - echo make clean | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										281
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										281
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,27 +1,44 @@ | ||||
| # Grid | ||||
| <table> | ||||
| <tr> | ||||
|     <td>Last stable release</td> | ||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> | ||||
|     </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td>Development branch</td> | ||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> | ||||
|     </td> | ||||
| </tr> | ||||
| </table> | ||||
| # Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid) | ||||
|  | ||||
| **Data parallel C++ mathematical object library.** | ||||
|  | ||||
| License: GPL v2. | ||||
|  | ||||
| Last update Nov 2016. | ||||
| Last update June 2017. | ||||
|  | ||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||
|  | ||||
|  | ||||
|  | ||||
| ### Description | ||||
| This library provides data parallel C++ container classes with internal memory layout | ||||
| that is transformed to map efficiently to SIMD architectures. CSHIFT facilities | ||||
| are provided, similar to HPF and cmfortran, and user control is given over the mapping of | ||||
| array indices to both MPI tasks and SIMD processing elements. | ||||
|  | ||||
| * Identically shaped arrays can then be processed with perfect data parallelisation. | ||||
| * Such identically shaped arrays are called conformable arrays. | ||||
|  | ||||
| The transformation is based on the observation that Cartesian array processing involves | ||||
| identical processing to be performed on different regions of the Cartesian array. | ||||
|  | ||||
| The library will both geometrically decompose into MPI tasks and across SIMD lanes. | ||||
| Local vector loops are parallelised with OpenMP pragmas. | ||||
|  | ||||
| Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but | ||||
| optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification | ||||
| for most programmers. | ||||
|  | ||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||
| Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. | ||||
|  | ||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.  | ||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||
|  | ||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | ||||
| Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. | ||||
|  | ||||
|  | ||||
| ### Compilers | ||||
|  | ||||
| Intel ICPC v16.0.3 and later | ||||
| @@ -56,35 +73,25 @@ When you file an issue, please go through the following checklist: | ||||
| 6. Attach the output of `make V=1`. | ||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||
|  | ||||
| ### Required libraries | ||||
| Grid requires: | ||||
|  | ||||
| [GMP](https://gmplib.org/),  | ||||
|  | ||||
| ### Description | ||||
| This library provides data parallel C++ container classes with internal memory layout | ||||
| that is transformed to map efficiently to SIMD architectures. CSHIFT facilities | ||||
| are provided, similar to HPF and cmfortran, and user control is given over the mapping of | ||||
| array indices to both MPI tasks and SIMD processing elements. | ||||
| [MPFR](http://www.mpfr.org/)  | ||||
|  | ||||
| * Identically shaped arrays can then be processed with perfect data parallelisation. | ||||
| * Such identically shaped arrays are called conformable arrays. | ||||
| Bootstrapping Grid downloads the Eigen library, which is used internally for dense matrix (non-QCD) operations. | ||||
|  | ||||
| The transformation is based on the observation that Cartesian array processing involves | ||||
| identical processing to be performed on different regions of the Cartesian array. | ||||
| Grid optionally uses: | ||||
|  | ||||
| The library will both geometrically decompose into MPI tasks and across SIMD lanes. | ||||
| Local vector loops are parallelised with OpenMP pragmas. | ||||
| [HDF5](https://support.hdfgroup.org/HDF5/)   | ||||
|  | ||||
| Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but | ||||
| optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification | ||||
| for most programmers. | ||||
| [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.  | ||||
|  | ||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||
| Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). | ||||
| [FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. | ||||
|  | ||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | ||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||
| LAPACK either generic version or Intel MKL library. | ||||
|  | ||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | ||||
| Please see https://arxiv.org/abs/1512.03487 for more detail. | ||||
|  | ||||
| ### Quick start | ||||
| First, start by cloning the repository: | ||||
| @@ -155,7 +162,6 @@ The following options can be used with the `--enable-comms=` option to target dif | ||||
| | `none`         | no communications                                             | | ||||
| | `mpi[-auto]`   | MPI communications                                            | | ||||
| | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | ||||
| | `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | ||||
| | `shmem `       | Cray SHMEM communications                                     | | ||||
|  | ||||
| For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | ||||
| @@ -173,7 +179,8 @@ The following options can be used with the `--enable-simd=` option to target diff | ||||
| | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | ||||
| | `AVX2`      | AVX 2 (256 bit)                        | | ||||
| | `AVX512`    | AVX 512 bit                            | | ||||
| | `QPX`       | QPX (256 bit)                          | | ||||
| | `NEONv8`    | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit)                     | | ||||
| | `QPX`       | IBM QPX (256 bit)                      | | ||||
|  | ||||
| Alternatively, some CPU codenames can be directly used: | ||||
|  | ||||
| @@ -195,21 +202,205 @@ The following configuration is recommended for the Intel Knights Landing platfor | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=KNL        \ | ||||
|              --enable-comms=mpi-auto \ | ||||
|              --with-gmp=<path>        \ | ||||
|              --with-mpfr=<path>       \ | ||||
|              --enable-comms=mpi-auto  \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=icpc MPICXX=mpiicpc | ||||
| ``` | ||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. | ||||
|  | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=KNL        \ | ||||
|              --enable-comms=mpi       \ | ||||
|              --with-gmp=<path>        \ | ||||
|              --with-mpfr=<path>       \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=CC CC=cc | ||||
| ``` | ||||
| ``` | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| Knight's Landing with Intel Omnipath adapters with two adapters per node  | ||||
| presently performs better with use of more than one rank per node, using shared memory  | ||||
| for interior communication. This is the mpi3 communications implementation.  | ||||
| We recommend four ranks per node for best performance, but optimum is local volume dependent. | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=KNL        \ | ||||
|              --enable-comms=mpi3-auto \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=icpc MPICXX=mpiicpc  | ||||
| ``` | ||||
|  | ||||
| ### Build setup for Intel Haswell Xeon platform | ||||
|  | ||||
| The following configuration is recommended for the Intel Haswell platform: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX2       \ | ||||
|              --enable-comms=mpi3-auto \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=icpc MPICXX=mpiicpc | ||||
| ``` | ||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX2       \ | ||||
|              --enable-comms=mpi3      \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=CC CC=cc | ||||
| ``` | ||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  | ||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using | ||||
| ``` | ||||
|         export I_MPI_PIN=1 | ||||
| ``` | ||||
| This is the default. | ||||
|  | ||||
| ### Build setup for Intel Skylake Xeon platform | ||||
|  | ||||
| The following configuration is recommended for the Intel Skylake platform: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX512     \ | ||||
|              --enable-comms=mpi3      \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=mpiicpc | ||||
| ``` | ||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX512     \ | ||||
|              --enable-comms=mpi3      \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=CC CC=cc | ||||
| ``` | ||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  | ||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using | ||||
| ```  | ||||
|         export I_MPI_PIN=1 | ||||
| ``` | ||||
| This is the default.  | ||||
|  | ||||
| #### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping:  | ||||
|  | ||||
| mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18  | ||||
|  | ||||
| TBA | ||||
|  | ||||
|  | ||||
| ### Build setup for AMD EPYC / RYZEN | ||||
|  | ||||
| The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. | ||||
| So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total | ||||
| are common. Each chip within the module exposes a separate NUMA domain. | ||||
| There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. | ||||
| MPI-3 is recommended with the use of four ranks per socket, | ||||
| and 8 threads per rank.  | ||||
|  | ||||
| The following configuration is recommended for the AMD EPYC platform. | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX2       \ | ||||
|              --enable-comms=mpi3 \ | ||||
|              CXX=mpicxx  | ||||
| ``` | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. | ||||
| This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.  | ||||
|  | ||||
| It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and | ||||
| shared memory to communicate within this node: | ||||
|  | ||||
| mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4  | ||||
|  | ||||
| Where omp_bind.sh does the following: | ||||
| ``` | ||||
| #!/bin/bash | ||||
|  | ||||
| numanode=` expr $PMI_RANK % 8 ` | ||||
| basecore=`expr $numanode \* 16` | ||||
| core0=`expr $basecore + 0 ` | ||||
| core1=`expr $basecore + 2 ` | ||||
| core2=`expr $basecore + 4 ` | ||||
| core3=`expr $basecore + 6 ` | ||||
| core4=`expr $basecore + 8 ` | ||||
| core5=`expr $basecore + 10 ` | ||||
| core6=`expr $basecore + 12 ` | ||||
| core7=`expr $basecore + 14 ` | ||||
|  | ||||
| export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" | ||||
| echo GOMP_CPU_AFFINITY $GOMP_CPU_AFFINITY | ||||
|  | ||||
| $@ | ||||
| ``` | ||||
|  | ||||
| Performance: | ||||
|  | ||||
| #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping:  | ||||
|  | ||||
| mpirun  -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 | ||||
|  | ||||
| TBA | ||||
|  | ||||
| ### Build setup for BlueGene/Q | ||||
|  | ||||
| To be written... | ||||
|  | ||||
| ### Build setup for ARM Neon | ||||
|  | ||||
| To be written... | ||||
|  | ||||
| ### Build setup for laptops, other compilers, non-cluster builds | ||||
|  | ||||
| Many versions of g++ and clang++ work with Grid; using them merely involves replacing CXX (and MPICXX) | ||||
| and omitting the enable-mkl flag.  | ||||
|  | ||||
| Single node builds are enabled with  | ||||
| ``` | ||||
|             --enable-comms=none | ||||
| ``` | ||||
|  | ||||
| FFTW support that is not in the default search path may then be enabled with | ||||
| ``` | ||||
|     --with-fftw=<installpath> | ||||
| ``` | ||||
|  | ||||
| BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. | ||||
|  | ||||
|   | ||||
							
								
								
									
										33
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,23 +1,32 @@ | ||||
| TODO: | ||||
| --------------- | ||||
|  | ||||
| Peter's work list: | ||||
| 2)- Precision conversion and sort out localConvert      <--  | ||||
| 3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started  | ||||
| 4)- Binary I/O speed up & x-strips | ||||
| -- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet | ||||
| -- Physical propagator interface | ||||
| -- Conserved currents | ||||
| -- GaugeFix into central location | ||||
| -- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||
| -- HDCR resume | ||||
| Large item work list: | ||||
|  | ||||
| 1)- BG/Q port and check | ||||
| 2)- Christoph's local basis expansion Lanczos | ||||
| 3)- Precision conversion and sort out localConvert      <-- partial | ||||
|  | ||||
|   - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | ||||
| 4)- Physical propagator interface | ||||
| 5)- Conserved currents | ||||
| 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||
| 7)- HDCR resume | ||||
|  | ||||
| Recent DONE  | ||||
|  | ||||
| -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O.  <--- DONE | ||||
| -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE | ||||
| -- GaugeFix into central location                      <-- DONE | ||||
| -- Scidac and Ildg metadata handling                   <-- DONE | ||||
| -- Binary I/O MPI2 IO                                  <-- DONE | ||||
| -- Binary I/O speed up & x-strips                      <-- DONE | ||||
| -- Cut down the exterior overhead                      <-- DONE | ||||
| -- Interior legs from SHM comms                        <-- DONE | ||||
| -- Half-precision comms                                <-- DONE | ||||
| -- Merge high precision reduction into develop         | ||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination | ||||
| -- Merge high precision reduction into develop         <-- DONE | ||||
| -- BlockCG, BCGrQ                                      <-- DONE | ||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE | ||||
|    -- slice* linalg routines for multiRHS, BlockCG     | ||||
|  | ||||
| ----- | ||||
|   | ||||
							
								
								
									
										797
									
								
								benchmarks/Benchmark_ITT.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										797
									
								
								benchmarks/Benchmark_ITT.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,797 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./benchmarks/Benchmark_ITT.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; | ||||
|  | ||||
|  | ||||
// File-scope result tables shared across the benchmark.
// NOTE(review): presumably indexed in parallel (one entry per measured
// L / Ls combination) -- confirm against the code that fills them.
std::vector<int>    L_list;
std::vector<int>    Ls_list;
std::vector<double> mflop_list;

// Reference flop rate and its error estimate (start at zero).
double mflop_ref     = 0.0;
double mflop_ref_err = 0.0;

// NOTE(review): meaning not visible in this chunk; starts at zero.
int NN_global = 0;
|  | ||||
// Summary statistics over a set of samples (e.g. repeated timings).
struct time_statistics{
  double mean;  // arithmetic mean of the samples
  double err;   // standard error of the mean: sqrt(sum((x-mean)^2) / (n*(n-1)))
  double min;   // smallest sample
  double max;   // largest sample

  // Fill mean/err/min/max from the samples in v.
  //
  // Edge cases:
  //  - empty input: every field is set to 0 (the unguarded code would divide
  //    by zero and dereference the end iterator of minmax_element);
  //  - a single sample: err is 0, since the n*(n-1) denominator would
  //    otherwise evaluate 0/0 and store NaN.
  void statistics(const std::vector<double>& v){
    const std::size_t n = v.size();
    if (n == 0) {
      mean = err = min = max = 0.0;
      return;
    }

    const double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / n;

    // Sum of squared deviations, accumulated in element order.
    double sq_sum = 0.0;
    for (const double x : v) {
      const double d = x - mean;
      sq_sum += d * d;
    }
    err = (n > 1) ? std::sqrt(sq_sum / (n * (n - 1))) : 0.0;

    const auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
|  | ||||
| void comms_header(){ | ||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||
| }; | ||||
|  | ||||
| Gamma::Algebra Gmu [] = { | ||||
|   Gamma::Algebra::GammaX, | ||||
|   Gamma::Algebra::GammaY, | ||||
|   Gamma::Algebra::GammaZ, | ||||
|   Gamma::Algebra::GammaT | ||||
| }; | ||||
| struct controls { | ||||
|   int Opt; | ||||
|   int CommsOverlap; | ||||
|   Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; | ||||
|   //  int HugePages; | ||||
| }; | ||||
|  | ||||
| class Benchmark { | ||||
| public: | ||||
|   static void Decomposition (void ) { | ||||
|  | ||||
|     int threads = GridThread::GetThreads(); | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; | ||||
|     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|   } | ||||
|  | ||||
|   static void Comms(void) | ||||
|   { | ||||
|     int Nloop=200; | ||||
|     int nmu=0; | ||||
|     int maxlat=32; | ||||
|  | ||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); | ||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|     for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||
|  | ||||
|     std::vector<double> t_time(Nloop); | ||||
|     time_statistics timestat; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|     comms_header(); | ||||
|  | ||||
|     for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|       for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
| 	std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
| 	      lat*mpi_layout[1], | ||||
| 	      lat*mpi_layout[2], | ||||
| 	      lat*mpi_layout[3]}); | ||||
|  | ||||
| 	GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
| 	RealD Nrank = Grid._Nprocessors; | ||||
| 	RealD Nnode = Grid.NodeCount(); | ||||
| 	RealD ppn = Nrank/Nnode; | ||||
|  | ||||
| 	std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
| 	std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| 	Grid.ShmBufferFreeAll(); | ||||
| 	for(int d=0;d<8;d++){ | ||||
| 	  xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	  rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	} | ||||
|  | ||||
| 	int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
| 	int ncomm; | ||||
| 	double dbytes; | ||||
| 	std::vector<double> times(Nloop); | ||||
| 	for(int i=0;i<Nloop;i++){ | ||||
|  | ||||
| 	  double start=usecond(); | ||||
|  | ||||
| 	  dbytes=0; | ||||
| 	  ncomm=0; | ||||
|  | ||||
| 	  parallel_for(int dir=0;dir<8;dir++){ | ||||
|  | ||||
| 	    double tbytes; | ||||
| 	    int mu =dir % 4; | ||||
|  | ||||
| 	    if (mpi_layout[mu]>1 ) { | ||||
| 	         | ||||
| 	      int xmit_to_rank; | ||||
| 	      int recv_from_rank; | ||||
| 	      if ( dir == mu ) {  | ||||
| 		int comm_proc=1; | ||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	      } else {  | ||||
| 		int comm_proc = mpi_layout[mu]-1; | ||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	      } | ||||
| 	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | ||||
| 						 (void *)&rbuf[dir][0], recv_from_rank, | ||||
| 						 bytes,dir); | ||||
| 	   | ||||
| #ifdef GRID_OMP | ||||
| #pragma omp atomic | ||||
| #endif | ||||
| 	      ncomm++; | ||||
|  | ||||
| #ifdef GRID_OMP | ||||
| #pragma omp atomic | ||||
| #endif | ||||
| 	      dbytes+=tbytes; | ||||
| 	    } | ||||
| 	  } | ||||
| 	  Grid.Barrier(); | ||||
| 	  double stop=usecond(); | ||||
| 	  t_time[i] = stop-start; // microseconds | ||||
| 	} | ||||
|  | ||||
| 	timestat.statistics(t_time); | ||||
| 	//	for(int i=0;i<t_time.size();i++){ | ||||
| 	//	  std::cout << i<<" "<<t_time[i]<<std::endl; | ||||
| 	//	} | ||||
|  | ||||
| 	dbytes=dbytes*ppn; | ||||
| 	double xbytes    = dbytes*0.5; | ||||
| 	double rbytes    = dbytes*0.5; | ||||
| 	double bidibytes = dbytes; | ||||
|  | ||||
| 	std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
| 		 <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
| 		 <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
| 		 <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
| 		 << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
| 		 << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|   | ||||
| 	 | ||||
| 	    } | ||||
|     }     | ||||
|  | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   static void Memory(void) | ||||
|   { | ||||
|     const int Nvec=8; | ||||
|     typedef Lattice< iVector< vReal,Nvec> > LatticeVec; | ||||
|     typedef iVector<vReal,Nvec> Vec; | ||||
|  | ||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); | ||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|    | ||||
|     uint64_t NP; | ||||
|     uint64_t NN; | ||||
|  | ||||
|  | ||||
|   uint64_t lmax=48; | ||||
| #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) | ||||
|  | ||||
|     GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|     for(int lat=8;lat<=lmax;lat+=4){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       NP= Grid.RankCount(); | ||||
|       NN =Grid.NodeCount(); | ||||
|  | ||||
|       Vec rn ; random(sRNG,rn); | ||||
|  | ||||
|       LatticeVec z(&Grid); z=rn; | ||||
|       LatticeVec x(&Grid); x=rn; | ||||
|       LatticeVec y(&Grid); y=rn; | ||||
|       double a=2.0; | ||||
|  | ||||
|       uint64_t Nloop=NLOOP; | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	z=a*x-y; | ||||
|         x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away | ||||
|         y._odata[4]=z._odata[4]; | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|       | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3)  | ||||
| 	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000. | ||||
| 	       << "\t\t"<< bytes/time/NN <<std::endl; | ||||
|  | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   static double DWF5(int Ls,int L) | ||||
|   { | ||||
|     RealD mass=0.1; | ||||
|     RealD M5  =1.8; | ||||
|  | ||||
|     double mflops; | ||||
|     double mflops_best = 0; | ||||
|     double mflops_worst= 0; | ||||
|     std::vector<double> mflops_all; | ||||
|  | ||||
|     /////////////////////////////////////////////////////// | ||||
|     // Set/Get the layout & grid size | ||||
|     /////////////////////////////////////////////////////// | ||||
|     int threads = GridThread::GetThreads(); | ||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||
|     std::vector<int> local({L,L,L,L}); | ||||
|  | ||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  | ||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     uint64_t NP = TmpGrid->RankCount(); | ||||
|     uint64_t NN = TmpGrid->NodeCount(); | ||||
|     NN_global=NN; | ||||
|     uint64_t SHM=NP/NN; | ||||
|  | ||||
|     std::vector<int> internal; | ||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); | ||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); | ||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); | ||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); | ||||
|     else assert(0); | ||||
|  | ||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); | ||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); | ||||
|  | ||||
|     ///////// Welcome message //////////// | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|     ///////// Lattice Init //////////// | ||||
|     GridCartesian         * UGrid    = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     GridRedBlackCartesian * UrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|     GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); | ||||
|     GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); | ||||
|     GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); | ||||
|     GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|     ///////// RNG Init //////////// | ||||
|     std::vector<int> seeds4({1,2,3,4}); | ||||
|     std::vector<int> seeds5({5,6,7,8}); | ||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|     GridParallelRNG          RNG5(sFGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||
|  | ||||
|     ///////// Source preparation //////////// | ||||
|     LatticeFermion src   (sFGrid); random(RNG5,src); | ||||
|     LatticeFermion tmp   (sFGrid); | ||||
|  | ||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); | ||||
|     src = src*N2; | ||||
|      | ||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  | ||||
|  | ||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); | ||||
|     LatticeFermion src_e (sFrbGrid); | ||||
|     LatticeFermion src_o (sFrbGrid); | ||||
|     LatticeFermion r_e   (sFrbGrid); | ||||
|     LatticeFermion r_o   (sFrbGrid); | ||||
|     LatticeFermion r_eo  (sFGrid); | ||||
|     LatticeFermion err   (sFGrid); | ||||
|     { | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd,src_o,src); | ||||
|  | ||||
| #if defined(AVX512)  | ||||
|       const int num_cases = 6; | ||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); | ||||
| #else | ||||
|       const int num_cases = 4; | ||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); | ||||
| #endif | ||||
|       controls Cases [] = { | ||||
| #ifdef AVX512 | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| #endif | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } | ||||
|       };  | ||||
|  | ||||
|       for(int c=0;c<num_cases;c++) { | ||||
|  | ||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; | ||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||
|  | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
| 	int nwarm = 100; | ||||
| 	uint64_t ncall = 1000; | ||||
|  | ||||
| 	double t0=usecond(); | ||||
| 	sFGrid->Barrier(); | ||||
| 	for(int i=0;i<nwarm;i++){ | ||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	} | ||||
| 	sFGrid->Barrier(); | ||||
| 	double t1=usecond(); | ||||
|  | ||||
| 	sDw.ZeroCounters(); | ||||
| 	time_statistics timestat; | ||||
| 	std::vector<double> t_time(ncall); | ||||
| 	for(uint64_t i=0;i<ncall;i++){ | ||||
| 	  t0=usecond(); | ||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	  t1=usecond(); | ||||
| 	  t_time[i] = t1-t0; | ||||
| 	} | ||||
| 	sFGrid->Barrier(); | ||||
| 	 | ||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
| 	double flops=(1344.0*volume)/2; | ||||
| 	double mf_hi, mf_lo, mf_err; | ||||
|  | ||||
| 	timestat.statistics(t_time); | ||||
| 	mf_hi = flops/timestat.min; | ||||
| 	mf_lo = flops/timestat.max; | ||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||
|  | ||||
| 	mflops = flops/timestat.mean; | ||||
| 	mflops_all.push_back(mflops); | ||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||
|  | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank   "<< mflops/NP<<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node   "<< mflops/NN<<std::endl; | ||||
|  | ||||
| 	sDw.Report(); | ||||
|  | ||||
|       } | ||||
|       double robust = mflops_worst/mflops_best;; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust <<std::endl; | ||||
|       std::cout<<GridLogMessage <<fmt << std::endl; | ||||
|       std::cout<<GridLogMessage; | ||||
|  | ||||
|       for(int i=0;i<mflops_all.size();i++){ | ||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||
|       } | ||||
|       std::cout<<std::endl; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|     } | ||||
|     return mflops_best; | ||||
|   } | ||||
|  | ||||
|   static double DWF(int Ls,int L, double & robust) | ||||
|   { | ||||
|     RealD mass=0.1; | ||||
|     RealD M5  =1.8; | ||||
|  | ||||
|     double mflops; | ||||
|     double mflops_best = 0; | ||||
|     double mflops_worst= 0; | ||||
|     std::vector<double> mflops_all; | ||||
|  | ||||
|     /////////////////////////////////////////////////////// | ||||
|     // Set/Get the layout & grid size | ||||
|     /////////////////////////////////////////////////////// | ||||
|     int threads = GridThread::GetThreads(); | ||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||
|     std::vector<int> local({L,L,L,L}); | ||||
|  | ||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  | ||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     uint64_t NP = TmpGrid->RankCount(); | ||||
|     uint64_t NN = TmpGrid->NodeCount(); | ||||
|     NN_global=NN; | ||||
|     uint64_t SHM=NP/NN; | ||||
|  | ||||
|     std::vector<int> internal; | ||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); | ||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); | ||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); | ||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); | ||||
|     else assert(0); | ||||
|  | ||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); | ||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); | ||||
|  | ||||
|     ///////// Welcome message //////////// | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|  | ||||
|     ///////// Lattice Init //////////// | ||||
|     GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|      | ||||
|     ///////// RNG Init //////////// | ||||
|     std::vector<int> seeds4({1,2,3,4}); | ||||
|     std::vector<int> seeds5({5,6,7,8}); | ||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||
|  | ||||
|     ///////// Source preparation //////////// | ||||
|     LatticeFermion src   (FGrid); random(RNG5,src); | ||||
|     LatticeFermion ref   (FGrid); | ||||
|     LatticeFermion tmp   (FGrid); | ||||
|  | ||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); | ||||
|     src = src*N2; | ||||
|      | ||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  | ||||
|  | ||||
|     DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|  | ||||
|     //////////////////////////////////// | ||||
|     // Naive wilson implementation | ||||
|     //////////////////////////////////// | ||||
|     { | ||||
|       LatticeGaugeField Umu5d(FGrid);  | ||||
|       std::vector<LatticeColourMatrix> U(4,FGrid); | ||||
|       for(int ss=0;ss<Umu._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<Ls;s++){ | ||||
| 	  Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; | ||||
| 	} | ||||
|       } | ||||
|       ref = zero; | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
| 	U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); | ||||
|       } | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
| 	 | ||||
| 	tmp = U[mu]*Cshift(src,mu+1,1); | ||||
| 	ref=ref + tmp - Gamma(Gmu[mu])*tmp; | ||||
| 	 | ||||
| 	tmp =adj(U[mu])*src; | ||||
| 	tmp =Cshift(tmp,mu+1,-1); | ||||
| 	ref=ref + tmp + Gamma(Gmu[mu])*tmp; | ||||
|       } | ||||
|       ref = -0.5*ref; | ||||
|     } | ||||
|  | ||||
|     LatticeFermion src_e (FrbGrid); | ||||
|     LatticeFermion src_o (FrbGrid); | ||||
|     LatticeFermion r_e   (FrbGrid); | ||||
|     LatticeFermion r_o   (FrbGrid); | ||||
|     LatticeFermion r_eo  (FGrid); | ||||
|     LatticeFermion err   (FGrid); | ||||
|     { | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd,src_o,src); | ||||
|  | ||||
| #if defined(AVX512)  | ||||
|       const int num_cases = 6; | ||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); | ||||
| #else | ||||
|       const int num_cases = 4; | ||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); | ||||
| #endif | ||||
|       controls Cases [] = { | ||||
| #ifdef AVX512 | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| #endif | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } | ||||
|       };  | ||||
|  | ||||
|       for(int c=0;c<num_cases;c++) { | ||||
|  | ||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; | ||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||
|  | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
| 	int nwarm = 200; | ||||
| 	double t0=usecond(); | ||||
| 	FGrid->Barrier(); | ||||
| 	for(int i=0;i<nwarm;i++){ | ||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	} | ||||
| 	FGrid->Barrier(); | ||||
| 	double t1=usecond(); | ||||
| 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); | ||||
| 	//	if (ncall < 500) ncall = 500; | ||||
| 	uint64_t ncall = 1000; | ||||
|  | ||||
| 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); | ||||
|  | ||||
| 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; | ||||
| 	Dw.ZeroCounters(); | ||||
|  | ||||
| 	time_statistics timestat; | ||||
| 	std::vector<double> t_time(ncall); | ||||
| 	for(uint64_t i=0;i<ncall;i++){ | ||||
| 	  t0=usecond(); | ||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	  t1=usecond(); | ||||
| 	  t_time[i] = t1-t0; | ||||
| 	} | ||||
| 	FGrid->Barrier(); | ||||
| 	 | ||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
| 	double flops=(1344.0*volume)/2; | ||||
| 	double mf_hi, mf_lo, mf_err; | ||||
|  | ||||
| 	timestat.statistics(t_time); | ||||
| 	mf_hi = flops/timestat.min; | ||||
| 	mf_lo = flops/timestat.max; | ||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||
|  | ||||
| 	mflops = flops/timestat.mean; | ||||
| 	mflops_all.push_back(mflops); | ||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||
|  | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl; | ||||
|  | ||||
| 	Dw.Report(); | ||||
|  | ||||
| 	Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	Dw.DhopOE(src_e,r_o,DaggerNo); | ||||
| 	setCheckerboard(r_eo,r_o); | ||||
| 	setCheckerboard(r_eo,r_e); | ||||
| 	err = r_eo-ref;  | ||||
| 	std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||
| 	assert((norm2(err)<1.0e-4)); | ||||
|  | ||||
|       } | ||||
|       robust = mflops_worst/mflops_best; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||
|       std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust  <<std::endl; | ||||
|       std::cout<<GridLogMessage <<fmt << std::endl; | ||||
|       std::cout<<GridLogMessage ; | ||||
|  | ||||
|       for(int i=0;i<mflops_all.size();i++){ | ||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||
|       } | ||||
|       std::cout<<std::endl; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|     } | ||||
|     return mflops_best; | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); | ||||
| #ifdef KNL | ||||
|   LebesgueOrder::Block = std::vector<int>({8,2,2,2}); | ||||
| #else | ||||
|   LebesgueOrder::Block = std::vector<int>({2,2,2,2}); | ||||
| #endif | ||||
|   Benchmark::Decomposition(); | ||||
|  | ||||
|   int do_memory=1; | ||||
|   int do_comms =1; | ||||
|   int do_su3   =0; | ||||
|   int do_wilson=1; | ||||
|   int do_dwf   =1; | ||||
|  | ||||
|   if ( do_su3 ) { | ||||
|     // empty for now | ||||
|   } | ||||
|  | ||||
|   int sel=2; | ||||
|   std::vector<int> L_list({8,12,16,24}); | ||||
|  | ||||
|   //int sel=1; | ||||
|   //  std::vector<int> L_list({8,12}); | ||||
|   std::vector<double> robust_list; | ||||
|  | ||||
|   std::vector<double> wilson; | ||||
|   std::vector<double> dwf4; | ||||
|   std::vector<double> dwf5; | ||||
|  | ||||
|   if ( do_wilson ) { | ||||
|     int Ls=1; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     for(int l=0;l<L_list.size();l++){ | ||||
|       double robust; | ||||
|       wilson.push_back(Benchmark::DWF(1,L_list[l],robust)); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int Ls=16; | ||||
|   if ( do_dwf ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     for(int l=0;l<L_list.size();l++){ | ||||
|       double robust; | ||||
|       double result = Benchmark::DWF(Ls,L_list[l],robust) ; | ||||
|       dwf4.push_back(result); | ||||
|       robust_list.push_back(robust); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if ( do_dwf ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     for(int l=0;l<L_list.size();l++){ | ||||
|       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l])); | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
|   if ( do_dwf ) { | ||||
|  | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl; | ||||
|   for(int l=0;l<L_list.size();l++){ | ||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl; | ||||
|   } | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   } | ||||
|  | ||||
|   int NN=NN_global; | ||||
|   if ( do_memory ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     Benchmark::Memory(); | ||||
|   } | ||||
|  | ||||
|   if ( do_comms && (NN>1) ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     Benchmark::Comms(); | ||||
|   } | ||||
|  | ||||
|   if ( do_dwf ) { | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl; | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl; | ||||
|   for(int l=0;l<L_list.size();l++){ | ||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl; | ||||
|   } | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Comparison point     result: "  << dwf4[sel]/NN << " Mflop/s per node"<<std::endl; | ||||
|   std::cout<<std::setprecision(3); | ||||
|   std::cout<<GridLogMessage << " Comparison point robustness: "  << robust_list[sel] <<std::endl; | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|   } | ||||
|  | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
| @@ -31,6 +31,32 @@ using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
// Summary statistics over a set of timing samples.
//
//   mean : arithmetic mean of the samples
//   err  : standard error of the mean, sqrt( sum (x-mean)^2 / (n*(n-1)) )
//   min  : smallest sample
//   max  : largest sample
struct time_statistics{
  double mean;
  double err;
  double min;
  double max;

  // Recompute all four fields from the samples in v.
  //
  // Degenerate inputs are given defined results instead of NaN / undefined
  // behaviour:
  //   * empty v        -> mean = err = min = max = 0
  //   * a single value -> err = 0 (no spread can be estimated from one sample)
  void statistics(const std::vector<double> &v){
    if ( v.empty() ) {
      mean = 0.0;
      err  = 0.0;
      min  = 0.0;
      max  = 0.0;
      return;
    }

    const double n   = static_cast<double>(v.size());
    const double sum = std::accumulate(v.begin(), v.end(), 0.0);
    mean = sum / n;

    // Sum of squared deviations, accumulated in sample order.
    double sq_sum = 0.0;
    for (std::vector<double>::const_iterator it = v.begin(); it != v.end(); ++it) {
      const double d = *it - mean;
      sq_sum += d * d;
    }
    // n*(n-1) is zero for a single sample; avoid the resulting 0/0.
    err = (v.size() > 1) ? std::sqrt(sq_sum / (n * (n - 1.0))) : 0.0;

    auto result = std::minmax_element(v.begin(), v.end());
    min = *result.first;
    max = *result.second;
  }
};
|  | ||||
| void header(){ | ||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
| @@ -40,17 +66,21 @@ int main (int argc, char ** argv) | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   int Nloop=10; | ||||
|   int Nloop=100; | ||||
|   int nmu=0; | ||||
|   int maxlat=32; | ||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||
|  | ||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; | ||||
|   std::vector<double> t_time(Nloop); | ||||
|   time_statistics timestat; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   int maxlat=24; | ||||
|   header(); | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
| @@ -58,15 +88,23 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	 | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|       for(int mu=0;mu<8;mu++){ | ||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; | ||||
|       } | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| @@ -79,7 +117,6 @@ int main (int argc, char ** argv) | ||||
| 	    int comm_proc=1; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.SendToRecvFromBegin(requests, | ||||
| 				   (void *)&xbuf[mu][0], | ||||
| @@ -102,18 +139,24 @@ int main (int argc, char ** argv) | ||||
| 	} | ||||
| 	Grid.SendToRecvFromComplete(requests); | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       double dbytes    = bytes*ppn; | ||||
|       double xbytes    = dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|     } | ||||
|   }     | ||||
|  | ||||
| @@ -121,25 +164,32 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|  | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8); | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); | ||||
|  | ||||
|       for(int mu=0;mu<8;mu++){ | ||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       double start=usecond(); | ||||
|      | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| @@ -178,30 +228,37 @@ int main (int argc, char ** argv) | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|  | ||||
|       } | ||||
|  | ||||
|       double stop=usecond(); | ||||
|       timestat.statistics(t_time); | ||||
|        | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double dbytes    = bytes*ppn; | ||||
|       double xbytes    = dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|  | ||||
|       double time = stop-start; | ||||
|     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|        | ||||
|     } | ||||
|   }   | ||||
|  | ||||
|  | ||||
|   Nloop=10; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
| @@ -209,6 +266,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| @@ -216,73 +276,86 @@ int main (int argc, char ** argv) | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	 | ||||
|  | ||||
| 	  if (mpi_layout[mu]>1 ) { | ||||
| 	   | ||||
| 	    ncomm++; | ||||
| 	    int comm_proc=1; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu); | ||||
| 	 | ||||
| 	    comm_proc = mpi_layout[mu]-1; | ||||
| 	   | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu+4][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu+4][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu+4][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu+4][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu+4); | ||||
| 	   | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	Grid.StencilSendToRecvFromComplete(requests,0); | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
| 	 | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|  | ||||
|  | ||||
|   Nloop=100; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
| @@ -290,6 +363,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| @@ -297,16 +373,18 @@ int main (int argc, char ** argv) | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	 | ||||
| @@ -318,44 +396,146 @@ int main (int argc, char ** argv) | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu); | ||||
| 	    requests.resize(0); | ||||
|  | ||||
| 	    comm_proc = mpi_layout[mu]-1; | ||||
| 	   | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu+4][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu+4][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu+4][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu+4][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu+4); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu+4); | ||||
| 	    requests.resize(0); | ||||
| 	   | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
| 	 | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|   | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|  | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
|       				    lat*mpi_layout[2], | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
|       Grid.ShmBufferFreeAll(); | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
|  | ||||
| 	parallel_for(int dir=0;dir<8;dir++){ | ||||
|  | ||||
| 	  double tbytes; | ||||
| 	  int mu =dir % 4; | ||||
|  | ||||
| 	  if (mpi_layout[mu]>1 ) { | ||||
| 	   | ||||
| 	    ncomm++; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	    if ( dir == mu ) {  | ||||
| 	      int comm_proc=1; | ||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    } else {  | ||||
| 	      int comm_proc = mpi_layout[mu]-1; | ||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    } | ||||
|  | ||||
| 	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | ||||
| 					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); | ||||
|  | ||||
| #pragma omp atomic | ||||
| 	    dbytes+=tbytes; | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|       } | ||||
|  | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|   | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
| @@ -165,7 +165,7 @@ int main (int argc, char ** argv) | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|  | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   int ncall =1000; | ||||
|   int ncall =500; | ||||
|   if (1) { | ||||
|     FGrid->Barrier(); | ||||
|     Dw.ZeroCounters(); | ||||
| @@ -302,6 +302,7 @@ int main (int argc, char ** argv) | ||||
|       std::cout<< "sD ERR   \n " << err  <<std::endl; | ||||
|     } | ||||
|     assert(sum < 1.0e-4); | ||||
|  | ||||
|      | ||||
|     if(1){ | ||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||
| @@ -381,8 +382,23 @@ int main (int argc, char ** argv) | ||||
|       } | ||||
|       assert(error<1.0e-4); | ||||
|     } | ||||
|  | ||||
|   if(0){ | ||||
|     std::cout << "Single cache warm call to sDw.Dhop " <<std::endl; | ||||
|     for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ | ||||
|       sDw.Dhop(ssrc,sresult,0); | ||||
|       PerformanceCounter Counter(i); | ||||
|       Counter.Start(); | ||||
|       sDw.Dhop(ssrc,sresult,0); | ||||
|       Counter.Stop(); | ||||
|       Counter.Report(); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   } | ||||
|  | ||||
|  | ||||
|  | ||||
|   if (1) | ||||
|   { // Naive wilson dag implementation | ||||
|     ref = zero; | ||||
| @@ -487,9 +503,9 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||
|  | ||||
|   //assert(norm2(src_e)<1.0e-4); | ||||
|   //assert(norm2(src_o)<1.0e-4); | ||||
|  | ||||
|   assert(norm2(src_e)<1.0e-4); | ||||
|   assert(norm2(src_o)<1.0e-4); | ||||
|   Grid_finalize(); | ||||
|   exit(0); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -55,21 +55,21 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|   uint64_t lmax=44; | ||||
| #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   uint64_t lmax=96; | ||||
| #define NLOOP (10*lmax*lmax*lmax*lmax/vol) | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       uint64_t Nloop=NLOOP; | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       double a=2.0; | ||||
|  | ||||
|  | ||||
| @@ -83,7 +83,7 @@ int main (int argc, char ** argv) | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|        | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       double bytes=3*vol*Nvec*sizeof(Real); | ||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||
|  | ||||
|     } | ||||
| @@ -94,17 +94,17 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|    | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       double a=2.0; | ||||
|  | ||||
|       uint64_t Nloop=NLOOP; | ||||
| @@ -119,7 +119,7 @@ int main (int argc, char ** argv) | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|       | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       double bytes=3*vol*Nvec*sizeof(Real); | ||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||
|  | ||||
|     } | ||||
| @@ -129,20 +129,20 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|  | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       uint64_t Nloop=NLOOP; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       RealD a=2.0; | ||||
|  | ||||
|  | ||||
| @@ -154,7 +154,7 @@ int main (int argc, char ** argv) | ||||
|       double stop=usecond(); | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|        | ||||
|       double bytes=2*vol*Nvec*sizeof(Real); | ||||
|       double bytes=2.0*vol*Nvec*sizeof(Real); | ||||
|       double flops=vol*Nvec*1;// mul | ||||
|       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||
|  | ||||
| @@ -166,17 +166,17 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       uint64_t Nloop=NLOOP; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       RealD a=2.0; | ||||
|       Real nn;       | ||||
|       double start=usecond(); | ||||
| @@ -187,7 +187,7 @@ int main (int argc, char ** argv) | ||||
|       double stop=usecond(); | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|        | ||||
|       double bytes=vol*Nvec*sizeof(Real); | ||||
|       double bytes=1.0*vol*Nvec*sizeof(Real); | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; | ||||
|  | ||||
|   | ||||
| @@ -35,14 +35,14 @@ using namespace Grid::QCD; | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
| #define LMAX (32) | ||||
| #define LMAX (64) | ||||
|  | ||||
|   int Nloop=200; | ||||
|   int64_t Nloop=20; | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   int64_t threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
| @@ -54,16 +54,16 @@ int main (int argc, char ** argv) | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid);// random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid);// random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid);// random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	x=x*y; | ||||
|       } | ||||
|       double stop=usecond(); | ||||
| @@ -86,17 +86,17 @@ int main (int argc, char ** argv) | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	z=x*y; | ||||
|       } | ||||
|       double stop=usecond(); | ||||
| @@ -117,17 +117,17 @@ int main (int argc, char ** argv) | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	mult(z,x,y); | ||||
|       } | ||||
|       double stop=usecond(); | ||||
| @@ -148,17 +148,17 @@ int main (int argc, char ** argv) | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	mac(z,x,y); | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| ]#!/usr/bin/env bash | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | ||||
|  | ||||
|   | ||||
							
								
								
									
										69
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										69
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -13,6 +13,10 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||
| ################ Get git info | ||||
| #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) | ||||
|  | ||||
| ################ Set flags | ||||
| # do not move! | ||||
| CXXFLAGS="-O3 $CXXFLAGS" | ||||
|  | ||||
| ############### Checks for programs | ||||
| AC_PROG_CXX | ||||
| AC_PROG_RANLIB | ||||
| @@ -27,7 +31,6 @@ AX_GXX_VERSION | ||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||
|       [version of g++ that will compile the code]) | ||||
|  | ||||
| CXXFLAGS="-O3 $CXXFLAGS" | ||||
|  | ||||
|  | ||||
| ############### Checks for typedefs, structures, and compiler characteristics | ||||
| @@ -51,9 +54,14 @@ AC_CHECK_HEADERS(malloc/malloc.h) | ||||
| AC_CHECK_HEADERS(malloc.h) | ||||
| AC_CHECK_HEADERS(endian.h) | ||||
| AC_CHECK_HEADERS(execinfo.h) | ||||
| AC_CHECK_HEADERS(numaif.h) | ||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||
|  | ||||
| ############## Standard libraries | ||||
| AC_CHECK_LIB([m],[cos]) | ||||
| AC_CHECK_LIB([stdc++],[abort]) | ||||
|  | ||||
| ############### GMP and MPFR | ||||
| AC_ARG_WITH([gmp], | ||||
|     [AS_HELP_STRING([--with-gmp=prefix], | ||||
| @@ -184,6 +192,15 @@ AC_SEARCH_LIBS([limeCreateReader], [lime], | ||||
| In order to use ILGG file format please install or provide the correct path to your installation | ||||
| Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([crc32], [z], | ||||
|                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] | ||||
|                [have_zlib=true] [LIBS="${LIBS} -lz"], | ||||
| 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([move_pages], [numa], | ||||
|                [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] | ||||
|                [have_libnuma=true] [LIBS="${LIBS} -lnuma"], | ||||
| 	       [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||
| @@ -237,6 +254,7 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|         SIMD_FLAGS='';; | ||||
|       KNL) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) | ||||
|         SIMD_FLAGS='-march=knl';; | ||||
|       GEN) | ||||
|         AC_DEFINE([GEN],[1],[generic vector code]) | ||||
| @@ -244,6 +262,9 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|                            [generic SIMD vector width (in bytes)]) | ||||
|         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" | ||||
|         SIMD_FLAGS='';; | ||||
|       NEONv8) | ||||
|         AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) | ||||
|         SIMD_FLAGS='-march=armv8-a';; | ||||
|       QPX|BGQ) | ||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||
|         SIMD_FLAGS='';; | ||||
| @@ -272,6 +293,7 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|         SIMD_FLAGS='';; | ||||
|       KNL) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) | ||||
|         SIMD_FLAGS='-xmic-avx512';; | ||||
|       GEN) | ||||
|         AC_DEFINE([GEN],[1],[generic vector code]) | ||||
| @@ -309,8 +331,41 @@ case ${ac_PRECISION} in | ||||
|      double) | ||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) | ||||
|      ;; | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| ######################  Shared memory allocation technique under MPI3 | ||||
| AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], | ||||
|               [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) | ||||
|  | ||||
| case ${ac_SHM} in | ||||
|  | ||||
|      shmget) | ||||
|      AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) | ||||
|      ;; | ||||
|  | ||||
|      shmopen) | ||||
|      AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) | ||||
|      ;; | ||||
|  | ||||
|      hugetlbfs) | ||||
|      AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) | ||||
|      ;; | ||||
|  | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| ######################  Shared base path for SHMMMAP | ||||
| AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], | ||||
|               [Select SHM mmap base path for hugetlbfs])], | ||||
| 	      [ac_SHMPATH=${enable_shmpath}], | ||||
| 	      [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/]) | ||||
| AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) | ||||
|  | ||||
| ############### communication type selection | ||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | ||||
|               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||
| @@ -320,14 +375,14 @@ case ${ac_COMMS} in | ||||
|         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||
|         comms_type='none' | ||||
|      ;; | ||||
|      mpi3l*) | ||||
|        AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) | ||||
|        comms_type='mpi3l' | ||||
|      ;; | ||||
|      mpi3*) | ||||
|         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) | ||||
|         comms_type='mpi3' | ||||
|      ;; | ||||
|      mpit) | ||||
|         AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) | ||||
|         comms_type='mpit' | ||||
|      ;; | ||||
|      mpi*) | ||||
|         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||
|         comms_type='mpi' | ||||
| @@ -355,7 +410,7 @@ esac | ||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPIT,  [ test "${comms_type}X" == "mpitX" ] ) | ||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | ||||
|  | ||||
| ############### RNG selection | ||||
| @@ -460,6 +515,8 @@ compiler version            : ${ax_cv_gxx_version} | ||||
| SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | ||||
| Threading                   : ${ac_openmp} | ||||
| Communications type         : ${comms_type} | ||||
| Shared memory allocator     : ${ac_SHM} | ||||
| Shared memory mmap path     : ${ac_SHMPATH} | ||||
| Default precision           : ${ac_PRECISION} | ||||
| Software FP16 conversion    : ${ac_SFW_FP16} | ||||
| RNG choice                  : ${ac_RNG} | ||||
|   | ||||
| @@ -41,9 +41,10 @@ using namespace Hadrons; | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| Environment::Environment(void) | ||||
| { | ||||
|     nd_ = GridDefaultLatt().size(); | ||||
|     dim_ = GridDefaultLatt(); | ||||
|     nd_  = dim_.size(); | ||||
|     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( | ||||
|         GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||
|         dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||
|         GridDefaultMpi())); | ||||
|     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); | ||||
|     auto loc = getGrid()->LocalDimensions(); | ||||
| @@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const | ||||
|     return nd_; | ||||
| } | ||||
|  | ||||
| std::vector<int> Environment::getDim(void) const | ||||
| { | ||||
|     return dim_; | ||||
| } | ||||
|  | ||||
| int Environment::getDim(const unsigned int mu) const | ||||
| { | ||||
|     return dim_[mu]; | ||||
| } | ||||
|  | ||||
| // random number generator ///////////////////////////////////////////////////// | ||||
| void Environment::setSeed(const std::vector<int> &seed) | ||||
| { | ||||
| @@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const | ||||
|     return getModuleType(getModuleAddress(name)); | ||||
| } | ||||
|  | ||||
| std::string Environment::getModuleNamespace(const unsigned int address) const | ||||
| { | ||||
|     std::string type = getModuleType(address), ns; | ||||
|      | ||||
|     auto pos2 = type.rfind("::"); | ||||
|     auto pos1 = type.rfind("::", pos2 - 2); | ||||
|      | ||||
|     return type.substr(pos1 + 2, pos2 - pos1 - 2); | ||||
| } | ||||
|  | ||||
| std::string Environment::getModuleNamespace(const std::string name) const | ||||
| { | ||||
|     return getModuleNamespace(getModuleAddress(name)); | ||||
| } | ||||
|  | ||||
| bool Environment::hasModule(const unsigned int address) const | ||||
| { | ||||
|     return (address < module_.size()); | ||||
| @@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const | ||||
| { | ||||
|     if (hasRegisteredObject(address)) | ||||
|     { | ||||
|         return typeName(object_[address].type); | ||||
|         if (object_[address].type) | ||||
|         { | ||||
|             return typeName(object_[address].type); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return "<no type>"; | ||||
|         } | ||||
|     } | ||||
|     else if (hasObject(address)) | ||||
|     { | ||||
| @@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const | ||||
|     return getObjectSize(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| unsigned int Environment::getObjectModule(const unsigned int address) const | ||||
| { | ||||
|     if (hasObject(address)) | ||||
|     { | ||||
|         return object_[address].module; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| unsigned int Environment::getObjectModule(const std::string name) const | ||||
| { | ||||
|     return getObjectModule(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| unsigned int Environment::getObjectLs(const unsigned int address) const | ||||
| { | ||||
|     if (hasRegisteredObject(address)) | ||||
|   | ||||
| @@ -106,6 +106,8 @@ public: | ||||
|     void                    createGrid(const unsigned int Ls); | ||||
|     GridCartesian *         getGrid(const unsigned int Ls = 1) const; | ||||
|     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; | ||||
|     std::vector<int>        getDim(void) const; | ||||
|     int                     getDim(const unsigned int mu) const; | ||||
|     unsigned int            getNd(void) const; | ||||
|     // random number generator | ||||
|     void                    setSeed(const std::vector<int> &seed); | ||||
| @@ -131,6 +133,8 @@ public: | ||||
|     std::string             getModuleName(const unsigned int address) const; | ||||
|     std::string             getModuleType(const unsigned int address) const; | ||||
|     std::string             getModuleType(const std::string name) const; | ||||
|     std::string             getModuleNamespace(const unsigned int address) const; | ||||
|     std::string             getModuleNamespace(const std::string name) const; | ||||
|     bool                    hasModule(const unsigned int address) const; | ||||
|     bool                    hasModule(const std::string name) const; | ||||
|     Graph<unsigned int>     makeModuleGraph(void) const; | ||||
| @@ -171,6 +175,8 @@ public: | ||||
|     std::string             getObjectType(const std::string name) const; | ||||
|     Size                    getObjectSize(const unsigned int address) const; | ||||
|     Size                    getObjectSize(const std::string name) const; | ||||
|     unsigned int            getObjectModule(const unsigned int address) const; | ||||
|     unsigned int            getObjectModule(const std::string name) const; | ||||
|     unsigned int            getObjectLs(const unsigned int address) const; | ||||
|     unsigned int            getObjectLs(const std::string name) const; | ||||
|     bool                    hasObject(const unsigned int address) const; | ||||
| @@ -181,6 +187,10 @@ public: | ||||
|     bool                    hasCreatedObject(const std::string name) const; | ||||
|     bool                    isObject5d(const unsigned int address) const; | ||||
|     bool                    isObject5d(const std::string name) const; | ||||
|     template <typename T> | ||||
|     bool                    isObjectOfType(const unsigned int address) const; | ||||
|     template <typename T> | ||||
|     bool                    isObjectOfType(const std::string name) const; | ||||
|     Environment::Size       getTotalSize(void) const; | ||||
|     void                    addOwnership(const unsigned int owner, | ||||
|                                          const unsigned int property); | ||||
| @@ -197,6 +207,7 @@ private: | ||||
|     bool                                   dryRun_{false}; | ||||
|     unsigned int                           traj_, locVol_; | ||||
|     // grids | ||||
|     std::vector<int>                       dim_; | ||||
|     GridPt                                 grid4d_; | ||||
|     std::map<unsigned int, GridPt>         grid5d_; | ||||
|     GridRbPt                               gridRb4d_; | ||||
| @@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const | ||||
|         else | ||||
|         { | ||||
|             HADRON_ERROR("object with address " + std::to_string(address) + | ||||
|                          " does not have type '" + typeid(T).name() + | ||||
|                          " does not have type '" + typeName(&typeid(T)) + | ||||
|                          "' (has type '" + getObjectType(address) + "')"); | ||||
|         } | ||||
|     } | ||||
| @@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name) | ||||
|     return createLattice<T>(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| bool Environment::isObjectOfType(const unsigned int address) const | ||||
| { | ||||
|     if (hasRegisteredObject(address)) | ||||
|     { | ||||
|         if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get())) | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return false; | ||||
|         } | ||||
|     } | ||||
|     else if (hasObject(address)) | ||||
|     { | ||||
|         HADRON_ERROR("object with address " + std::to_string(address) + | ||||
|                      " exists but is not registered"); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| bool Environment::isObjectOfType(const std::string name) const | ||||
| { | ||||
|     return isObjectOfType<T>(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Environment_hpp_ | ||||
|   | ||||
| @@ -51,23 +51,43 @@ using Grid::operator<<; | ||||
|  * error with GCC 5 (clang & GCC 6 compile fine without it). | ||||
|  */ | ||||
|  | ||||
| // FIXME: find a way to do that in a more general fashion | ||||
| #ifndef FIMPL | ||||
| #define FIMPL WilsonImplR | ||||
| #endif | ||||
| #ifndef SIMPL | ||||
| #define SIMPL ScalarImplCR | ||||
| #endif | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| // type aliases | ||||
| #define TYPE_ALIASES(FImpl, suffix)\ | ||||
| #define FERM_TYPE_ALIASES(FImpl, suffix)\ | ||||
| typedef FermionOperator<FImpl>                       FMat##suffix;             \ | ||||
| typedef typename FImpl::FermionField                 FermionField##suffix;     \ | ||||
| typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | ||||
| typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | ||||
| typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\ | ||||
| typedef std::function<void(FermionField##suffix &,                             \ | ||||
| typedef std::vector<typename FImpl::SitePropagator::scalar_object>             \ | ||||
|                                                      SlicedPropagator##suffix; | ||||
|  | ||||
| #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ | ||||
| typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; | ||||
|  | ||||
| #define SCALAR_TYPE_ALIASES(SImpl, suffix)\ | ||||
| typedef typename SImpl::Field ScalarField##suffix;\ | ||||
| typedef typename SImpl::Field PropagatorField##suffix; | ||||
|  | ||||
| #define SOLVER_TYPE_ALIASES(FImpl, suffix)\ | ||||
| typedef std::function<void(FermionField##suffix &,\ | ||||
|                       const FermionField##suffix &)> SolverFn##suffix; | ||||
|  | ||||
| #define SINK_TYPE_ALIASES(suffix)\ | ||||
| typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix; | ||||
|  | ||||
| #define FGS_TYPE_ALIASES(FImpl, suffix)\ | ||||
| FERM_TYPE_ALIASES(FImpl, suffix)\ | ||||
| GAUGE_TYPE_ALIASES(FImpl, suffix)\ | ||||
| SOLVER_TYPE_ALIASES(FImpl, suffix) | ||||
|  | ||||
| // logger | ||||
| class HadronsLogger: public Logger | ||||
| { | ||||
|   | ||||
| @@ -1,31 +1,3 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules.hpp | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||
| @@ -36,13 +8,18 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||
| #include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||
| #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSink/Point.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/Wall.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||
| #include <Grid/Hadrons/Modules/Quark.hpp> | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_DWF_hpp_ | ||||
| #define Hadrons_DWF_hpp_ | ||||
| #ifndef Hadrons_MAction_DWF_hpp_ | ||||
| #define Hadrons_MAction_DWF_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -48,14 +48,15 @@ public: | ||||
|                                     std::string, gauge, | ||||
|                                     unsigned int, Ls, | ||||
|                                     double      , mass, | ||||
|                                     double      , M5); | ||||
|                                     double      , M5, | ||||
|                                     std::string , boundary); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TDWF: public Module<DWFPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TDWF(const std::string name); | ||||
| @@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void) | ||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||
|                  << std::endl; | ||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||
|                  << std::endl; | ||||
|     env().createGrid(par().Ls); | ||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||
|     auto &g4     = *env().getGrid(); | ||||
|     auto &grb4   = *env().getRbGrid(); | ||||
|     auto &g5     = *env().getGrid(par().Ls); | ||||
|     auto &grb5   = *env().getRbGrid(par().Ls); | ||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||
|     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); | ||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||
|                                                 par().mass, par().M5); | ||||
|                                                 par().mass, par().M5, | ||||
|                                                 implParams); | ||||
|     env().setObject(getName(), fMatPt); | ||||
| } | ||||
|  | ||||
| @@ -131,4 +137,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_DWF_hpp_ | ||||
| #endif // Hadrons_MAction_DWF_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Wilson_hpp_ | ||||
| #define Hadrons_Wilson_hpp_ | ||||
| #ifndef Hadrons_MAction_Wilson_hpp_ | ||||
| #define Hadrons_MAction_Wilson_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -46,14 +46,15 @@ class WilsonPar: Serializable | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||
|                                     std::string, gauge, | ||||
|                                     double     , mass); | ||||
|                                     double     , mass, | ||||
|                                     std::string, boundary); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TWilson: public Module<WilsonPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TWilson(const std::string name); | ||||
| @@ -112,10 +113,15 @@ void TWilson<FImpl>::execute() | ||||
| { | ||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||
|                  << std::endl; | ||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||
|     auto &grid   = *env().getGrid(); | ||||
|     auto &gridRb = *env().getRbGrid(); | ||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); | ||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||
|     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); | ||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, | ||||
|                                             implParams); | ||||
|     env().setObject(getName(), fMatPt); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Baryon_hpp_ | ||||
| #define Hadrons_Baryon_hpp_ | ||||
| #ifndef Hadrons_MContraction_Baryon_hpp_ | ||||
| #define Hadrons_MContraction_Baryon_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| class TBaryon: public Module<BaryonPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl1, 1); | ||||
|     TYPE_ALIASES(FImpl2, 2); | ||||
|     TYPE_ALIASES(FImpl3, 3); | ||||
|     FERM_TYPE_ALIASES(FImpl1, 1); | ||||
|     FERM_TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(FImpl3, 3); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
| @@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | ||||
|      | ||||
|     // FIXME: do contractions | ||||
|      | ||||
|     write(writer, "meson", result); | ||||
|     // write(writer, "meson", result); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Baryon_hpp_ | ||||
| #endif // Hadrons_MContraction_Baryon_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_DiscLoop_hpp_ | ||||
| #define Hadrons_DiscLoop_hpp_ | ||||
| #ifndef Hadrons_MContraction_DiscLoop_hpp_ | ||||
| #define Hadrons_MContraction_DiscLoop_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -52,7 +52,7 @@ public: | ||||
| template <typename FImpl> | ||||
| class TDiscLoop: public Module<DiscLoopPar> | ||||
| { | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
| @@ -141,4 +141,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_DiscLoop_hpp_ | ||||
| #endif // Hadrons_MContraction_DiscLoop_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Gamma3pt_hpp_ | ||||
| #define Hadrons_Gamma3pt_hpp_ | ||||
| #ifndef Hadrons_MContraction_Gamma3pt_hpp_ | ||||
| #define Hadrons_MContraction_Gamma3pt_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -72,9 +72,9 @@ public: | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| class TGamma3pt: public Module<Gamma3ptPar> | ||||
| { | ||||
|     TYPE_ALIASES(FImpl1, 1); | ||||
|     TYPE_ALIASES(FImpl2, 2); | ||||
|     TYPE_ALIASES(FImpl3, 3); | ||||
|     FERM_TYPE_ALIASES(FImpl1, 1); | ||||
|     FERM_TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(FImpl3, 3); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
| @@ -167,4 +167,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Gamma3pt_hpp_ | ||||
| #endif // Hadrons_MContraction_Gamma3pt_hpp_ | ||||
|   | ||||
| @@ -29,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Meson_hpp_ | ||||
| #define Hadrons_Meson_hpp_ | ||||
| #ifndef Hadrons_MContraction_Meson_hpp_ | ||||
| #define Hadrons_MContraction_Meson_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -69,7 +69,7 @@ public: | ||||
|                                     std::string, q1, | ||||
|                                     std::string, q2, | ||||
|                                     std::string, gammas, | ||||
|                                     std::string, mom, | ||||
|                                     std::string, sink, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| @@ -77,8 +77,10 @@ template <typename FImpl1, typename FImpl2> | ||||
| class TMeson: public Module<MesonPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl1, 1); | ||||
|     TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(FImpl1, 1); | ||||
|     FERM_TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(ScalarImplCR, Scalar); | ||||
|     SINK_TYPE_ALIASES(Scalar); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
| @@ -115,7 +117,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name) | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> input = {par().q1, par().q2}; | ||||
|     std::vector<std::string> input = {par().q1, par().q2, par().sink}; | ||||
|      | ||||
|     return input; | ||||
| } | ||||
| @@ -131,12 +133,11 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) | ||||
| { | ||||
|     gammaList.clear(); | ||||
|     // Determine gamma matrices to insert at source/sink. | ||||
|     if (par().gammas.compare("all") == 0) | ||||
|     { | ||||
|         // Do all contractions. | ||||
|         unsigned int n_gam = Ns * Ns; | ||||
|         gammaList.resize(n_gam*n_gam); | ||||
|         for (unsigned int i = 1; i < Gamma::nGamma; i += 2) | ||||
|         { | ||||
|             for (unsigned int j = 1; j < Gamma::nGamma; j += 2) | ||||
| @@ -155,6 +156,9 @@ void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) | ||||
|  | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| #define mesonConnected(q1, q2, gSnk, gSrc) \ | ||||
| (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) | ||||
|  | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| void TMeson<FImpl1, FImpl2>::execute(void) | ||||
| { | ||||
| @@ -162,43 +166,72 @@ void TMeson<FImpl1, FImpl2>::execute(void) | ||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||
|                  << std::endl; | ||||
|      | ||||
|     CorrWriter              writer(par().output); | ||||
|     PropagatorField1       &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||
|     PropagatorField2       &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||
|     LatticeComplex         c(env().getGrid()); | ||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); | ||||
|     std::vector<GammaPair> gammaList; | ||||
|     CorrWriter             writer(par().output); | ||||
|     std::vector<TComplex>  buf; | ||||
|     std::vector<Result>    result; | ||||
|     std::vector<Real>      p; | ||||
|  | ||||
|     p  = strToVec<Real>(par().mom); | ||||
|     LatticeComplex         ph(env().getGrid()), coor(env().getGrid()); | ||||
|     Complex                i(0.0,1.0); | ||||
|     ph = zero; | ||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||
|     { | ||||
|         LatticeCoordinate(coor, mu); | ||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||
|     } | ||||
|     ph = exp((Real)(2*M_PI)*i*ph); | ||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); | ||||
|     std::vector<GammaPair> gammaList; | ||||
|     int                    nt = env().getDim(Tp); | ||||
|      | ||||
|     parseGammaString(gammaList); | ||||
|  | ||||
|     result.resize(gammaList.size()); | ||||
|     for (unsigned int i = 0; i < result.size(); ++i) | ||||
|     { | ||||
|         Gamma gSnk(gammaList[i].first); | ||||
|         Gamma gSrc(gammaList[i].second); | ||||
|         c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph; | ||||
|         sliceSum(c, buf, Tp); | ||||
|  | ||||
|         result[i].gamma_snk = gammaList[i].first; | ||||
|         result[i].gamma_src = gammaList[i].second; | ||||
|         result[i].corr.resize(buf.size()); | ||||
|         for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|         result[i].corr.resize(nt); | ||||
|     } | ||||
|     if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and | ||||
|         env().template isObjectOfType<SlicedPropagator2>(par().q2)) | ||||
|     { | ||||
|         SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1); | ||||
|         SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2); | ||||
|          | ||||
|         LOG(Message) << "(propagator already sinked)" << std::endl; | ||||
|         for (unsigned int i = 0; i < result.size(); ++i) | ||||
|         { | ||||
|             result[i].corr[t] = TensorRemove(buf[t]); | ||||
|             Gamma gSnk(gammaList[i].first); | ||||
|             Gamma gSrc(gammaList[i].second); | ||||
|              | ||||
|             for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|             { | ||||
|                 result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         PropagatorField1 &q1   = *env().template getObject<PropagatorField1>(par().q1); | ||||
|         PropagatorField2 &q2   = *env().template getObject<PropagatorField2>(par().q2); | ||||
|         LatticeComplex   c(env().getGrid()); | ||||
|          | ||||
|         LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; | ||||
|         for (unsigned int i = 0; i < result.size(); ++i) | ||||
|         { | ||||
|             Gamma       gSnk(gammaList[i].first); | ||||
|             Gamma       gSrc(gammaList[i].second); | ||||
|             std::string ns; | ||||
|                  | ||||
|             ns = env().getModuleNamespace(env().getObjectModule(par().sink)); | ||||
|             if (ns == "MSource") | ||||
|             { | ||||
|                 PropagatorField1 &sink = | ||||
|                     *env().template getObject<PropagatorField1>(par().sink); | ||||
|                  | ||||
|                 c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); | ||||
|                 sliceSum(c, buf, Tp); | ||||
|             } | ||||
|             else if (ns == "MSink") | ||||
|             { | ||||
|                 SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink); | ||||
|                  | ||||
|                 c   = trace(mesonConnected(q1, q2, gSnk, gSrc)); | ||||
|                 buf = sink(c); | ||||
|             } | ||||
|             for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|             { | ||||
|                 result[i].corr[t] = TensorRemove(buf[t]); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     write(writer, "meson", result); | ||||
| @@ -208,4 +241,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Meson_hpp_ | ||||
| #endif // Hadrons_MContraction_Meson_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_WeakHamiltonian_hpp_ | ||||
| #define Hadrons_WeakHamiltonian_hpp_ | ||||
| #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||
| #define Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -83,7 +83,7 @@ public: | ||||
| class T##modname: public Module<WeakHamiltonianPar>\ | ||||
| {\ | ||||
| public:\ | ||||
|     TYPE_ALIASES(FIMPL,)\ | ||||
|     FERM_TYPE_ALIASES(FIMPL,)\ | ||||
|     class Result: Serializable\ | ||||
|     {\ | ||||
|     public:\ | ||||
| @@ -111,4 +111,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_WeakHamiltonian_hpp_ | ||||
| #endif // Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_WeakHamiltonianEye_hpp_ | ||||
| #define Hadrons_WeakHamiltonianEye_hpp_ | ||||
| #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||
| #define Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
|  | ||||
| @@ -55,4 +55,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_WeakHamiltonianEye_hpp_ | ||||
| #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_WeakHamiltonianNonEye_hpp_ | ||||
| #define Hadrons_WeakHamiltonianNonEye_hpp_ | ||||
| #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||
| #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
|  | ||||
| @@ -54,4 +54,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_WeakHamiltonianNonEye_hpp_ | ||||
| #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_WeakNeutral4ptDisc_hpp_ | ||||
| #define Hadrons_WeakNeutral4ptDisc_hpp_ | ||||
| #ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||
| #define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
|  | ||||
| @@ -56,4 +56,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_WeakNeutral4ptDisc_hpp_ | ||||
| #endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||
|   | ||||
| @@ -1,34 +1,5 @@ | ||||
| /*************************************************************************************
 | ||||
| 
 | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
| 
 | ||||
| Source file: extras/Hadrons/Modules/Quark.hpp | ||||
| 
 | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
| 
 | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| 
 | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| 
 | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| 
 | ||||
| #ifndef Hadrons_Quark_hpp_ | ||||
| #define Hadrons_Quark_hpp_ | ||||
| #ifndef Hadrons_MFermion_GaugeProp_hpp_ | ||||
| #define Hadrons_MFermion_GaugeProp_hpp_ | ||||
| 
 | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
| 
 | ||||
| /******************************************************************************
 | ||||
|  *                               TQuark                                       * | ||||
|  *                                GaugeProp                                   * | ||||
|  ******************************************************************************/ | ||||
| class QuarkPar: Serializable | ||||
| BEGIN_MODULE_NAMESPACE(MFermion) | ||||
| 
 | ||||
| class GaugePropPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, | ||||
|                                     std::string, source, | ||||
|                                     std::string, solver); | ||||
| }; | ||||
| 
 | ||||
| template <typename FImpl> | ||||
| class TQuark: public Module<QuarkPar> | ||||
| class TGaugeProp: public Module<GaugePropPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor
 | ||||
|     TQuark(const std::string name); | ||||
|     TGaugeProp(const std::string name); | ||||
|     // destructor
 | ||||
|     virtual ~TQuark(void) = default; | ||||
|     // dependencies/products
 | ||||
|     virtual ~TGaugeProp(void) = default; | ||||
|     // dependency relation
 | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup
 | ||||
| @@ -69,20 +42,20 @@ private: | ||||
|     SolverFn     *solver_{nullptr}; | ||||
| }; | ||||
| 
 | ||||
| MODULE_REGISTER(Quark, TQuark<FIMPL>); | ||||
| MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion); | ||||
| 
 | ||||
| /******************************************************************************
 | ||||
|  *                          TQuark implementation                             * | ||||
|  *                      TGaugeProp implementation                             * | ||||
|  ******************************************************************************/ | ||||
| // constructor /////////////////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| TQuark<FImpl>::TQuark(const std::string name) | ||||
| : Module(name) | ||||
| TGaugeProp<FImpl>::TGaugeProp(const std::string name) | ||||
| : Module<GaugePropPar>(name) | ||||
| {} | ||||
| 
 | ||||
| // dependencies/products ///////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||
| std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().source, par().solver}; | ||||
|      | ||||
| @@ -90,7 +63,7 @@ std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||
| } | ||||
| 
 | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||
| std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName(), getName() + "_5d"}; | ||||
|      | ||||
| @@ -99,7 +72,7 @@ std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||
| 
 | ||||
| // setup ///////////////////////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| void TQuark<FImpl>::setup(void) | ||||
| void TGaugeProp<FImpl>::setup(void) | ||||
| { | ||||
|     Ls_ = env().getObjectLs(par().solver); | ||||
|     env().template registerLattice<PropagatorField>(getName()); | ||||
| @@ -111,13 +84,13 @@ void TQuark<FImpl>::setup(void) | ||||
| 
 | ||||
| // execution ///////////////////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| void TQuark<FImpl>::execute(void) | ||||
| void TGaugeProp<FImpl>::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing quark propagator '" << getName() << "'" | ||||
|                  << std::endl; | ||||
|     << std::endl; | ||||
|      | ||||
|     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), | ||||
|                     tmp(env().getGrid()); | ||||
|     tmp(env().getGrid()); | ||||
|     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); | ||||
|     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName); | ||||
|     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source); | ||||
| @@ -128,7 +101,7 @@ void TQuark<FImpl>::execute(void) | ||||
|     } | ||||
|      | ||||
|     LOG(Message) << "Inverting using solver '" << par().solver | ||||
|                  << "' on source '" << par().source << "'" << std::endl; | ||||
|     << "' on source '" << par().source << "'" << std::endl; | ||||
|     for (unsigned int s = 0; s < Ns; ++s) | ||||
|     for (unsigned int c = 0; c < Nc; ++c) | ||||
|     { | ||||
| @@ -170,7 +143,7 @@ void TQuark<FImpl>::execute(void) | ||||
|         if (Ls_ > 1) | ||||
|         { | ||||
|             PropagatorField &p4d = | ||||
|                 *env().template getObject<PropagatorField>(getName()); | ||||
|             *env().template getObject<PropagatorField>(getName()); | ||||
|              | ||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||
|             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); | ||||
| @@ -180,6 +153,8 @@ void TQuark<FImpl>::execute(void) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| END_MODULE_NAMESPACE | ||||
| 
 | ||||
| END_HADRONS_NAMESPACE | ||||
| 
 | ||||
| #endif // Hadrons_Quark_hpp_
 | ||||
| #endif // Hadrons_MFermion_GaugeProp_hpp_
 | ||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TLoad::execute(void) | ||||
| { | ||||
|     NerscField  header; | ||||
|     FieldMetaData  header; | ||||
|     std::string fileName = par().file + "." | ||||
|                            + std::to_string(env().getTrajectory()); | ||||
|      | ||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | ||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||
|     NerscIO::readConfiguration(U, header, fileName); | ||||
|     LOG(Message) << "NERSC header:" << std::endl; | ||||
|     dump_nersc_header(header, LOG(Message)); | ||||
|     dump_meta_data(header, LOG(Message)); | ||||
| } | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Load_hpp_ | ||||
| #define Hadrons_Load_hpp_ | ||||
| #ifndef Hadrons_MGauge_Load_hpp_ | ||||
| #define Hadrons_MGauge_Load_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Load_hpp_ | ||||
| #endif // Hadrons_MGauge_Load_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Random_hpp_ | ||||
| #define Hadrons_Random_hpp_ | ||||
| #ifndef Hadrons_MGauge_Random_hpp_ | ||||
| #define Hadrons_MGauge_Random_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Random_hpp_ | ||||
| #endif // Hadrons_MGauge_Random_hpp_ | ||||
|   | ||||
							
								
								
									
										88
									
								
								extras/Hadrons/Modules/MGauge/StochEm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								extras/Hadrons/Modules/MGauge/StochEm.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.cc | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
|  | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MGauge; | ||||
|  | ||||
| /****************************************************************************** | ||||
| *                  TStochEm implementation                             * | ||||
| ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TStochEm::TStochEm(const std::string name) | ||||
| : Module<StochEmPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TStochEm::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TStochEm::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TStochEm::setup(void) | ||||
| { | ||||
|     if (!env().hasRegisteredObject("_" + getName() + "_weight")) | ||||
|     { | ||||
|         env().registerLattice<EmComp>("_" + getName() + "_weight"); | ||||
|     } | ||||
|     env().registerLattice<EmField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TStochEm::execute(void) | ||||
| { | ||||
|     PhotonR photon(par().gauge, par().zmScheme); | ||||
|     EmField &a = *env().createLattice<EmField>(getName()); | ||||
|     EmComp  *w; | ||||
|      | ||||
|     if (!env().hasCreatedObject("_" + getName() + "_weight")) | ||||
|     { | ||||
|         LOG(Message) << "Caching stochatic EM potential weight (gauge: " | ||||
|                      << par().gauge << ", zero-mode scheme: " | ||||
|                      << par().zmScheme << ")..." << std::endl; | ||||
|         w = env().createLattice<EmComp>("_" + getName() + "_weight"); | ||||
|         photon.StochasticWeight(*w); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         w = env().getObject<EmComp>("_" + getName() + "_weight"); | ||||
|     } | ||||
|     LOG(Message) << "Generating stochatic EM potential..." << std::endl; | ||||
|     photon.StochasticField(a, *env().get4dRng(), *w); | ||||
| } | ||||
							
								
								
									
										75
									
								
								extras/Hadrons/Modules/MGauge/StochEm.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								extras/Hadrons/Modules/MGauge/StochEm.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
|  | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef Hadrons_MGauge_StochEm_hpp_ | ||||
| #define Hadrons_MGauge_StochEm_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         StochEm                                 * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MGauge) | ||||
|  | ||||
| class StochEmPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, | ||||
|                                     PhotonR::Gauge,    gauge, | ||||
|                                     PhotonR::ZmScheme, zmScheme); | ||||
| }; | ||||
|  | ||||
| class TStochEm: public Module<StochEmPar> | ||||
| { | ||||
| public: | ||||
|     typedef PhotonR::GaugeField     EmField; | ||||
|     typedef PhotonR::GaugeLinkField EmComp; | ||||
| public: | ||||
|     // constructor | ||||
|     TStochEm(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TStochEm(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MGauge_StochEm_hpp_ | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Unit_hpp_ | ||||
| #define Hadrons_Unit_hpp_ | ||||
| #ifndef Hadrons_MGauge_Unit_hpp_ | ||||
| #define Hadrons_MGauge_Unit_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Unit_hpp_ | ||||
| #endif // Hadrons_MGauge_Unit_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_NoiseLoop_hpp_ | ||||
| #define Hadrons_NoiseLoop_hpp_ | ||||
| #ifndef Hadrons_MLoop_NoiseLoop_hpp_ | ||||
| #define Hadrons_MLoop_NoiseLoop_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -65,7 +65,7 @@ template <typename FImpl> | ||||
| class TNoiseLoop: public Module<NoiseLoopPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TNoiseLoop(const std::string name); | ||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_NoiseLoop_hpp_ | ||||
| #endif // Hadrons_MLoop_NoiseLoop_hpp_ | ||||
|   | ||||
							
								
								
									
										226
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										226
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,226 @@ | ||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MScalar; | ||||
|  | ||||
| /****************************************************************************** | ||||
| *                     TChargedProp implementation                             * | ||||
| ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TChargedProp::TChargedProp(const std::string name) | ||||
| : Module<ChargedPropPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TChargedProp::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().source, par().emField}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TChargedProp::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TChargedProp::setup(void) | ||||
| { | ||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); | ||||
|     phaseName_.clear(); | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         phaseName_.push_back("_shiftphase_" + std::to_string(mu)); | ||||
|     } | ||||
|     GFSrcName_ = "_" + getName() + "_DinvSrc"; | ||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) | ||||
|     { | ||||
|         env().registerLattice<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     if (!env().hasRegisteredObject(phaseName_[0])) | ||||
|     { | ||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|         { | ||||
|             env().registerLattice<ScalarField>(phaseName_[mu]); | ||||
|         } | ||||
|     } | ||||
|     if (!env().hasRegisteredObject(GFSrcName_)) | ||||
|     { | ||||
|         env().registerLattice<ScalarField>(GFSrcName_); | ||||
|     } | ||||
|     env().registerLattice<ScalarField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TChargedProp::execute(void) | ||||
| { | ||||
|     // CACHING ANALYTIC EXPRESSIONS | ||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); | ||||
|     Complex     ci(0.0,1.0); | ||||
|     FFT         fft(env().getGrid()); | ||||
|      | ||||
|     // cache free scalar propagator | ||||
|     if (!env().hasCreatedObject(freeMomPropName_)) | ||||
|     { | ||||
|         LOG(Message) << "Caching momentum space free scalar propagator" | ||||
|                      << " (mass= " << par().mass << ")..." << std::endl; | ||||
|         freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); | ||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     // cache G*F*src | ||||
|     if (!env().hasCreatedObject(GFSrcName_)) | ||||
|          | ||||
|     { | ||||
|         GFSrc_ = env().createLattice<ScalarField>(GFSrcName_); | ||||
|         fft.FFT_all_dim(*GFSrc_, source, FFT::forward); | ||||
|         *GFSrc_ = (*freeMomProp_)*(*GFSrc_); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         GFSrc_ = env().getObject<ScalarField>(GFSrcName_); | ||||
|     } | ||||
|     // cache phases | ||||
|     if (!env().hasCreatedObject(phaseName_[0])) | ||||
|     { | ||||
|         std::vector<int> &l = env().getGrid()->_fdimensions; | ||||
|          | ||||
|         LOG(Message) << "Caching shift phases..." << std::endl; | ||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|         { | ||||
|             Real    twoPiL = M_PI*2./l[mu]; | ||||
|              | ||||
|             phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu])); | ||||
|             LatticeCoordinate(*(phase_[mu]), mu); | ||||
|             *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|         { | ||||
|             phase_.push_back(env().getObject<ScalarField>(phaseName_[mu])); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // PROPAGATOR CALCULATION | ||||
|     LOG(Message) << "Computing charged scalar propagator" | ||||
|                  << " (mass= " << par().mass | ||||
|                  << ", charge= " << par().charge << ")..." << std::endl; | ||||
|      | ||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); | ||||
|     ScalarField buf(env().getGrid()); | ||||
|     ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; | ||||
|     double      q = par().charge; | ||||
|      | ||||
|     // G*F*Src | ||||
|     prop = GFSrc; | ||||
|  | ||||
|     // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) | ||||
|     buf = GFSrc; | ||||
|     momD1(buf, fft); | ||||
|     buf = G*buf; | ||||
|     prop = prop - q*buf; | ||||
|  | ||||
|     // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) | ||||
|     momD1(buf, fft); | ||||
|     prop = prop + q*q*G*buf; | ||||
|  | ||||
|     // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) | ||||
|     buf = GFSrc; | ||||
|     momD2(buf, fft); | ||||
|     prop = prop - q*q*G*buf; | ||||
|  | ||||
|     // final FT | ||||
|     fft.FFT_all_dim(prop, prop, FFT::backward); | ||||
|      | ||||
|     // OUTPUT IF NECESSARY | ||||
|     if (!par().output.empty()) | ||||
|     { | ||||
|         std::string           filename = par().output + "." + | ||||
|                                          std::to_string(env().getTrajectory()); | ||||
|          | ||||
|         LOG(Message) << "Saving zero-momentum projection to '" | ||||
|                      << filename << "'..." << std::endl; | ||||
|          | ||||
|         CorrWriter            writer(filename); | ||||
|         std::vector<TComplex> vecBuf; | ||||
|         std::vector<Complex>  result; | ||||
|          | ||||
|         sliceSum(prop, vecBuf, Tp); | ||||
|         result.resize(vecBuf.size()); | ||||
|         for (unsigned int t = 0; t < vecBuf.size(); ++t) | ||||
|         { | ||||
|             result[t] = TensorRemove(vecBuf[t]); | ||||
|         } | ||||
|         write(writer, "charge", q); | ||||
|         write(writer, "prop", result); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void TChargedProp::momD1(ScalarField &s, FFT &fft) | ||||
| { | ||||
|     EmField     &A = *env().getObject<EmField>(par().emField); | ||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), | ||||
|                 Amu(env().getGrid()); | ||||
|     Complex     ci(0.0,1.0); | ||||
|  | ||||
|     result = zero; | ||||
|  | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu); | ||||
|         buf = (*phase_[mu])*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); | ||||
|         buf = Amu*buf; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result - ci*buf; | ||||
|     } | ||||
|     fft.FFT_all_dim(s, s, FFT::backward); | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu); | ||||
|         buf = Amu*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result + ci*adj(*phase_[mu])*buf; | ||||
|     } | ||||
|  | ||||
|     s = result; | ||||
| } | ||||
|  | ||||
| void TChargedProp::momD2(ScalarField &s, FFT &fft) | ||||
| { | ||||
|     EmField     &A = *env().getObject<EmField>(par().emField); | ||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), | ||||
|                 Amu(env().getGrid()); | ||||
|  | ||||
|     result = zero; | ||||
|      | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu); | ||||
|         buf = (*phase_[mu])*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); | ||||
|         buf = Amu*Amu*buf; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result + .5*buf; | ||||
|     } | ||||
|     fft.FFT_all_dim(s, s, FFT::backward); | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu);         | ||||
|         buf = Amu*Amu*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result + .5*adj(*phase_[mu])*buf; | ||||
|     } | ||||
|  | ||||
|     s = result; | ||||
| } | ||||
							
								
								
									
										61
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| #ifndef Hadrons_MScalar_ChargedProp_hpp_ | ||||
| #define Hadrons_MScalar_ChargedProp_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       Charged scalar propagator                            * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MScalar) | ||||
|  | ||||
| class ChargedPropPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, | ||||
|                                     std::string, emField, | ||||
|                                     std::string, source, | ||||
|                                     double,      mass, | ||||
|                                     double,      charge, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| class TChargedProp: public Module<ChargedPropPar> | ||||
| { | ||||
| public: | ||||
|     SCALAR_TYPE_ALIASES(SIMPL,); | ||||
|     typedef PhotonR::GaugeField     EmField; | ||||
|     typedef PhotonR::GaugeLinkField EmComp; | ||||
| public: | ||||
|     // constructor | ||||
|     TChargedProp(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TChargedProp(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| private: | ||||
|     void momD1(ScalarField &s, FFT &fft); | ||||
|     void momD2(ScalarField &s, FFT &fft); | ||||
| private: | ||||
|     std::string                freeMomPropName_, GFSrcName_; | ||||
|     std::vector<std::string>   phaseName_; | ||||
|     ScalarField                *freeMomProp_, *GFSrc_; | ||||
|     std::vector<ScalarField *> phase_; | ||||
|     EmField                    *A; | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MScalar_ChargedProp_hpp_ | ||||
							
								
								
									
										79
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,79 @@ | ||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MScalar; | ||||
|  | ||||
| /****************************************************************************** | ||||
| *                        TFreeProp implementation                             * | ||||
| ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TFreeProp::TFreeProp(const std::string name) | ||||
| : Module<FreePropPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TFreeProp::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().source}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TFreeProp::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TFreeProp::setup(void) | ||||
| { | ||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); | ||||
|      | ||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) | ||||
|     { | ||||
|         env().registerLattice<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     env().registerLattice<ScalarField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TFreeProp::execute(void) | ||||
| { | ||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); | ||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); | ||||
|     ScalarField *freeMomProp; | ||||
|  | ||||
|     if (!env().hasCreatedObject(freeMomPropName_)) | ||||
|     { | ||||
|         LOG(Message) << "Caching momentum space free scalar propagator" | ||||
|                      << " (mass= " << par().mass << ")..." << std::endl; | ||||
|         freeMomProp = env().createLattice<ScalarField>(freeMomPropName_); | ||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         freeMomProp = env().getObject<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     LOG(Message) << "Computing free scalar propagator..." << std::endl; | ||||
|     SIMPL::FreePropagator(source, prop, *freeMomProp); | ||||
|      | ||||
|     if (!par().output.empty()) | ||||
|     { | ||||
|         TextWriter            writer(par().output + "." + | ||||
|                                      std::to_string(env().getTrajectory())); | ||||
|         std::vector<TComplex> buf; | ||||
|         std::vector<Complex>  result; | ||||
|          | ||||
|         sliceSum(prop, buf, Tp); | ||||
|         result.resize(buf.size()); | ||||
|         for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|         { | ||||
|             result[t] = TensorRemove(buf[t]); | ||||
|         } | ||||
|         write(writer, "prop", result); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										50
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| #ifndef Hadrons_MScalar_FreeProp_hpp_ | ||||
| #define Hadrons_MScalar_FreeProp_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                               FreeProp                                     * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MScalar) | ||||
|  | ||||
| class FreePropPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, | ||||
|                                     std::string, source, | ||||
|                                     double,      mass, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| class TFreeProp: public Module<FreePropPar> | ||||
| { | ||||
| public: | ||||
|     SCALAR_TYPE_ALIASES(SIMPL,); | ||||
| public: | ||||
|     // constructor | ||||
|     TFreeProp(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TFreeProp(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| private: | ||||
|     std::string freeMomPropName_; | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MScalar_FreeProp_hpp_ | ||||
							
								
								
									
										6
									
								
								extras/Hadrons/Modules/MScalar/Scalar.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								extras/Hadrons/Modules/MScalar/Scalar.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
#ifndef Hadrons_MScalar_Scalar_hpp_
#define Hadrons_MScalar_Scalar_hpp_

#include <string>

// Name of the object holding the momentum-space free scalar propagator for
// mass m. std::to_string prints floating-point values with six decimals, so
// masses that agree to six decimals map to the same name.
// The expansion is parenthesised so that it behaves as a single std::string
// wherever it is used, e.g. FREEMOMPROP(m).size().
#define FREEMOMPROP(m) ("_scalar_mom_prop_" + std::to_string(m))

#endif // Hadrons_MScalar_Scalar_hpp_
							
								
								
									
										114
									
								
								extras/Hadrons/Modules/MSink/Point.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								extras/Hadrons/Modules/MSink/Point.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | ||||
| #ifndef Hadrons_MSink_Point_hpp_ | ||||
| #define Hadrons_MSink_Point_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                                   Point                                    * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MSink) | ||||
|  | ||||
| class PointPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, | ||||
|                                     std::string, mom); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TPoint: public Module<PointPar> | ||||
| { | ||||
| public: | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
|     SINK_TYPE_ALIASES(); | ||||
| public: | ||||
|     // constructor | ||||
|     TPoint(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TPoint(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink); | ||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                          TPoint implementation                             * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| TPoint<FImpl>::TPoint(const std::string name) | ||||
| : Module<PointPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TPoint<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TPoint<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TPoint<FImpl>::setup(void) | ||||
| { | ||||
|     unsigned int size; | ||||
|      | ||||
|     size = env().template lattice4dSize<LatticeComplex>(); | ||||
|     env().registerObject(getName(), size); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TPoint<FImpl>::execute(void) | ||||
| { | ||||
|     std::vector<Real> p = strToVec<Real>(par().mom); | ||||
|     LatticeComplex    ph(env().getGrid()), coor(env().getGrid()); | ||||
|     Complex           i(0.0,1.0); | ||||
|      | ||||
|     LOG(Message) << "Setting up point sink function for momentum [" | ||||
|                  << par().mom << "]" << std::endl; | ||||
|     ph = zero; | ||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||
|     { | ||||
|         LatticeCoordinate(coor, mu); | ||||
|         ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; | ||||
|     } | ||||
|     ph = exp((Real)(2*M_PI)*i*ph); | ||||
|     auto sink = [ph](const PropagatorField &field) | ||||
|     { | ||||
|         SlicedPropagator res; | ||||
|         PropagatorField  tmp = ph*field; | ||||
|          | ||||
|         sliceSum(tmp, res, Tp); | ||||
|          | ||||
|         return res; | ||||
|     }; | ||||
|     env().setObject(getName(), new SinkFn(sink)); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MSink_Point_hpp_ | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_RBPrecCG_hpp_ | ||||
| #define Hadrons_RBPrecCG_hpp_ | ||||
| #ifndef Hadrons_MSolver_RBPrecCG_hpp_ | ||||
| #define Hadrons_MSolver_RBPrecCG_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -53,7 +53,7 @@ template <typename FImpl> | ||||
| class TRBPrecCG: public Module<RBPrecCGPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TRBPrecCG(const std::string name); | ||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_RBPrecCG_hpp_ | ||||
| #endif // Hadrons_MSolver_RBPrecCG_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Point_hpp_ | ||||
| #define Hadrons_Point_hpp_ | ||||
| #ifndef Hadrons_MSource_Point_hpp_ | ||||
| #define Hadrons_MSource_Point_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -63,7 +63,7 @@ template <typename FImpl> | ||||
| class TPoint: public Module<PointPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TPoint(const std::string name); | ||||
| @@ -78,7 +78,8 @@ public: | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); | ||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSource); | ||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       TPoint template implementation                       * | ||||
| @@ -132,4 +133,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Point_hpp_ | ||||
| #endif // Hadrons_MSource_Point_hpp_ | ||||
|   | ||||
| @@ -28,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_SeqGamma_hpp_ | ||||
| #define Hadrons_SeqGamma_hpp_ | ||||
| #ifndef Hadrons_MSource_SeqGamma_hpp_ | ||||
| #define Hadrons_MSource_SeqGamma_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -72,7 +72,7 @@ template <typename FImpl> | ||||
| class TSeqGamma: public Module<SeqGammaPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TSeqGamma(const std::string name); | ||||
| @@ -161,4 +161,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_SeqGamma_hpp_ | ||||
| #endif // Hadrons_MSource_SeqGamma_hpp_ | ||||
|   | ||||
| @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_WallSource_hpp_ | ||||
| #define Hadrons_WallSource_hpp_ | ||||
| #ifndef Hadrons_MSource_WallSource_hpp_ | ||||
| #define Hadrons_MSource_WallSource_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -64,7 +64,7 @@ template <typename FImpl> | ||||
| class TWall: public Module<WallPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TWall(const std::string name); | ||||
| @@ -144,4 +144,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_WallSource_hpp_ | ||||
| #endif // Hadrons_MSource_WallSource_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Z2_hpp_ | ||||
| #define Hadrons_Z2_hpp_ | ||||
| #ifndef Hadrons_MSource_Z2_hpp_ | ||||
| #define Hadrons_MSource_Z2_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -67,7 +67,7 @@ template <typename FImpl> | ||||
| class TZ2: public Module<Z2Par> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TZ2(const std::string name); | ||||
| @@ -82,7 +82,8 @@ public: | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); | ||||
| MODULE_REGISTER_NS(Z2,       TZ2<FIMPL>,        MSource); | ||||
| MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       TZ2 template implementation                          * | ||||
| @@ -148,4 +149,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Z2_hpp_ | ||||
| #endif // Hadrons_MSource_Z2_hpp_ | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| #ifndef Hadrons____FILEBASENAME____hpp_ | ||||
| #define Hadrons____FILEBASENAME____hpp_ | ||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons____FILEBASENAME____hpp_ | ||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| #ifndef Hadrons____FILEBASENAME____hpp_ | ||||
| #define Hadrons____FILEBASENAME____hpp_ | ||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons____FILEBASENAME____hpp_ | ||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|   | ||||
| @@ -4,7 +4,10 @@ modules_cc =\ | ||||
|   Modules/MContraction/WeakNeutral4ptDisc.cc \ | ||||
|   Modules/MGauge/Load.cc \ | ||||
|   Modules/MGauge/Random.cc \ | ||||
|   Modules/MGauge/Unit.cc | ||||
|   Modules/MGauge/StochEm.cc \ | ||||
|   Modules/MGauge/Unit.cc \ | ||||
|   Modules/MScalar/ChargedProp.cc \ | ||||
|   Modules/MScalar/FreeProp.cc | ||||
|  | ||||
| modules_hpp =\ | ||||
|   Modules/MAction/DWF.hpp \ | ||||
| @@ -17,14 +20,19 @@ modules_hpp =\ | ||||
|   Modules/MContraction/WeakHamiltonianEye.hpp \ | ||||
|   Modules/MContraction/WeakHamiltonianNonEye.hpp \ | ||||
|   Modules/MContraction/WeakNeutral4ptDisc.hpp \ | ||||
|   Modules/MFermion/GaugeProp.hpp \ | ||||
|   Modules/MGauge/Load.hpp \ | ||||
|   Modules/MGauge/Random.hpp \ | ||||
|   Modules/MGauge/StochEm.hpp \ | ||||
|   Modules/MGauge/Unit.hpp \ | ||||
|   Modules/MLoop/NoiseLoop.hpp \ | ||||
|   Modules/MScalar/ChargedProp.hpp \ | ||||
|   Modules/MScalar/FreeProp.hpp \ | ||||
|   Modules/MScalar/Scalar.hpp \ | ||||
|   Modules/MSink/Point.hpp \ | ||||
|   Modules/MSolver/RBPrecCG.hpp \ | ||||
|   Modules/MSource/Point.hpp \ | ||||
|   Modules/MSource/SeqGamma.hpp \ | ||||
|   Modules/MSource/Wall.hpp \ | ||||
|   Modules/MSource/Z2.hpp \ | ||||
|   Modules/Quark.hpp | ||||
|   Modules/MSource/Z2.hpp | ||||
|  | ||||
|   | ||||
							
								
								
									
										11
									
								
								extras/qed-fvol/Global.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								extras/qed-fvol/Global.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| #include <qed-fvol/Global.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace QCD; | ||||
| using namespace QedFVol; | ||||
|  | ||||
| QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); | ||||
| QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); | ||||
| QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); | ||||
| QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); | ||||
| QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); | ||||
							
								
								
									
										42
									
								
								extras/qed-fvol/Global.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								extras/qed-fvol/Global.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| #ifndef QedFVol_Global_hpp_ | ||||
| #define QedFVol_Global_hpp_ | ||||
|  | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
/* Open and close the Grid::QedFVol namespace.
 * BEGIN_QEDFVOL_NAMESPACE also pulls the QCD namespace into Grid. The
 * 'using Grid::operator<<;' statement prevents a very nasty compilation
 * error with GCC (clang compiles fine without it).
 */
#define BEGIN_QEDFVOL_NAMESPACE \
namespace Grid {                \
using namespace QCD;            \
namespace QedFVol {             \
using Grid::operator<<;

#define END_QEDFVOL_NAMESPACE }}
|  | ||||
| BEGIN_QEDFVOL_NAMESPACE | ||||
|  | ||||
| class QedFVolLogger: public Logger | ||||
| { | ||||
| public: | ||||
|     QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, | ||||
|                                                   GridLogColours, "BLACK"){}; | ||||
| }; | ||||
|  | ||||
// Stream to the QedFVol log channel QedFVolLog<channel>,
// e.g. LOG(Message) << "text" << std::endl;
#define LOG(channel) std::cout << QedFVolLog##channel

// Print msg with the enclosing function, file and line on the Error channel,
// then abort(). The body is wrapped in do { } while (0) so that the macro is
// one statement: under an unbraced `if` both the message and the abort() are
// conditional, and a following `else` still parses. Invoke it with a trailing
// semicolon.
#define QEDFVOL_ERROR(msg)\
do\
{\
    LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
               << __LINE__ << ")" << std::endl;\
    abort();\
} while (0)

// Print "<expression>= <value>" on the Debug channel.
#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
|  | ||||
| extern QedFVolLogger QedFVolLogError; | ||||
| extern QedFVolLogger QedFVolLogWarning; | ||||
| extern QedFVolLogger QedFVolLogMessage; | ||||
| extern QedFVolLogger QedFVolLogIterative; | ||||
| extern QedFVolLogger QedFVolLogDebug; | ||||
|  | ||||
| END_QEDFVOL_NAMESPACE | ||||
|  | ||||
| #endif // QedFVol_Global_hpp_ | ||||
							
								
								
									
										9
									
								
								extras/qed-fvol/Makefile.am
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								extras/qed-fvol/Makefile.am
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
# Add the extras/ directory to the include search path.
AM_CXXFLAGS += -I$(top_srcdir)/extras

bin_PROGRAMS = qed-fvol

# The headers are listed next to the .cc files so that they are part of the
# distribution; automake does not compile header files named in _SOURCES.
qed_fvol_SOURCES =   \
    qed-fvol.cc      \
    Global.cc        \
    Global.hpp       \
    WilsonLoops.h

qed_fvol_LDADD   = -lGrid
							
								
								
									
										265
									
								
								extras/qed-fvol/WilsonLoops.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										265
									
								
								extras/qed-fvol/WilsonLoops.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,265 @@ | ||||
| #ifndef QEDFVOL_WILSONLOOPS_H | ||||
| #define QEDFVOL_WILSONLOOPS_H | ||||
|  | ||||
| #include <Global.hpp> | ||||
|  | ||||
| BEGIN_QEDFVOL_NAMESPACE | ||||
|  | ||||
| template <class Gimpl> class NewWilsonLoops : public Gimpl { | ||||
| public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; | ||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; | ||||
|  | ||||
|   ////////////////////////////////////////////////// | ||||
|   // directed plaquette oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, | ||||
|                            const int mu, const int nu) { | ||||
|     // Annoyingly, must use either scope resolution to find dependent base | ||||
|     // class, | ||||
|     // or this-> ; there is no "this" in a static method. This forces explicit | ||||
|     // Gimpl scope | ||||
|     // resolution throughout the usage in this file, and rather defeats the | ||||
|     // purpose of deriving | ||||
|     // from Gimpl. | ||||
|     plaq = Gimpl::CovShiftBackward( | ||||
|         U[mu], mu, Gimpl::CovShiftBackward( | ||||
|                        U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // trace of directed plaquette oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void traceDirPlaquette(LatticeComplex &plaq, | ||||
|                                 const std::vector<GaugeMat> &U, const int mu, | ||||
|                                 const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
|     dirPlaquette(sp, U, mu, nu); | ||||
|     plaq = trace(sp); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void sitePlaquette(LatticeComplex &Plaq, | ||||
|                             const std::vector<GaugeMat> &U) { | ||||
|     LatticeComplex sitePlaq(U[0]._grid); | ||||
|     Plaq = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceDirPlaquette(sitePlaq, U, mu, nu); | ||||
|         Plaq = Plaq + sitePlaq; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumPlaquette(const GaugeLorentz &Umu) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Plaq(Umu._grid); | ||||
|  | ||||
|     sitePlaquette(Plaq, U); | ||||
|  | ||||
|     TComplex Tp = sum(Plaq); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgPlaquette(const GaugeLorentz &Umu) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumplaq = sumPlaquette(Umu); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; | ||||
|     return sumplaq / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////// | ||||
|   // Wilson loop of size (R1, R2), oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, | ||||
|                            const int Rmu, const int Rnu, | ||||
|                            const int mu, const int nu) { | ||||
|     wl = U[nu]; | ||||
|  | ||||
|     for(int i = 0; i < Rnu-1; i++){ | ||||
|       wl = Gimpl::CovShiftForward(U[nu], nu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rmu; i++){ | ||||
|       wl = Gimpl::CovShiftForward(U[mu], mu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rnu; i++){ | ||||
|       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rmu; i++){ | ||||
|       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // trace of Wilson Loop oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void traceWilsonLoop(LatticeComplex &wl, | ||||
|                                 const std::vector<GaugeMat> &U, | ||||
|                                 const int Rmu, const int Rnu, | ||||
|                                 const int mu, const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
|     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); | ||||
|     wl = trace(sp); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|     Wl = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over planes of Wilson loop with length R1 | ||||
|   // in the time direction | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|  | ||||
|     int ndim = U[0]._grid->_ndimension; | ||||
|  | ||||
|     Wl = zero; | ||||
|     for (int nu = 0; nu < ndim - 1; nu++) { | ||||
|       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); | ||||
|       Wl = Wl + siteWl; | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum Wilson loop over all planes orthogonal to the time direction | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteSpatialWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|  | ||||
|     Wl = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of timelike Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteTimelikeWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of spatial Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteSpatialWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * ndim * (ndim - 1); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of timelike Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * (ndim - 1); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of spatial Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * (ndim - 1) * (ndim - 2); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
| }; | ||||
|  | ||||
| END_QEDFVOL_NAMESPACE | ||||
|  | ||||
| #endif // QEDFVOL_WILSONLOOPS_H | ||||
							
								
								
									
										88
									
								
								extras/qed-fvol/qed-fvol.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								extras/qed-fvol/qed-fvol.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| #include <Global.hpp> | ||||
| #include <WilsonLoops.h> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace QCD; | ||||
| using namespace QedFVol; | ||||
|  | ||||
| typedef PeriodicGaugeImpl<QedGimplR>    QedPeriodicGimplR; | ||||
| typedef PhotonR::GaugeField             EmField; | ||||
| typedef PhotonR::GaugeLinkField         EmComp; | ||||
|  | ||||
| const int NCONFIGS = 10; | ||||
| const int NWILSON = 10; | ||||
|  | ||||
| int main(int argc, char *argv[]) | ||||
| { | ||||
|     // parse command line | ||||
|     std::string parameterFileName; | ||||
|      | ||||
|     if (argc < 2) | ||||
|     { | ||||
|         std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]"; | ||||
|         std::cerr << std::endl; | ||||
|         std::exit(EXIT_FAILURE); | ||||
|     } | ||||
|     parameterFileName = argv[1]; | ||||
|      | ||||
|     // initialization | ||||
|     Grid_init(&argc, &argv); | ||||
|     QedFVolLogError.Active(GridLogError.isActive()); | ||||
|     QedFVolLogWarning.Active(GridLogWarning.isActive()); | ||||
|     QedFVolLogMessage.Active(GridLogMessage.isActive()); | ||||
|     QedFVolLogIterative.Active(GridLogIterative.isActive()); | ||||
|     QedFVolLogDebug.Active(GridLogDebug.isActive()); | ||||
|     LOG(Message) << "Grid initialized" << std::endl; | ||||
|      | ||||
|     // QED stuff | ||||
|     std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|     std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); | ||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|     GridCartesian    grid(latt_size,simd_layout,mpi_layout); | ||||
|     GridParallelRNG  pRNG(&grid); | ||||
|     PhotonR          photon(PhotonR::Gauge::feynman, | ||||
|                             PhotonR::ZmScheme::qedL); | ||||
|     EmField          a(&grid); | ||||
|     EmField          expA(&grid); | ||||
|  | ||||
|     Complex imag_unit(0, 1); | ||||
|  | ||||
|     Real wlA; | ||||
|     std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); | ||||
|  | ||||
|     pRNG.SeedRandomDevice(); | ||||
|  | ||||
|     LOG(Message) << "Wilson loop calculation beginning" << std::endl; | ||||
|     for(int ic = 0; ic < NCONFIGS; ic++){ | ||||
|         LOG(Message) << "Configuration " << ic <<std::endl; | ||||
|         photon.StochasticField(a, pRNG); | ||||
|  | ||||
|         // Exponentiate photon field | ||||
|         expA = exp(imag_unit*a); | ||||
|  | ||||
|         // Calculate Wilson loops | ||||
|         for(int iw=1; iw<=NWILSON; iw++){ | ||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3; | ||||
|             logWlAvg[iw-1] -= 2*log(wlA); | ||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3; | ||||
|             logWlTime[iw-1] -= 2*log(wlA); | ||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3; | ||||
|             logWlSpace[iw-1] -= 2*log(wlA); | ||||
|         } | ||||
|     } | ||||
|     LOG(Message) << "Wilson loop calculation completed" << std::endl; | ||||
|      | ||||
|     // Calculate Wilson loops | ||||
|     for(int iw=1; iw<=10; iw++){ | ||||
|         LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; | ||||
|         LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; | ||||
|         LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; | ||||
|         LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; | ||||
|     } | ||||
|  | ||||
|     // epilogue | ||||
|     LOG(Message) << "Grid is finalizing now" << std::endl; | ||||
|     Grid_finalize(); | ||||
|      | ||||
|     return EXIT_SUCCESS; | ||||
| } | ||||
| @@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/GridCore.h> | ||||
| #include <Grid/GridQCDcore.h> | ||||
| #include <Grid/qcd/action/Action.h> | ||||
| #include <Grid/qcd/utils/GaugeFix.h> | ||||
| #include <Grid/qcd/smearing/Smearing.h> | ||||
| #include <Grid/parallelIO/MetaData.h> | ||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <cassert> | ||||
| #include <complex> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <random> | ||||
| @@ -18,6 +19,7 @@ | ||||
| #include <ctime> | ||||
| #include <sys/time.h> | ||||
| #include <chrono> | ||||
| #include <zlib.h> | ||||
|  | ||||
| /////////////////// | ||||
| // Grid config | ||||
|   | ||||
| @@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3 | ||||
|   extra_sources+=communicator/Communicator_base.cc | ||||
| endif | ||||
|  | ||||
| if BUILD_COMMS_MPI3L | ||||
|   extra_sources+=communicator/Communicator_mpi3_leader.cc | ||||
| if BUILD_COMMS_MPIT | ||||
|   extra_sources+=communicator/Communicator_mpit.cc | ||||
|   extra_sources+=communicator/Communicator_base.cc | ||||
| endif | ||||
|  | ||||
|   | ||||
| @@ -1,137 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/DenseMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_DENSE_MATRIX_H | ||||
| #define GRID_DENSE_MATRIX_H | ||||
|  | ||||
namespace Grid {
    /////////////////////////////////////////////////////////////
    // Dense matrix utilities
    //
    // DenseVector<T> is a std::vector<T>; DenseMatrix<T> is a vector of rows,
    // addressed as mat[row][col]. All helpers are free functions.
    /////////////////////////////////////////////////////////////

template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;

/** Store the number of elements of vec in N. **/
template<class T> void Size(const DenseVector<T> & vec, int &N)
{
  N = static_cast<int>(vec.size());
}

/** Store the row count in N and the length of row 0 in M (M = 0 when there are no rows). **/
template<class T> void Size(const DenseMatrix<T> & mat, int &N,int &M)
{
  N = static_cast<int>(mat.size());
  // An empty matrix has no row 0 to inspect; indexing it would be undefined behaviour.
  M = mat.empty() ? 0 : static_cast<int>(mat[0].size());
}

/** Store the dimension of a square matrix in N; asserts rows == columns. **/
template<class T> void SizeSquare(const DenseMatrix<T> & mat, int &N)
{
  int M; Size(mat,N,M);
  assert(N==M);
}

/** Resize a vector to N elements. **/
template<class T> void Resize(DenseVector<T > & mat, int N) {
  mat.resize(N);
}

/** Resize a matrix to N rows of M elements each. **/
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
  mat.resize(N);
  for(int i=0;i<N;i++){
    mat[i].resize(M);
  }
}

/** Assign val to every element of mat (dimensions are taken from Size). **/
template<class T> void Fill(DenseMatrix<T> & mat, const T&val) {
  int N,M;
  Size(mat,N,M);
  for(int i=0;i<N;i++){
  for(int j=0;j<M;j++){
    mat[i][j] = val;
  }}
}

/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(const DenseMatrix<T> & mat){
  int N,M;
  Size(mat,N,M);
  DenseMatrix<T> C; Resize(C,M,N);
  for(int i=0;i<M;i++){
  for(int j=0;j<N;j++){
    C[i][j] = mat[j][i];
  }}
  return C;
}

/** Set DenseMatrix to unit matrix (A must already be square) **/
template<class T> void Unity(DenseMatrix<T> &A){
  int N;  SizeSquare(A,N);
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      if ( i==j ) A[i][j] = 1;
      else        A[i][j] = 0;
    }
  }
}

/** Add c * I to a square matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
  int dim;  SizeSquare(A,dim);
  for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}

/** Return the Hermitian conjugate of a square matrix (uses an unqualified conj on T) **/
template<class T>
DenseMatrix<T> HermitianConj(const DenseMatrix<T> &mat){

  int dim; SizeSquare(mat,dim);

  DenseMatrix<T> C; Resize(C,dim,dim);

  for(int i=0;i<dim;i++){
    for(int j=0;j<dim;j++){
      C[i][j] = conj(mat[j][i]);
    }
  }
  return C;
}

/** Copy the sub-block rows [row_st,row_end) x columns [col_st,col_end) of A (need not be square) **/
template <class T>
DenseMatrix<T> GetSubMtx(const DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
  DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);

  for(int i = row_st; i<row_end; i++){
  for(int j = col_st; j<col_end; j++){
    H[i-row_st][j-col_st]=A[i][j];
  }}
  return H;
}

}
|  | ||||
| #include "Householder.h" | ||||
| #include "Francis.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @@ -1,525 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Francis.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef FRANCIS_H | ||||
| #define FRANCIS_H | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <stdexcept> | ||||
| #include <fstream> | ||||
| #include <complex> | ||||
| #include <algorithm> | ||||
|  | ||||
| //#include <timer.h> | ||||
| //#include <lapacke.h> | ||||
| //#include <Eigen/Dense> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||
| template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||
|  | ||||
| /** | ||||
|   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. | ||||
| H = | ||||
|       x  x  x  x  x  x  x  x  x | ||||
|       x  x  x  x  x  x  x  x  x | ||||
|       0  x  x  x  x  x  x  x  x | ||||
|       0  0  x  x  x  x  x  x  x | ||||
|       0  0  0  x  x  x  x  x  x | ||||
|       0  0  0  0  x  x  x  x  x | ||||
|       0  0  0  0  0  x  x  x  x | ||||
|       0  0  0  0  0  0  x  x  x | ||||
|       0  0  0  0  0  0  0  x  x | ||||
| Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. | ||||
| **/ | ||||
| template <class T> | ||||
| int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||
| { /* Works on a copy of Hin; writes eigenvalues to evals and eigenvectors to evecs; returns tot_it, which is incremented once per completed pass of the outer loop. */ | ||||
|   DenseMatrix<T> H = Hin;  | ||||
|  | ||||
|   int N ; SizeSquare(H,N); | ||||
|   int M = N; /* not referenced again in this function */ | ||||
|  | ||||
|   Fill(evals,0); /* NOTE(review): the only Fill visible in DenseMatrix.h takes (DenseMatrix<T>&, T&) - confirm these two calls resolve */ | ||||
|   Fill(evecs,0); | ||||
|  | ||||
|   T s,t,x=0,y=0,z=0; | ||||
|   T u,d; | ||||
|   T apd,amd,bc; | ||||
|   DenseVector<T> p(N,0); /* not referenced again in this function */ | ||||
|   T nrm = Norm(H);    ///DenseMatrix Norm | ||||
|   int n, m; /* unused; the shift loop below declares its own m */ | ||||
|   int e = 0; /* number of eigenvalues stored in evals so far */ | ||||
|   int it = 0; /* passes since the last deflation; reset to 0 whenever an eigenvalue is stored */ | ||||
|   int tot_it = 0; | ||||
|   int l = 0; | ||||
|   int r = 0; | ||||
|   DenseMatrix<T> P; Resize(P,N,N); Unity(P); /* accumulates the applied Householder reflections */ | ||||
|   DenseVector<int> trows(N,0); | ||||
|  | ||||
|   /// Check if the matrix is really hessenberg, if not abort | ||||
|   RealD sth = 0; | ||||
|   for(int j=0;j<N;j++){ | ||||
|     for(int i=j+2;i<N;i++){ | ||||
|       sth = abs(H[i][j]); | ||||
|       if(sth > small){ | ||||
| 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; | ||||
| 	exit(1); /* terminates the whole process, not just this call */ | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   do{ | ||||
|     std::cout << "Francis QR Step N = " << N << std::endl; | ||||
|     /** Check for convergence | ||||
|       x  x  x  x  x | ||||
|       0  x  x  x  x | ||||
|       0  0  x  x  x | ||||
|       0  0  x  x  x | ||||
|       0  0  0  0  x | ||||
|       for this matrix l = 4 | ||||
|      **/ | ||||
|     do{ | ||||
|       l = Chop_subdiag(H,nrm,e,small); /* NOTE(review): Chop_subdiag is defined outside this view; presumably returns the first row of the trailing unreduced block - confirm */ | ||||
|       r = 0;    ///May have converged on more than one eval | ||||
|       ///Single eval | ||||
|       if(l == N-1){ | ||||
|         evals[e] = H[l][l]; | ||||
|         N--; e++; r++; it = 0; | ||||
|       } | ||||
|       ///RealD eval | ||||
|       if(l == N-2){ | ||||
|         trows[l+1] = 1;    ///Needed for UTSolve | ||||
|         apd = H[l][l] + H[l+1][l+1]; | ||||
|         amd = H[l][l] - H[l+1][l+1]; | ||||
|         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; | ||||
|         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); /* roots of the characteristic polynomial of the trailing 2x2 block */ | ||||
|         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); | ||||
|         N-=2; e+=2; r++; it = 0; | ||||
|       } | ||||
|     } while(r>0); | ||||
|  | ||||
|     if(N ==0) break; /* every eigenvalue has been deflated */ | ||||
|  | ||||
|     DenseVector<T > ck; Resize(ck,3); | ||||
|     DenseVector<T> v;   Resize(v,3); | ||||
|  | ||||
|     for(int m = N-3; m >= l; m--){ | ||||
|       ///Starting vector essentially random shift. | ||||
|       if(it%10 == 0 && N >= 3 && it > 0){ | ||||
|         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||
|         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||
|         z = H[m+1][m]*H[m+2][m+1]; | ||||
|       } | ||||
|       ///Starting vector implicit Q theorem | ||||
|       else{ | ||||
|         s = (H[N-2][N-2] + H[N-1][N-1]); /* trace of the trailing 2x2 block */ | ||||
|         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); /* determinant of the trailing 2x2 block */ | ||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||
|         z = H[m+1][m]*H[m+2][m+1]; | ||||
|       } | ||||
|       ck[0] = x; ck[1] = y; ck[2] = z; | ||||
|  | ||||
|       if(m == l) break; | ||||
|  | ||||
|       /** Some stupid thing from numerical recipies, seems to work**/ | ||||
|       // PAB.. for heaven's sake quote page, purpose, evidence it works. | ||||
|       //       what sort of comment is that!?!?!? | ||||
|       u=abs(H[m][m-1])*(abs(y)+abs(z)); | ||||
|       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); | ||||
|       if ((T)abs(u+d) == (T)abs(d) ){ /* true when u is negligible next to d at working precision */ | ||||
| 	l = m; break; | ||||
|       } | ||||
|  | ||||
|       //if (u < small){l = m; break;} | ||||
|     } | ||||
|     if(it > 100000){ | ||||
|      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; | ||||
|      std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|     normalize(ck);    ///Normalization cancels in PHP anyway | ||||
|     T beta; /* NOTE(review): uninitialised; Householder_vector does not assign beta when its input is all zero, yet Householder_mult reads it */ | ||||
|     Householder_vector<T >(ck, 0, 2, v, beta); | ||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,0); | ||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,1); | ||||
|     ///Accumulate eigenvector | ||||
|     Householder_mult<T >(P,v,beta,0,l,l+2,1); | ||||
|     int sw = 0;      ///Are we on the last row? | ||||
|     for(int k=l;k<N-2;k++){ | ||||
|       x = H[k+1][k]; | ||||
|       y = H[k+2][k]; | ||||
|       z = (T)0.0; | ||||
|       if(k+3 <= N-1){ | ||||
| 	z = H[k+3][k]; | ||||
|       } else{ | ||||
| 	sw = 1;  | ||||
| 	v[2] = (T)0.0; | ||||
|       } | ||||
|       ck[0] = x; ck[1] = y; ck[2] = z; | ||||
|       normalize(ck); | ||||
|       Householder_vector<T >(ck, 0, 2-sw, v, beta); /* a 2-element reflector is used once sw is set */ | ||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); | ||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); | ||||
|       ///Accumulate eigenvector | ||||
|       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); | ||||
|     } | ||||
|     it++; | ||||
|     tot_it++; | ||||
|   }while(N > 1); | ||||
|   N = evals.size(); /* N was decremented during deflation; reset it to the number of eigenvalues */ | ||||
|   ///Annoying - UT solves in reverse order; | ||||
|   DenseVector<T> tmp; Resize(tmp,N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     tmp[i] = evals[N-i-1]; | ||||
|   }  | ||||
|   evals = tmp; | ||||
|   UTeigenvectors(H, trows, evals, evecs); | ||||
|   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} /* multiply each vector by the accumulated P, then normalise */ | ||||
|   return tot_it; | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||
| { | ||||
|   /** | ||||
|   Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. | ||||
|   H = | ||||
|   x  x  0  0  0  0 | ||||
|   x  x  x  0  0  0 | ||||
|   0  x  x  x  0  0 | ||||
|   0  0  x  x  x  0 | ||||
|   0  0  0  x  x  x | ||||
|   0  0  0  0  x  x | ||||
|   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.  **/ | ||||
|   return my_Wilkinson(Hin, evals, evecs, small, small); | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) | ||||
| { | ||||
|   int N; SizeSquare(Hin,N); | ||||
|   int M = N; | ||||
|  | ||||
|   ///I don't want to modify the input but matricies must be passed by reference | ||||
|   //Scale a matrix by its "norm" | ||||
|   //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm); | ||||
|   DenseMatrix<T> H;  H = Hin; | ||||
|    | ||||
|   RealD Hnorm = abs(Norm(Hin)); | ||||
|   H = H * (1.0 / Hnorm); | ||||
|  | ||||
|   // TODO use openmp and memset | ||||
|   Fill(evals,0); | ||||
|   Fill(evecs,0); | ||||
|  | ||||
|   T s, t, x = 0, y = 0, z = 0; | ||||
|   T u, d; | ||||
|   T apd, amd, bc; | ||||
|   DenseVector<T> p; Resize(p,N); Fill(p,0); | ||||
|  | ||||
|   T nrm = Norm(H);    ///DenseMatrix Norm | ||||
|   int n, m; | ||||
|   int e = 0; | ||||
|   int it = 0; | ||||
|   int tot_it = 0; | ||||
|   int l = 0; | ||||
|   int r = 0; | ||||
|   DenseMatrix<T> P; Resize(P,N,N); | ||||
|   Unity(P); | ||||
|   DenseVector<int> trows(N, 0); | ||||
|   /// Check if the matrix is really symm tridiag | ||||
|   RealD sth = 0; | ||||
|   for(int j = 0; j < N; ++j) | ||||
|   { | ||||
|     for(int i = j + 2; i < N; ++i) | ||||
|     { | ||||
|       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) | ||||
|       { | ||||
| 	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; | ||||
| 	std::cout << "Warning tridiagonalize and call again" << std::endl; | ||||
|         // exit(1); // see what is going on | ||||
|         //return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   do{ | ||||
|     do{ | ||||
|       //Jasper | ||||
|       //Check if the subdiagonal term is small enough (<small) | ||||
|       //if true then it is converged. | ||||
|       //check start from H.dim - e - 1 | ||||
|       //How to deal with more than 2 are converged? | ||||
|       //What if Chop_symm_subdiag return something int the middle? | ||||
|       //-------------- | ||||
|       l = Chop_symm_subdiag(H,nrm, e, small); | ||||
|       r = 0;    ///May have converged on more than one eval | ||||
|       //Jasper | ||||
|       //In this case | ||||
|       // x  x  0  0  0  0 | ||||
|       // x  x  x  0  0  0 | ||||
|       // 0  x  x  x  0  0 | ||||
|       // 0  0  x  x  x  0 | ||||
|       // 0  0  0  x  x  0 | ||||
|       // 0  0  0  0  0  x  <- l | ||||
|       //-------------- | ||||
|       ///Single eval | ||||
|       if(l == N - 1) | ||||
|       { | ||||
|         evals[e] = H[l][l]; | ||||
|         N--; | ||||
|         e++; | ||||
|         r++; | ||||
|         it = 0; | ||||
|       } | ||||
|       //Jasper | ||||
|       // x  x  0  0  0  0 | ||||
|       // x  x  x  0  0  0 | ||||
|       // 0  x  x  x  0  0 | ||||
|       // 0  0  x  x  0  0 | ||||
|       // 0  0  0  0  x  x  <- l | ||||
|       // 0  0  0  0  x  x | ||||
|       //-------------- | ||||
|       ///RealD eval | ||||
|       if(l == N - 2) | ||||
|       { | ||||
|         trows[l + 1] = 1;    ///Needed for UTSolve | ||||
|         apd = H[l][l] + H[l + 1][ l + 1]; | ||||
|         amd = H[l][l] - H[l + 1][l + 1]; | ||||
|         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; | ||||
|         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); | ||||
|         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); | ||||
|         N -= 2; | ||||
|         e += 2; | ||||
|         r++; | ||||
|         it = 0; | ||||
|       } | ||||
|     }while(r > 0); | ||||
|     //Jasper | ||||
|     //Already converged | ||||
|     //-------------- | ||||
|     if(N == 0) break; | ||||
|  | ||||
|     DenseVector<T> ck,v; Resize(ck,2); Resize(v,2); | ||||
|  | ||||
|     for(int m = N - 3; m >= l; m--) | ||||
|     { | ||||
|       ///Starting vector essentially random shift. | ||||
|       if(it%10 == 0 && N >= 3 && it > 0) | ||||
|       { | ||||
|         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); | ||||
|         x = H[m][m] - t; | ||||
|         z = H[m + 1][m]; | ||||
|       } else { | ||||
|       ///Starting vector implicit Q theorem | ||||
|         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; | ||||
|         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]  | ||||
| 	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); | ||||
|         x = H[m][m] - t; | ||||
|         z = H[m + 1][m]; | ||||
|       } | ||||
|       //Jasper | ||||
|       //why it is here???? | ||||
|       //----------------------- | ||||
|       if(m == l) | ||||
|         break; | ||||
|  | ||||
|       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); | ||||
|       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); | ||||
|       if ((T)abs(u + d) == (T)abs(d)) | ||||
|       { | ||||
|         l = m; | ||||
|         break; | ||||
|       } | ||||
|     } | ||||
|     //Jasper | ||||
|     if(it > 1000000) | ||||
|     { | ||||
|       std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; | ||||
|       std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|     // | ||||
|     T s, c; | ||||
|     Givens_calc<T>(x, z, c, s); | ||||
|     Givens_mult<T>(H, l, l + 1, c, -s, 0); | ||||
|     Givens_mult<T>(H, l, l + 1, c,  s, 1); | ||||
|     Givens_mult<T>(P, l, l + 1, c,  s, 1); | ||||
|     // | ||||
|     for(int k = l; k < N - 2; ++k) | ||||
|     { | ||||
|       x = H.A[k + 1][k]; | ||||
|       z = H.A[k + 2][k]; | ||||
|       Givens_calc<T>(x, z, c, s); | ||||
|       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); | ||||
|       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); | ||||
|       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); | ||||
|     } | ||||
|     it++; | ||||
|     tot_it++; | ||||
|   }while(N > 1); | ||||
|  | ||||
|   N = evals.size(); | ||||
|   ///Annoying - UT solves in reverse order; | ||||
|   DenseVector<T> tmp(N); | ||||
|   for(int i = 0; i < N; ++i) | ||||
|     tmp[i] = evals[N-i-1]; | ||||
|   evals = tmp; | ||||
|   // | ||||
|   UTeigenvectors(H, trows, evals, evecs); | ||||
|   //UTSymmEigenvectors(H, trows, evals, evecs); | ||||
|   for(int i = 0; i < evals.size(); ++i) | ||||
|   { | ||||
|     evecs[i] = P * evecs[i]; | ||||
|     normalize(evecs[i]); | ||||
|     evals[i] = evals[i] * Hnorm; | ||||
|   } | ||||
|   // // FIXME this is to test | ||||
|   // Hin.write("evecs3", evecs); | ||||
|   // Hin.write("evals3", evals); | ||||
|   // // check rsd | ||||
|   // for(int i = 0; i < M; i++) { | ||||
|   //   vector<T> Aevec = Hin * evecs[i]; | ||||
|   //   RealD norm2(0.); | ||||
|   //   for(int j = 0; j < M; j++) { | ||||
|   //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); | ||||
|   //   } | ||||
|   // } | ||||
|   return tot_it; | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||
|  | ||||
|   /** | ||||
|   turn a matrix A = | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   into | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   0  x  x  x  x | ||||
|   0  0  x  x  x | ||||
|   0  0  0  x  x | ||||
|   with householder rotations | ||||
|   Slow. | ||||
|   */ | ||||
|   int N ; SizeSquare(A,N); | ||||
|   DenseVector<T > p; Resize(p,N); Fill(p,0); | ||||
|  | ||||
|   for(int k=start;k<N-2;k++){ | ||||
|     //cerr << "hess" << k << std::endl; | ||||
|     DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); | ||||
|     for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column | ||||
|     normalize(ck);    ///Normalization cancels in PHP anyway | ||||
|     T beta; | ||||
|     Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector | ||||
|     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA | ||||
|     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H | ||||
|     ///Accumulate eigenvector | ||||
|     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H | ||||
|   } | ||||
|   /*for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|     A(0,k,l); | ||||
|     } | ||||
|     }*/ | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||
| ///Tridiagonalize a matrix | ||||
|   int N; SizeSquare(A,N); | ||||
|   Hess(A,Q,start); | ||||
|   /*for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|     A(0,l,k); | ||||
|     } | ||||
|     }*/ | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| void ForceTridiagonal(DenseMatrix<T> &A){ | ||||
| ///Tridiagonalize a matrix | ||||
|   int N ; SizeSquare(A,N); | ||||
|   for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|       A[l][k]=0; | ||||
|       A[k][l]=0; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   ///Solve a symmetric eigensystem, not necessarily in tridiagonal form | ||||
|   int N; SizeSquare(Ain,N); | ||||
|   DenseMatrix<T > A; A = Ain; | ||||
|   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q); | ||||
|   Tri(A,Q,0); | ||||
|   int it = my_Wilkinson<T>(A, evals, evecs, small); | ||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||
|   return it; | ||||
| } | ||||
|  | ||||
|  | ||||
| template <class T> | ||||
| int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   return my_Wilkinson(Ain, evals, evecs, small); | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   return my_SymmEigensystem(Ain, evals, evecs, small); | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
| ///Solve a general eigensystem, not necessarily in tridiagonal form | ||||
|   int N = Ain.dim; | ||||
|   DenseMatrix<T > A(N); A = Ain; | ||||
|   DenseMatrix<T > Q(N);Q.Unity(); | ||||
|   Hess(A,Q,0); | ||||
|   int it = QReigensystem<T>(A, evals, evecs, small); | ||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||
|   return it; | ||||
| } | ||||
|  | ||||
| } | ||||
| #endif | ||||
| @@ -1,242 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Householder.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef HOUSEHOLDER_H | ||||
| #define HOUSEHOLDER_H | ||||
|  | ||||
| #define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
| #define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
| #define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <stdexcept> | ||||
| #include <fstream> | ||||
| #include <complex> | ||||
| #include <algorithm> | ||||
|  | ||||
| namespace Grid { | ||||
/** Comparison function for finding the max element in a vector:
    returns true when |i| < |j|. **/
template <class T> bool cf(T i, T j) {
  // Bring the std overloads into play so a floating-point T cannot bind to
  // the C library's integer ::abs; argument-dependent lookup still applies.
  using std::abs;
  return abs(i) < abs(j);
}
|  | ||||
/**
	Calculate a real Givens angle

	Given the pair (y, z), produce c and s with c*c + s*s = 1, scaled by the
	larger of |y| and |z|. When z is zero the result is c = 1, s = 0, which
	also covers y = z = 0.
 **/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
  using std::abs;
  using std::sqrt;

  const auto mz = abs(z);

  if(mz==0.0){
    // Nothing to rotate away. Return here: falling through with y == 0 as
    // well would evaluate 0/0 below and hand back NaNs.
    c = 1; s = 0;
    return;
  }
  if(mz >= abs(y)){
    T t = -y/z;
    s = (T)1.0 / sqrt ((T)1.0 + t * t);
    c = s * t;
  } else {
    T t = -z/y;
    c = (T)1.0 / sqrt ((T)1.0 + t * t);
    s = c * t;
  }
}
|  | ||||
| template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) | ||||
| { | ||||
|   int q ; SizeSquare(A,q); | ||||
|  | ||||
|   if(dir == 0){ | ||||
|     for(int j=0;j<q;j++){ | ||||
|       T nu = A[i][j]; | ||||
|       T w  = A[k][j]; | ||||
|       A[i][j] = (c*nu + s*w); | ||||
|       A[k][j] = (-s*nu + c*w); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if(dir == 1){ | ||||
|     for(int j=0;j<q;j++){ | ||||
|       T nu = A[j][i]; | ||||
|       T w  = A[j][k]; | ||||
|       A[j][i] = (c*nu - s*w); | ||||
|       A[j][k] = (s*nu + c*w); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	from input = x; | ||||
| 	Compute the complex Householder vector, v, such that | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
|  | ||||
| 	P | x |    | x | k = 0 | ||||
| 	| x |    | 0 |  | ||||
| 	| x | =  | 0 | | ||||
| 	| x |    | 0 | j = 3 | ||||
| 	| x |	   | x | | ||||
|  | ||||
| 	These are the "Unreduced" Householder vectors. | ||||
|  | ||||
|  **/ | ||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) | ||||
| { | ||||
|   int N ; Size(input,N); | ||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); | ||||
|  | ||||
|   if(abs(m) > 0.0){ | ||||
|     T alpha = 0; | ||||
|  | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = input[i]/m; | ||||
|       alpha = alpha + v[i]*conj(v[i]); | ||||
|     } | ||||
|     alpha = sqrt(alpha); | ||||
|     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); | ||||
|  | ||||
|     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; | ||||
|     else                 v[k] = -alpha; | ||||
|   } else{ | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = 0.0; | ||||
|     }  | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	from input = x; | ||||
| 	Compute the complex Householder vector, v, such that | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
|  | ||||
| 	Px = alpha*e_dir | ||||
|  | ||||
| 	These are the "Unreduced" Householder vectors. | ||||
|  | ||||
|  **/ | ||||
|  | ||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) | ||||
| { | ||||
|   int N = input.size(); | ||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); | ||||
|    | ||||
|   if(abs(m) > 0.0){ | ||||
|     T alpha = 0; | ||||
|  | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = input[i]/m; | ||||
|       alpha = alpha + v[i]*conj(v[i]); | ||||
|     } | ||||
|      | ||||
|     alpha = sqrt(alpha); | ||||
|     beta = 1.0/(alpha*(alpha + abs(v[dir]) )); | ||||
| 	 | ||||
|     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; | ||||
|     else                  v[dir] = -alpha; | ||||
|   }else{ | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = 0.0; | ||||
|     }  | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Apply the reflector built from (v, beta) to the square matrix A, in place. | ||||
| 	Computes the product PA if trans = 0 (rows k..j of A are updated), | ||||
| 	AP if trans = 1 (columns k..j of A are updated). | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
| 	The sweep over the other index starts at element l and runs to the end of A. | ||||
| 	v carries j - k + 1 nonzero entries and is read as v[i-k], i.e. stored from index 0. | ||||
| 	A is left unchanged when beta is zero. | ||||
|  | ||||
| 	NOTE(review): v is conjugated in both the inner product and the update here, whereas | ||||
| 	Householder_mult_tri conjugates it in only one of the two; confirm which is intended for complex T. | ||||
|  **/ | ||||
|  | ||||
| template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) | ||||
| { | ||||
|   int dim ; SizeSquare(A,dim); | ||||
|  | ||||
|   // Nothing to apply for a vanishing beta. | ||||
|   if( !(abs(beta) > 0.0) ) return; | ||||
|  | ||||
|   const bool fromLeft = (trans==0); | ||||
|  | ||||
|   for(int q=l; q<dim; q++){ | ||||
|     T dot = 0; | ||||
|     if(fromLeft){ | ||||
|       // Column q: project onto v, then subtract the scaled projection from rows k..j. | ||||
|       for(int r=k; r<=j; r++){ dot += conj(v[r-k])*A[r][q]; } | ||||
|       dot *= beta; | ||||
|       for(int r=k; r<=j; r++){ A[r][q] = A[r][q]-dot*conj(v[r-k]); } | ||||
|     } else { | ||||
|       // Row q: same update acting on columns k..j. | ||||
|       for(int c=k; c<=j; c++){ dot += conj(v[c-k])*A[q][c]; } | ||||
|       dot *= beta; | ||||
|       for(int c=k; c<=j; c++){ A[q][c] = A[q][c]-dot*conj(v[c-k]); } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Compute the product PA if trans = 0 | ||||
| 	AP if trans = 1 | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
| 	Variant for tridiagonal A: the sweep over the other index covers only l..M-1 | ||||
| 	instead of running to the end of the matrix. | ||||
| 	v carries j - k + 1 nonzero entries and is read as v[i-k]. | ||||
| 	The correction is first accumulated in a zero-initialised N x N scratch matrix | ||||
| 	and then added to A; nothing is done when beta is zero. | ||||
|  **/ | ||||
| template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) | ||||
| { | ||||
|   if(abs(beta) > 0.0){ | ||||
|  | ||||
|     int N ; SizeSquare(A,N); | ||||
|  | ||||
|     // Scratch matrix holding the rank-one correction. | ||||
|     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0); | ||||
|  | ||||
|     T s; | ||||
|     for(int p=l; p<M; p++){ | ||||
|       s = 0; | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; | ||||
|       } | ||||
|       s = beta*s; | ||||
|       if(trans==0){ | ||||
| 	// Indexed as tmp[i][p], like every other access to tmp and A in this routine. | ||||
| 	for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); | ||||
|       } | ||||
|     } | ||||
|     // Second pass: fold the accumulated correction into A. | ||||
|     for(int p=l; p<M; p++){ | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| } | ||||
| #endif | ||||
| @@ -33,6 +33,8 @@ directory | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| // Selects which solver BlockConjugateGradient::operator() dispatches to. | ||||
| enum BlockCGtype { | ||||
|   BlockCG,     // -> BlockCGsolve | ||||
|   BlockCGrQ,   // -> BlockCGrQsolve | ||||
|   CGmultiRHS   // -> CGmultiRHSsolve | ||||
| }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient. Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| @@ -40,25 +42,280 @@ template <class Field> | ||||
| class BlockConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|  | ||||
|  | ||||
|   typedef typename Field::scalar_type scomplex; | ||||
|  | ||||
|   const int blockDim = 0; | ||||
|  | ||||
|   int blockDim ; | ||||
|   int Nblock; | ||||
|  | ||||
|   BlockCGtype CGtype; | ||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|    | ||||
|   BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : Tolerance(tol), | ||||
|     MaxIterations(maxit), | ||||
|     ErrorOnNoConverge(err_on_no_conv){}; | ||||
|   // Full constructor: cgtype picks the solver used by operator(), _Orthog is stored as | ||||
|   // blockDim (the grid dimension whose extent gives Nblock in the solvers). | ||||
|   // Initialisers follow member declaration order; the counters start from zero. | ||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : blockDim(_Orthog), Nblock(0), CGtype(cgtype), ErrorOnNoConverge(err_on_no_conv), | ||||
|       Tolerance(tol), MaxIterations(maxit), IterationsToComplete(0) | ||||
|   {} | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Thin QR factorisation (google it) | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
| 		 Eigen::MatrixXcd &C, | ||||
| 		 Eigen::MatrixXcd &Cinv, | ||||
| 		 Field & Q, | ||||
| 		 const Field & R) | ||||
| { | ||||
|   int Orthog = blockDim; // slice direction passed to the slice* helpers | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Outputs: m_rr = R^dag R, C, Cinv = C^{-1} and Q = R C^{-1}; R itself is not modified. | ||||
|   // | ||||
|   //Dimensions | ||||
|   // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock | ||||
|   // | ||||
|   // Rdag R = m_rr = Herm = L L^dag        <-- Cholesky decomposition (LLT routine in Eigen) | ||||
|   // | ||||
|   //   Q  C = R => Q = R C^{-1} | ||||
|   // | ||||
|   // Want  Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} | ||||
|   // | ||||
|   // Set C = L^{dag}, and then Q^dag Q = ident | ||||
|   // | ||||
|   // Checks: | ||||
|   // Cdag C = Rdag R ; passes. | ||||
|   // QdagQ  = 1      ; passes | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Cholesky from Eigen | ||||
|   // There exists a ldlt that is documented as more stable | ||||
|   // | ||||
|   // The factorisation status is checked: if m_rr is not numerically positive definite | ||||
|   // the factor is meaningless, so say so instead of silently propagating NaN/Inf. | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   Eigen::LLT<Eigen::MatrixXcd> cholesky(m_rr); | ||||
|   if ( cholesky.info() != Eigen::Success ) { | ||||
|     std::cout << GridLogMessage << "ThinQRfact: Cholesky factorisation of Rdag R failed (not positive definite); Q, C and Cinv are unreliable" << std::endl; | ||||
|   } | ||||
|   Eigen::MatrixXcd L    = cholesky.matrixL(); | ||||
|  | ||||
|   C    = L.adjoint(); | ||||
|   Cinv = C.inverse(); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Q = R C^{-1} | ||||
|   // | ||||
|   // Q_j  = R_i Cinv(i,j) | ||||
|   // | ||||
|   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); | ||||
| } | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Call one of several implementations | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Solve with the algorithm selected by CGtype; an unrecognised value trips the assert. | ||||
| // The block direction is taken from blockDim inside each solver, so no local copy is kept here. | ||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   switch ( CGtype ) { | ||||
|   case BlockCGrQ:  BlockCGrQsolve(Linop,Src,Psi);  break; | ||||
|   case BlockCG:    BlockCGsolve(Linop,Src,Psi);    break; | ||||
|   case CGmultiRHS: CGmultiRHSsolve(Linop,Src,Psi); break; | ||||
|   default:         assert(0); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| // BlockCGrQ implementation: | ||||
| //-------------------------- | ||||
| // X is guess/Solution: read as the initial guess and updated in place | ||||
| // B is RHS; Linop.HermOp supplies the operator A | ||||
| // Solve A X_i = B_i    ;        i refers to Nblock index, Nblock = B._grid->_fdimensions[blockDim] | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  | ||||
| { | ||||
|   int Orthog = blockDim; // slice direction whose full extent gives the number of block columns | ||||
|   Nblock = B._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   X.checkerboard = B.checkerboard; | ||||
|   conformable(X, B); | ||||
|  | ||||
|   Field tmp(B); | ||||
|   Field Q(B); | ||||
|   Field D(B); | ||||
|   Field Z(B); | ||||
|   Field AD(B); | ||||
|  | ||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|  | ||||
|   // Initial set up: ssq[b] = sliceNorm of B per block column (normalises the convergence test); residuals[] is only used for the NaN checks on B and X | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   sliceNorm(ssq,B,Orthog); | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   sliceNorm(residuals,B,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   sliceNorm(residuals,X,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   /************************************************************************ | ||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) | ||||
|    ************************************************************************ | ||||
|    * Dimensions: | ||||
|    * | ||||
|    *   X,B==(Nferm x Nblock) | ||||
|    *   A==(Nferm x Nferm) | ||||
|    *   | ||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site | ||||
|    *  | ||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|    * for k:  | ||||
|    *   Z  = AD | ||||
|    *   M  = [D^dag Z]^{-1} | ||||
|    *   X  = X + D MC | ||||
|    *   QS = Q - ZM | ||||
|    *   D  = Q + D S^dag | ||||
|    *   C  = S C | ||||
|    * | ||||
|    * Implementation notes: | ||||
|    *   - The numbered comments (3..8) in the loop below mark these update steps. | ||||
|    *   - On convergence IterationsToComplete is set to the iteration count k and the | ||||
|    *     routine returns after logging the residuals and the timer breakdown. | ||||
|    *   - If the loop runs out, the routine asserts when ErrorOnNoConverge is set; | ||||
|    *     otherwise IterationsToComplete is set from k, which by then is MaxIterations+1. | ||||
|    *   - NOTE(review): m_tmp1 is constructed but never read in this routine. | ||||
|    *   - NOTE(review): the convergence test divides by ssq[b]; a right-hand-side column | ||||
|    *     with zero norm would divide by zero -- confirm callers exclude this. | ||||
|    */ | ||||
|   /////////////////////////////////////// | ||||
|   // Initial block: initial search dir is guess | ||||
|   /////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; | ||||
|  | ||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|  | ||||
|   Linop.HermOp(X, AD); | ||||
|   tmp = B - AD;   | ||||
|   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; | ||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||
|   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; | ||||
|   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; | ||||
|   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; | ||||
|   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; | ||||
|   D=Q; | ||||
|  | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////// | ||||
|   // Timers: started/stopped around the matching calls in the loop and reported on convergence | ||||
|   /////////////////////////////////////// | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch QRTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|   SolverTimer.Start(); | ||||
|  | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     //3. Z  = AD   (one HermOp application to the current search directions) | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(D, Z);       | ||||
|     MatrixTimer.Stop(); | ||||
|     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; | ||||
|  | ||||
|     //4. M  = [D^dag Z]^{-1}   (Nblock x Nblock matrix, inverted densely with Eigen) | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_M       = m_DZ.inverse(); | ||||
|     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; | ||||
|      | ||||
|     //5. X  = X + D MC | ||||
|     m_tmp     = m_M * m_C; | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //6. QS = Q - ZM   (tmp receives Q - ZM, then ThinQRfact splits it into the new Q and S) | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); | ||||
|     sliceMaddTimer.Stop(); | ||||
|     QRTimer.Start(); | ||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); | ||||
|     QRTimer.Stop(); | ||||
|      | ||||
|     //7. D  = Q + D S^dag | ||||
|     m_tmp = m_S.adjoint(); | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //8. C  = S C | ||||
|     m_C = m_S*m_C; | ||||
|      | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      * m_rr is rebuilt as C^dag C; real(m_rr(b,b))/ssq[b] is used as the relative | ||||
|      * residual (squared) of block column b. The solve stops once the largest of | ||||
|      * these falls below Tolerance*Tolerance. | ||||
|      */ | ||||
|     m_rr = m_C.adjoint() * m_C; | ||||
|  | ||||
|     RealD max_resid=0; | ||||
|     RealD rrsum=0; | ||||
|     RealD rr; | ||||
|  | ||||
|     for(int b=0;b<Nblock;b++) { | ||||
|       rrsum+=real(m_rr(b,b)); | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||
| 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; | ||||
|  | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; | ||||
|  | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(X, AD); | ||||
|       AD = AD-B; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; | ||||
| 	     | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
| @@ -162,8 +419,9 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|      ********************* | ||||
|      */ | ||||
|     RealD max_resid=0; | ||||
|     RealD rr; | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       RealD rr = real(m_rr(b,b))/ssq[b]; | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|      | ||||
| @@ -173,13 +431,14 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(Psi, AP); | ||||
|       AP = AP-Src; | ||||
|       std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
| @@ -197,35 +456,13 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| }; | ||||
|  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||
| // Use this for spread out across nodes | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| template <class Field> | ||||
| class MultiRHSConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|  | ||||
|   typedef typename Field::scalar_type scomplex; | ||||
|  | ||||
|   const int blockDim = 0; | ||||
|  | ||||
|   int Nblock; | ||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|    | ||||
|    MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : Tolerance(tol), | ||||
|     MaxIterations(maxit), | ||||
|     ErrorOnNoConverge(err_on_no_conv){}; | ||||
|  | ||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = 0; // First dimension is block dim | ||||
|   int Orthog = blockDim; // First dimension is block dim | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
| @@ -285,12 +522,10 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     // Alpha | ||||
|     //    sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       //      std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl; | ||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); | ||||
|     } | ||||
|  | ||||
| @@ -332,7 +567,7 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|  | ||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
| @@ -358,9 +593,8 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
|  | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -1,81 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/EigenSort.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_EIGENSORT_H | ||||
| #define GRID_EIGENSORT_H | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Eigen sorter to begin with | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|  | ||||
| // Helper that reorders eigenvalues (and optionally their eigenvectors) so that the | ||||
| // N largest eigenvalues come first, in decreasing order. | ||||
| template<class Field> | ||||
| class SortEigen { | ||||
|  private: | ||||
|  | ||||
|   // Comparators handed to std::partial_sort. Both return true when the LEFT value is | ||||
|   // the larger one, which makes the sort descending. Arguments are taken by value / | ||||
|   // const reference, as the standard requires of a comparison function. | ||||
|   static bool less_lmd(RealD left,RealD right){ | ||||
|     return left > right; | ||||
|   } | ||||
|   static bool less_pair(const std::pair<RealD,Field const*>& left, | ||||
|                         const std::pair<RealD,Field const*>& right){ | ||||
|     return left.first > (right.first); | ||||
|   } | ||||
|  | ||||
|  public: | ||||
|  | ||||
|   // Partially sort lmd together with evec: on return lmd[0..N-1] are the N largest | ||||
|   // eigenvalues in decreasing order and evec[0..N-1] the matching vectors. | ||||
|   // Entries from N onwards of lmd and evec are left as they were. | ||||
|   // evec[0] is dereferenced and lmd.size() vectors are copied, so evec must be | ||||
|   // non-empty and hold at least lmd.size() entries; N must not exceed lmd.size(). | ||||
|   void push(DenseVector<RealD>& lmd, | ||||
|             DenseVector<Field>& evec,int N) { | ||||
|     // Work on copies so that overwriting evec[i] cannot clobber a vector still needed. | ||||
|     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); | ||||
|     for(std::size_t i=0;i<lmd.size();i++) cpy[i] = evec[i]; | ||||
|  | ||||
|     // Pair every eigenvalue with a pointer to the copy of its eigenvector. | ||||
|     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size()); | ||||
|     for(std::size_t i=0;i<lmd.size();++i) | ||||
|       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); | ||||
|  | ||||
|     std::partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); | ||||
|  | ||||
|     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); | ||||
|     for(int i=0;i<N;++i){ | ||||
|       lmd[i]=it->first; | ||||
|       evec[i]=*(it->second); | ||||
|       ++it; | ||||
|     } | ||||
|   } | ||||
|   // Eigenvalue-only variant: puts the N largest values first, in decreasing order. | ||||
|   void push(DenseVector<RealD>& lmd,int N) { | ||||
|     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); | ||||
|   } | ||||
|   // True when |lmd| exceeds |thrs|. | ||||
|   bool saturated(RealD lmd, RealD thrs) { | ||||
|     return fabs(lmd) > fabs(thrs); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -11,7 +11,7 @@ int PointerCache::victim; | ||||
|  | ||||
| void *PointerCache::Insert(void *ptr,size_t bytes) { | ||||
|  | ||||
|   if (bytes < 4096 ) return NULL; | ||||
|   if (bytes < 4096 ) return ptr; | ||||
|  | ||||
| #ifdef GRID_OMP | ||||
|   assert(omp_in_parallel()==0); | ||||
|   | ||||
| @@ -92,18 +92,34 @@ public: | ||||
|     size_type bytes = __n*sizeof(_Tp); | ||||
|  | ||||
|     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); | ||||
|      | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); | ||||
| #else | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); | ||||
| #endif | ||||
|     //    if ( ptr != NULL )  | ||||
|     //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl; | ||||
|  | ||||
|     ////////////////// | ||||
|     // Hack 2MB align; could make option probably doesn't need configurability | ||||
|     ////////////////// | ||||
| //define GRID_ALLOC_ALIGN (128) | ||||
| #define GRID_ALLOC_ALIGN (2*1024*1024) | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); | ||||
| #else | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); | ||||
| #endif | ||||
|     //    std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl; | ||||
|     // First touch optimise in threaded loop | ||||
|     uint8_t *cp = (uint8_t *)ptr; | ||||
| #ifdef GRID_OMP | ||||
| #pragma omp parallel for | ||||
| #endif | ||||
|     for(size_type n=0;n<bytes;n+=4096){ | ||||
|       cp[n]=0; | ||||
|     } | ||||
|     return ptr; | ||||
|   } | ||||
|  | ||||
|   void deallocate(pointer __p, size_type __n) {  | ||||
|     size_type bytes = __n * sizeof(_Tp); | ||||
|  | ||||
|     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); | ||||
|  | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
| @@ -182,10 +198,19 @@ public: | ||||
|   pointer allocate(size_type __n, const void* _p= 0)  | ||||
|   { | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); | ||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); | ||||
| #else | ||||
|     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); | ||||
|     _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); | ||||
| #endif | ||||
|     size_type bytes = __n*sizeof(_Tp); | ||||
|     uint8_t *cp = (uint8_t *)ptr; | ||||
|     if ( ptr ) {  | ||||
|     // One touch per 4k page, static OMP loop to catch same loop order | ||||
| #pragma omp parallel for schedule(static) | ||||
|       for(size_type n=0;n<bytes;n+=4096){ | ||||
| 	cp[n]=0; | ||||
|       } | ||||
|     } | ||||
|     return ptr; | ||||
|   } | ||||
|   void deallocate(pointer __p, size_type) {  | ||||
|   | ||||
| @@ -50,7 +50,6 @@ public: | ||||
|  | ||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||
|  | ||||
|  | ||||
|     // Physics Grid information. | ||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||
|     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal | ||||
| @@ -63,13 +62,12 @@ public: | ||||
|     int _isites; | ||||
|     int _fsites;                  // _isites*_osites = product(dimensions). | ||||
|     int _gsites; | ||||
|     std::vector<int> _slice_block;   // subslice information | ||||
|     std::vector<int> _slice_block;// subslice information | ||||
|     std::vector<int> _slice_stride; | ||||
|     std::vector<int> _slice_nblock; | ||||
|  | ||||
|     // Might need these at some point | ||||
|     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] | ||||
|     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||
|     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] | ||||
|     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||
|  | ||||
| public: | ||||
|  | ||||
| @@ -176,6 +174,7 @@ public: | ||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||
|     inline int Nd    (void) const { return _ndimension;}; | ||||
|  | ||||
|     // Returns _lstart by const reference, like the neighbouring accessors, instead of copying the vector on every call. | ||||
|     inline const std::vector<int> &LocalStarts(void)            { return _lstart;    }; | ||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||
| @@ -186,17 +185,18 @@ public: | ||||
|     //////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     void show_decomposition(){ | ||||
|       std::cout << GridLogMessage << "Full Dimensions    : " << _fdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "Global Dimensions  : " << _gdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "Local Dimensions   : " << _ldimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "Outer strides      : " << _ostride << std::endl; | ||||
|       std::cout << GridLogMessage << "Inner strides      : " << _istride << std::endl; | ||||
|       std::cout << GridLogMessage << "iSites             : " << _isites << std::endl; | ||||
|       std::cout << GridLogMessage << "oSites             : " << _osites << std::endl; | ||||
|       std::cout << GridLogMessage << "lSites             : " << lSites() << std::endl;         | ||||
|       std::cout << GridLogMessage << "gSites             : " << gSites() << std::endl; | ||||
|       std::cout << GridLogMessage << "Nd                 : " << _ndimension << std::endl;              | ||||
|       std::cout << GridLogMessage << "\tFull Dimensions    : " << _fdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tSIMD layout        : " << _simd_layout << std::endl; | ||||
|       std::cout << GridLogMessage << "\tGlobal Dimensions  : " << _gdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tLocal Dimensions   : " << _ldimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tOuter strides      : " << _ostride << std::endl; | ||||
|       std::cout << GridLogMessage << "\tInner strides      : " << _istride << std::endl; | ||||
|       std::cout << GridLogMessage << "\tiSites             : " << _isites << std::endl; | ||||
|       std::cout << GridLogMessage << "\toSites             : " << _osites << std::endl; | ||||
|       std::cout << GridLogMessage << "\tlSites             : " << lSites() << std::endl;         | ||||
|       std::cout << GridLogMessage << "\tgSites             : " << gSites() << std::endl; | ||||
|       std::cout << GridLogMessage << "\tNd                 : " << _ndimension << std::endl;              | ||||
|     }  | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -62,73 +62,81 @@ public: | ||||
|       return shift; | ||||
|     } | ||||
|     GridCartesian(const std::vector<int> &dimensions, | ||||
| 		  const std::vector<int> &simd_layout, | ||||
| 		  const std::vector<int> &processor_grid | ||||
| 		  ) : GridBase(processor_grid) | ||||
|                   const std::vector<int> &simd_layout, | ||||
|                   const std::vector<int> &processor_grid) : GridBase(processor_grid) | ||||
|     { | ||||
|         /////////////////////// | ||||
|         // Grid information | ||||
|         /////////////////////// | ||||
|         _ndimension = dimensions.size(); | ||||
|              | ||||
|         _fdimensions.resize(_ndimension); | ||||
|         _gdimensions.resize(_ndimension); | ||||
|         _ldimensions.resize(_ndimension); | ||||
|         _rdimensions.resize(_ndimension); | ||||
|         _simd_layout.resize(_ndimension); | ||||
|              | ||||
|         _ostride.resize(_ndimension); | ||||
|         _istride.resize(_ndimension); | ||||
|              | ||||
|         _fsites = _gsites = _osites = _isites = 1; | ||||
|       /////////////////////// | ||||
|       // Grid information | ||||
|       /////////////////////// | ||||
|       _ndimension = dimensions.size(); | ||||
|  | ||||
|         for(int d=0;d<_ndimension;d++){ | ||||
| 	  _fdimensions[d] = dimensions[d]; // Global dimensions | ||||
| 	  _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||
| 	  _simd_layout[d] = simd_layout[d]; | ||||
| 	  _fsites = _fsites * _fdimensions[d]; | ||||
| 	  _gsites = _gsites * _gdimensions[d]; | ||||
|       _fdimensions.resize(_ndimension); | ||||
|       _gdimensions.resize(_ndimension); | ||||
|       _ldimensions.resize(_ndimension); | ||||
|       _rdimensions.resize(_ndimension); | ||||
|       _simd_layout.resize(_ndimension); | ||||
|       _lstart.resize(_ndimension); | ||||
|       _lend.resize(_ndimension); | ||||
|  | ||||
| 	  //FIXME check for exact division | ||||
|       _ostride.resize(_ndimension); | ||||
|       _istride.resize(_ndimension); | ||||
|  | ||||
| 	  // Use a reduced simd grid | ||||
| 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | ||||
| 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | ||||
| 	  _osites *= _rdimensions[d]; | ||||
| 	  _isites *= _simd_layout[d]; | ||||
|                  | ||||
| 	  // Addressing support | ||||
| 	  if ( d==0 ) { | ||||
| 	    _ostride[d] = 1; | ||||
| 	    _istride[d] = 1; | ||||
| 	  } else { | ||||
| 	    _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||
| 	    _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||
| 	  } | ||||
|       _fsites = _gsites = _osites = _isites = 1; | ||||
|  | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         _fdimensions[d] = dimensions[d];   // Global dimensions | ||||
|         _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||
|         _simd_layout[d] = simd_layout[d]; | ||||
|         _fsites = _fsites * _fdimensions[d]; | ||||
|         _gsites = _gsites * _gdimensions[d]; | ||||
|  | ||||
|         // Use a reduced simd grid | ||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions | ||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||
|  | ||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition | ||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | ||||
|  | ||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; | ||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; | ||||
|         _osites *= _rdimensions[d]; | ||||
|         _isites *= _simd_layout[d]; | ||||
|  | ||||
|         // Addressing support | ||||
|         if (d == 0) | ||||
|         { | ||||
|           _ostride[d] = 1; | ||||
|           _istride[d] = 1; | ||||
|         } | ||||
|          | ||||
|         /////////////////////// | ||||
|         // subplane information | ||||
|         /////////////////////// | ||||
|         _slice_block.resize(_ndimension); | ||||
|         _slice_stride.resize(_ndimension); | ||||
|         _slice_nblock.resize(_ndimension); | ||||
|              | ||||
|         int block =1; | ||||
|         int nblock=1; | ||||
|         for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||
|              | ||||
|         for(int d=0;d<_ndimension;d++){ | ||||
|             nblock/=_rdimensions[d]; | ||||
|             _slice_block[d] =block; | ||||
|             _slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||
|             _slice_nblock[d]=nblock; | ||||
|             block = block*_rdimensions[d]; | ||||
|         else | ||||
|         { | ||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; | ||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       /////////////////////// | ||||
|       // subplane information | ||||
|       /////////////////////// | ||||
|       _slice_block.resize(_ndimension); | ||||
|       _slice_stride.resize(_ndimension); | ||||
|       _slice_nblock.resize(_ndimension); | ||||
|  | ||||
|       int block = 1; | ||||
|       int nblock = 1; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|         nblock *= _rdimensions[d]; | ||||
|  | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         nblock /= _rdimensions[d]; | ||||
|         _slice_block[d] = block; | ||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | ||||
|         _slice_nblock[d] = nblock; | ||||
|         block = block * _rdimensions[d]; | ||||
|       } | ||||
|     }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -131,132 +131,155 @@ public: | ||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); | ||||
|     } | ||||
|     void Init(const std::vector<int> &dimensions, | ||||
| 	      const std::vector<int> &simd_layout, | ||||
| 	      const std::vector<int> &processor_grid, | ||||
| 	      const std::vector<int> &checker_dim_mask, | ||||
| 	      int checker_dim) | ||||
|               const std::vector<int> &simd_layout, | ||||
|               const std::vector<int> &processor_grid, | ||||
|               const std::vector<int> &checker_dim_mask, | ||||
|               int checker_dim) | ||||
|     { | ||||
|     /////////////////////// | ||||
|     // Grid information | ||||
|     /////////////////////// | ||||
|       /////////////////////// | ||||
|       // Grid information | ||||
|       /////////////////////// | ||||
|       _checker_dim = checker_dim; | ||||
|       assert(checker_dim_mask[checker_dim]==1); | ||||
|       assert(checker_dim_mask[checker_dim] == 1); | ||||
|       _ndimension = dimensions.size(); | ||||
|       assert(checker_dim_mask.size()==_ndimension); | ||||
|       assert(processor_grid.size()==_ndimension); | ||||
|       assert(simd_layout.size()==_ndimension); | ||||
|        | ||||
|       assert(checker_dim_mask.size() == _ndimension); | ||||
|       assert(processor_grid.size() == _ndimension); | ||||
|       assert(simd_layout.size() == _ndimension); | ||||
|  | ||||
|       _fdimensions.resize(_ndimension); | ||||
|       _gdimensions.resize(_ndimension); | ||||
|       _ldimensions.resize(_ndimension); | ||||
|       _rdimensions.resize(_ndimension); | ||||
|       _simd_layout.resize(_ndimension); | ||||
|        | ||||
|       _lstart.resize(_ndimension); | ||||
|       _lend.resize(_ndimension); | ||||
|  | ||||
|       _ostride.resize(_ndimension); | ||||
|       _istride.resize(_ndimension); | ||||
|        | ||||
|  | ||||
|       _fsites = _gsites = _osites = _isites = 1; | ||||
| 	 | ||||
|       _checker_dim_mask=checker_dim_mask; | ||||
|  | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	_fdimensions[d] = dimensions[d]; | ||||
| 	_gdimensions[d] = _fdimensions[d]; | ||||
| 	_fsites = _fsites * _fdimensions[d]; | ||||
| 	_gsites = _gsites * _gdimensions[d]; | ||||
|          | ||||
| 	if (d==_checker_dim) { | ||||
| 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | ||||
| 	} | ||||
| 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | ||||
|       _checker_dim_mask = checker_dim_mask; | ||||
|  | ||||
| 	// Use a reduced simd grid | ||||
| 	_simd_layout[d] = simd_layout[d]; | ||||
| 	_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; | ||||
| 	assert(_rdimensions[d]>0); | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         _fdimensions[d] = dimensions[d]; | ||||
|         _gdimensions[d] = _fdimensions[d]; | ||||
|         _fsites = _fsites * _fdimensions[d]; | ||||
|         _gsites = _gsites * _gdimensions[d]; | ||||
|  | ||||
| 	// all elements of a simd vector must have same checkerboard. | ||||
| 	// If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||
| 	if ( _simd_layout[d]>1 ) { | ||||
| 	  if ( checker_dim_mask[d] ) {  | ||||
| 	    assert( (_rdimensions[d]&0x1) == 0 ); | ||||
| 	  } | ||||
| 	} | ||||
|         if (d == _checker_dim) | ||||
|         { | ||||
|           assert((_gdimensions[d] & 0x1) == 0); | ||||
|           _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard | ||||
|         } | ||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; | ||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; | ||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; | ||||
|  | ||||
| 	_osites *= _rdimensions[d]; | ||||
| 	_isites *= _simd_layout[d]; | ||||
|          | ||||
| 	// Addressing support | ||||
| 	if ( d==0 ) { | ||||
| 	  _ostride[d] = 1; | ||||
| 	  _istride[d] = 1; | ||||
| 	} else { | ||||
| 	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||
| 	  _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||
| 	} | ||||
|         // Use a reduced simd grid | ||||
|         _simd_layout[d] = simd_layout[d]; | ||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer | ||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | ||||
|         assert(_rdimensions[d] > 0); | ||||
|  | ||||
|         // all elements of a simd vector must have same checkerboard. | ||||
|         // If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||
|         if (_simd_layout[d] > 1) | ||||
|         { | ||||
|           if (checker_dim_mask[d]) | ||||
|           { | ||||
|             assert((_rdimensions[d] & 0x1) == 0); | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         _osites *= _rdimensions[d]; | ||||
|         _isites *= _simd_layout[d]; | ||||
|  | ||||
|         // Addressing support | ||||
|         if (d == 0) | ||||
|         { | ||||
|           _ostride[d] = 1; | ||||
|           _istride[d] = 1; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; | ||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; | ||||
|         } | ||||
|       } | ||||
|              | ||||
|  | ||||
|       //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|       // subplane information | ||||
|       //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|       _slice_block.resize(_ndimension); | ||||
|       _slice_stride.resize(_ndimension); | ||||
|       _slice_nblock.resize(_ndimension); | ||||
|          | ||||
|       int block =1; | ||||
|       int nblock=1; | ||||
|       for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||
|        | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	nblock/=_rdimensions[d]; | ||||
| 	_slice_block[d] =block; | ||||
| 	_slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||
| 	_slice_nblock[d]=nblock; | ||||
| 	block = block*_rdimensions[d]; | ||||
|  | ||||
|       int block = 1; | ||||
|       int nblock = 1; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|         nblock *= _rdimensions[d]; | ||||
|  | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         nblock /= _rdimensions[d]; | ||||
|         _slice_block[d] = block; | ||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | ||||
|         _slice_nblock[d] = nblock; | ||||
|         block = block * _rdimensions[d]; | ||||
|       } | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // Create a checkerboard lookup table | ||||
|       //////////////////////////////////////////////// | ||||
|       int rvol = 1; | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	rvol=rvol * _rdimensions[d]; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         rvol = rvol * _rdimensions[d]; | ||||
|       } | ||||
|       _checker_board.resize(rvol); | ||||
|       for(int osite=0;osite<_osites;osite++){ | ||||
| 	_checker_board[osite] = CheckerBoardFromOindex (osite); | ||||
|       for (int osite = 0; osite < _osites; osite++) | ||||
|       { | ||||
|         _checker_board[osite] = CheckerBoardFromOindex(osite); | ||||
|       } | ||||
|        | ||||
|     }; | ||||
| protected: | ||||
|  | ||||
|   protected: | ||||
|     virtual int oIndex(std::vector<int> &coor) | ||||
|     { | ||||
|       int idx=0; | ||||
|       for(int d=0;d<_ndimension;d++) { | ||||
| 	if( d==_checker_dim ) { | ||||
| 	  idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]); | ||||
| 	} else { | ||||
| 	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||
| 	} | ||||
|       int idx = 0; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         if (d == _checker_dim) | ||||
|         { | ||||
|           idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           idx += _ostride[d] * (coor[d] % _rdimensions[d]); | ||||
|         } | ||||
|       } | ||||
|       return idx; | ||||
|     }; | ||||
|          | ||||
|  | ||||
|     virtual int iIndex(std::vector<int> &lcoor) | ||||
|     { | ||||
|         int idx=0; | ||||
|         for(int d=0;d<_ndimension;d++) { | ||||
| 	  if( d==_checker_dim ) { | ||||
| 	    idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); | ||||
| 	  } else {  | ||||
| 	    idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); | ||||
| 	  } | ||||
| 	} | ||||
|         return idx; | ||||
|       int idx = 0; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         if (d == _checker_dim) | ||||
|         { | ||||
|           idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           idx += _istride[d] * (lcoor[d] / _rdimensions[d]); | ||||
|         } | ||||
|       } | ||||
|       return idx; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -26,6 +26,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/GridCore.h> | ||||
| #include <fcntl.h> | ||||
| #include <unistd.h> | ||||
| #include <limits.h> | ||||
| #include <sys/mman.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| @@ -33,8 +37,11 @@ namespace Grid { | ||||
| // Info that is setup once and indept of cartesian layout | ||||
| /////////////////////////////////////////////////////////////// | ||||
| void *              CartesianCommunicator::ShmCommBuf; | ||||
| uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;  | ||||
| CartesianCommunicator::CommunicatorPolicy_t  CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; | ||||
| uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 1024LL*1024LL*1024LL;  | ||||
| CartesianCommunicator::CommunicatorPolicy_t   | ||||
| CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; | ||||
| int CartesianCommunicator::nCommThreads = -1; | ||||
| int CartesianCommunicator::Hugepages = 0; | ||||
|  | ||||
| ///////////////////////////////// | ||||
| // Alloc, free shmem region | ||||
| @@ -60,6 +67,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { | ||||
| ///////////////////////////////// | ||||
| // Grid information queries | ||||
| ///////////////////////////////// | ||||
| int                      CartesianCommunicator::Dimensions(void)         { return _ndimension; }; | ||||
| int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | ||||
| int                      CartesianCommunicator::BossRank(void)          { return 0; }; | ||||
| int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | ||||
| @@ -88,24 +96,43 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | ||||
|   GlobalSumVector((double *)c,2*N); | ||||
| } | ||||
|  | ||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||
| #if !defined( GRID_COMMS_MPI3)  | ||||
|  | ||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||
|  | ||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 						       void *xmit, | ||||
| 						       int xmit_to_rank, | ||||
| 						       void *recv, | ||||
| 						       int recv_from_rank, | ||||
| 						       int bytes) | ||||
| int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; | ||||
| #endif | ||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) | ||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, | ||||
| 						     int xmit_to_rank, | ||||
| 						     void *recv, | ||||
| 						     int recv_from_rank, | ||||
| 						     int bytes, int dir) | ||||
| { | ||||
|   std::vector<CommsRequest_t> list; | ||||
|   // Discard the "dir" | ||||
|   SendToRecvFromBegin   (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||
|   SendToRecvFromComplete(list); | ||||
|   return 2.0*bytes; | ||||
| } | ||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 							 void *xmit, | ||||
| 							 int xmit_to_rank, | ||||
| 							 void *recv, | ||||
| 							 int recv_from_rank, | ||||
| 							 int bytes, int dir) | ||||
| { | ||||
|   // Discard the "dir" | ||||
|   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||
|   return 2.0*bytes; | ||||
| } | ||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | ||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | ||||
| { | ||||
|   SendToRecvFromComplete(waitall); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #if !defined( GRID_COMMS_MPI3)  | ||||
|  | ||||
| void CartesianCommunicator::StencilBarrier(void){}; | ||||
|  | ||||
| commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | ||||
| @@ -119,8 +146,25 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { | ||||
|   return NULL; | ||||
| } | ||||
| void CartesianCommunicator::ShmInitGeneric(void){ | ||||
| #if 1 | ||||
|  | ||||
|   int mmap_flag = MAP_SHARED | MAP_ANONYMOUS; | ||||
| #ifdef MAP_HUGETLB | ||||
|   if ( Hugepages ) mmap_flag |= MAP_HUGETLB; | ||||
| #endif | ||||
|   ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);  | ||||
|   if (ShmCommBuf == (void *)MAP_FAILED) { | ||||
|     perror("mmap failed "); | ||||
|     exit(EXIT_FAILURE);   | ||||
|   } | ||||
| #ifdef MADV_HUGEPAGE | ||||
|   if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE); | ||||
| #endif | ||||
| #else  | ||||
|   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); | ||||
|   ShmCommBuf=(void *)&ShmBufStorageVector[0]; | ||||
| #endif | ||||
|   bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -38,7 +38,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifdef GRID_COMMS_MPI3 | ||||
| #include <mpi.h> | ||||
| #endif | ||||
| #ifdef GRID_COMMS_MPI3L | ||||
| #ifdef GRID_COMMS_MPIT | ||||
| #include <mpi.h> | ||||
| #endif | ||||
| #ifdef GRID_COMMS_SHMEM | ||||
| @@ -50,12 +50,24 @@ namespace Grid { | ||||
| class CartesianCommunicator { | ||||
|   public:     | ||||
|  | ||||
|   // 65536 ranks per node adequate for now | ||||
|  | ||||
|   //////////////////////////////////////////// | ||||
|   // Isend/Irecv/Wait, or Sendrecv blocking | ||||
|   //////////////////////////////////////////// | ||||
|   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; | ||||
|   static CommunicatorPolicy_t CommunicatorPolicy; | ||||
|   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } | ||||
|  | ||||
|   /////////////////////////////////////////// | ||||
|   // Up to 65536 ranks per node adequate for now | ||||
|   // 128MB shared memory for comms enought for 48^4 local vol comms | ||||
|   // Give external control (command line override?) of this | ||||
|  | ||||
|   static const int      MAXLOG2RANKSPERNODE = 16;             | ||||
|   static uint64_t MAX_MPI_SHM_BYTES; | ||||
|   /////////////////////////////////////////// | ||||
|   static const int MAXLOG2RANKSPERNODE = 16;             | ||||
|   static uint64_t  MAX_MPI_SHM_BYTES; | ||||
|   static int       nCommThreads; | ||||
|   // use explicit huge pages | ||||
|   static int       Hugepages; | ||||
|  | ||||
|   // Communicator should know nothing of the physics grid, only processor grid. | ||||
|   int              _Nprocessors;     // How many in all | ||||
| @@ -64,14 +76,18 @@ class CartesianCommunicator { | ||||
|   std::vector<int> _processor_coor;  // linear processor coordinate | ||||
|   unsigned long _ndimension; | ||||
|  | ||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L) | ||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) | ||||
|   static MPI_Comm communicator_world; | ||||
|          MPI_Comm communicator; | ||||
|  | ||||
|   MPI_Comm              communicator; | ||||
|   std::vector<MPI_Comm> communicator_halo; | ||||
|  | ||||
|   typedef MPI_Request CommsRequest_t; | ||||
| #else  | ||||
|   typedef int CommsRequest_t; | ||||
| #endif | ||||
|  | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////// | ||||
|   // Helper functionality for SHM Windows common to all other impls | ||||
|   //////////////////////////////////////////////////////////////////// | ||||
| @@ -117,11 +133,7 @@ class CartesianCommunicator { | ||||
|   ///////////////////////////////// | ||||
|   static void * ShmCommBuf; | ||||
|  | ||||
|   // Isend/Irecv/Wait, or Sendrecv blocking | ||||
|   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; | ||||
|   static CommunicatorPolicy_t CommunicatorPolicy; | ||||
|   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } | ||||
|  | ||||
|    | ||||
|   size_t heap_top; | ||||
|   size_t heap_bytes; | ||||
|  | ||||
| @@ -148,6 +160,7 @@ class CartesianCommunicator { | ||||
|   int  RankFromProcessorCoor(std::vector<int> &coor); | ||||
|   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); | ||||
|    | ||||
|   int                      Dimensions(void)        ; | ||||
|   int                      IsBoss(void)            ; | ||||
|   int                      BossRank(void)          ; | ||||
|   int                      ThisRank(void)          ; | ||||
| @@ -155,6 +168,7 @@ class CartesianCommunicator { | ||||
|   const std::vector<int> & ProcessorGrid(void)     ; | ||||
|   int                      ProcessorCount(void)    ; | ||||
|   int                      NodeCount(void)    ; | ||||
|   int                      RankCount(void)    ; | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||
| @@ -175,6 +189,8 @@ class CartesianCommunicator { | ||||
|   void GlobalSumVector(ComplexF *c,int N); | ||||
|   void GlobalSum(ComplexD &c); | ||||
|   void GlobalSumVector(ComplexD *c,int N); | ||||
|   void GlobalXOR(uint32_t &); | ||||
|   void GlobalXOR(uint64_t &); | ||||
|    | ||||
|   template<class obj> void GlobalSum(obj &o){ | ||||
|     typedef typename obj::scalar_type scalar_type; | ||||
| @@ -207,14 +223,21 @@ class CartesianCommunicator { | ||||
|    | ||||
|   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||
|  | ||||
|   double StencilSendToRecvFrom(void *xmit, | ||||
| 			       int xmit_to_rank, | ||||
| 			       void *recv, | ||||
| 			       int recv_from_rank, | ||||
| 			       int bytes,int dir); | ||||
|  | ||||
|   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 				  void *xmit, | ||||
| 				  int xmit_to_rank, | ||||
| 				  void *recv, | ||||
| 				  int recv_from_rank, | ||||
| 				  int bytes); | ||||
| 				    void *xmit, | ||||
| 				    int xmit_to_rank, | ||||
| 				    void *recv, | ||||
| 				    int recv_from_rank, | ||||
| 				    int bytes,int dir); | ||||
|    | ||||
|   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||
|    | ||||
|   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i); | ||||
|   void StencilBarrier(void); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(float &f){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
|   | ||||
| @@ -37,11 +37,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <sys/ipc.h> | ||||
| #include <sys/shm.h> | ||||
| #include <sys/mman.h> | ||||
| //#include <zlib.h> | ||||
| #ifndef SHM_HUGETLB | ||||
| #define SHM_HUGETLB 04000 | ||||
| #include <zlib.h> | ||||
| #ifdef HAVE_NUMAIF_H | ||||
| #include <numaif.h> | ||||
| #endif | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -65,6 +66,7 @@ std::vector<int> CartesianCommunicator::MyGroup; | ||||
| std::vector<void *> CartesianCommunicator::ShmCommBufs; | ||||
|  | ||||
| int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | ||||
| int CartesianCommunicator::RankCount(void)    { return WorldSize;}; | ||||
|  | ||||
|  | ||||
| #undef FORCE_COMMS | ||||
| @@ -196,7 +198,46 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|   ShmCommBuf = 0; | ||||
|   ShmCommBufs.resize(ShmSize); | ||||
|  | ||||
| #if 1 | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Hugetlbf and others map filesystems as mappable huge pages | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef GRID_MPI3_SHMMMAP | ||||
|   char shm_name [NAME_MAX]; | ||||
|   for(int r=0;r<ShmSize;r++){ | ||||
|      | ||||
|     size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; | ||||
|     sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r); | ||||
|     //sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r); | ||||
|     //    printf("Opening file %s \n",shm_name); | ||||
|     int fd=open(shm_name,O_RDWR|O_CREAT,0666); | ||||
|     if ( fd == -1) {  | ||||
|       printf("open %s failed\n",shm_name); | ||||
|       perror("open hugetlbfs"); | ||||
|       exit(0); | ||||
|     } | ||||
|     int mmap_flag = MAP_SHARED ; | ||||
| #ifdef MAP_POPULATE     | ||||
|     mmap_flag|=MAP_POPULATE; | ||||
| #endif | ||||
| #ifdef MAP_HUGETLB | ||||
|     if ( Hugepages ) mmap_flag |= MAP_HUGETLB; | ||||
| #endif | ||||
|     void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);  | ||||
|     if ( ptr == (void *)MAP_FAILED ) {     | ||||
|       printf("mmap %s failed\n",shm_name); | ||||
|       perror("failed mmap");      assert(0);     | ||||
|     } | ||||
|     assert(((uint64_t)ptr&0x3F)==0); | ||||
|     ShmCommBufs[r] =ptr; | ||||
|      | ||||
|   } | ||||
| #endif | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case | ||||
|   // tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for  | ||||
|   // the posix shm virtual file system | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef GRID_MPI3_SHMOPEN | ||||
|   char shm_name [NAME_MAX]; | ||||
|   if ( ShmRank == 0 ) { | ||||
|     for(int r=0;r<ShmSize;r++){ | ||||
| @@ -209,11 +250,39 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); | ||||
|       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } | ||||
|       ftruncate(fd, size); | ||||
|        | ||||
|       int mmap_flag = MAP_SHARED; | ||||
| #ifdef MAP_POPULATE  | ||||
|       mmap_flag |= MAP_POPULATE; | ||||
| #endif | ||||
| #ifdef MAP_HUGETLB | ||||
|       if (Hugepages) mmap_flag |= MAP_HUGETLB; | ||||
| #endif | ||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); | ||||
|  | ||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||||
|       if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    } | ||||
|       if ( ptr == (void * )MAP_FAILED ) {       perror("failed mmap");      assert(0);    } | ||||
|       assert(((uint64_t)ptr&0x3F)==0); | ||||
|       ShmCommBufs[r] =ptr; | ||||
|  | ||||
| // Experiments; Experiments; Try to force numa domain on the shm segment if we have numaif.h | ||||
| #if 0 | ||||
| //#ifdef HAVE_NUMAIF_H | ||||
| 	int status; | ||||
| 	int flags=MPOL_MF_MOVE; | ||||
| #ifdef KNL | ||||
| 	int nodes=1; // numa domain == MCDRAM | ||||
| 	// Find out if in SNC2,SNC4 mode ? | ||||
| #else | ||||
| 	int nodes=r; // numa domain == MPI ID | ||||
| #endif | ||||
| 	unsigned long count=1; | ||||
| 	for(uint64_t page=0;page<size;page+=4096){ | ||||
| 	  void *pages = (void *) ( page + (uint64_t)ptr ); | ||||
| 	  uint64_t *cow_it = (uint64_t *)pages;	*cow_it = 1; | ||||
| 	  ierr= move_pages(0,count, &pages,&nodes,&status,flags); | ||||
| 	  if (ierr && (page==0)) perror("numa relocate command failed"); | ||||
| 	} | ||||
| #endif | ||||
| 	ShmCommBufs[r] =ptr; | ||||
|        | ||||
|     } | ||||
|   } | ||||
| @@ -235,21 +304,32 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|       ShmCommBufs[r] =ptr; | ||||
|     } | ||||
|   } | ||||
|  | ||||
| #else | ||||
| #endif | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // SHMGET SHMAT and SHM_HUGETLB flag | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef GRID_MPI3_SHMGET | ||||
|   std::vector<int> shmids(ShmSize); | ||||
|  | ||||
|   if ( ShmRank == 0 ) { | ||||
|     for(int r=0;r<ShmSize;r++){ | ||||
|       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; | ||||
|       key_t key   = 0x4545 + r; | ||||
|       if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { | ||||
|       key_t key   = IPC_PRIVATE; | ||||
|       int flags = IPC_CREAT | SHM_R | SHM_W; | ||||
| #ifdef SHM_HUGETLB | ||||
|       if (Hugepages) flags|=SHM_HUGETLB; | ||||
| #endif | ||||
|       if ((shmids[r]= shmget(key,size, flags)) ==-1) { | ||||
| 	int errsv = errno; | ||||
| 	printf("Errno %d\n",errsv); | ||||
| 	printf("key   %d\n",key); | ||||
| 	printf("size  %lld\n",size); | ||||
| 	printf("flags %d\n",flags); | ||||
| 	perror("shmget"); | ||||
| 	exit(1); | ||||
|       } else {  | ||||
| 	printf("shmid: 0x%x\n", shmids[r]); | ||||
|       } | ||||
|       printf("shmid: 0x%x\n", shmids[r]); | ||||
|     } | ||||
|   } | ||||
|   MPI_Barrier(ShmComm); | ||||
| @@ -374,8 +454,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| {  | ||||
|   int ierr; | ||||
|   communicator=communicator_world; | ||||
|  | ||||
|   _ndimension = processors.size(); | ||||
|  | ||||
|   communicator_halo.resize (2*_ndimension); | ||||
|   for(int i=0;i<_ndimension*2;i++){ | ||||
|     MPI_Comm_dup(communicator,&communicator_halo[i]); | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Assert power of two shm_size. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
| @@ -509,6 +595,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| // In-place bitwise-XOR reduction of a 32-bit word across the communicator. | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| // In-place bitwise-XOR reduction of a 64-bit word across the communicator. | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(float &f){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| @@ -590,13 +684,27 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis | ||||
|   } | ||||
| } | ||||
|  | ||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 						       void *xmit, | ||||
| 						       int dest, | ||||
| 						       void *recv, | ||||
| 						       int from, | ||||
| 						       int bytes) | ||||
| double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, | ||||
|                                                      int dest, | ||||
|                                                      void *recv, | ||||
|                                                      int from, | ||||
|                                                      int bytes,int dir) | ||||
| { | ||||
|   // Blocking variant: post the exchange, then wait on the requests it produced. | ||||
|   std::vector<CommsRequest_t> pending; | ||||
|   double off_node = StencilSendToRecvFromBegin(pending,xmit,dest,recv,from,bytes,dir); | ||||
|   StencilSendToRecvFromComplete(pending,dir); | ||||
|   return off_node; | ||||
| } | ||||
|  | ||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 							 void *xmit, | ||||
| 							 int dest, | ||||
| 							 void *recv, | ||||
| 							 int from, | ||||
| 							 int bytes,int dir) | ||||
| { | ||||
|   assert(dir < communicator_halo.size()); | ||||
|  | ||||
|   MPI_Request xrq; | ||||
|   MPI_Request rrq; | ||||
|  | ||||
| @@ -615,26 +723,26 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | ||||
|   gfrom = MPI_UNDEFINED; | ||||
| #endif | ||||
|   if ( gfrom ==MPI_UNDEFINED) { | ||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); | ||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq); | ||||
|     assert(ierr==0); | ||||
|     list.push_back(rrq); | ||||
|     off_node_bytes+=bytes; | ||||
|   } | ||||
|  | ||||
|   if ( gdest == MPI_UNDEFINED ) { | ||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); | ||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq); | ||||
|     assert(ierr==0); | ||||
|     list.push_back(xrq); | ||||
|     off_node_bytes+=bytes; | ||||
|   } | ||||
|  | ||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  | ||||
|     this->StencilSendToRecvFromComplete(list); | ||||
|     this->StencilSendToRecvFromComplete(list,dir); | ||||
|   } | ||||
|  | ||||
|   return off_node_bytes; | ||||
| } | ||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | ||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | ||||
| { | ||||
|   SendToRecvFromComplete(waitall); | ||||
| } | ||||
|   | ||||
							
								
								
									
										286
									
								
								lib/communicator/Communicator_mpit.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								lib/communicator/Communicator_mpit.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/communicator/Communicator_mpit.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/GridCore.h> | ||||
| #include <Grid/GridQCDcore.h> | ||||
| #include <Grid/qcd/action/ActionCore.h> | ||||
| #include <mpi.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Info that is set up once and is independent of the cartesian layout | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| MPI_Comm CartesianCommunicator::communicator_world; | ||||
|  | ||||
| // Should error check all MPI calls. | ||||
| void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|   // Another library may already have started MPI, so query before initialising. | ||||
|   int already_up = 0; | ||||
|   MPI_Initialized(&already_up); | ||||
|   if ( already_up == 0 ) { | ||||
|     int granted; | ||||
|     MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&granted); | ||||
|     // Without MPI_THREAD_MULTIPLE, select the CommsThenCompute setting for the Wilson kernels. | ||||
|     if ( granted != MPI_THREAD_MULTIPLE ) { | ||||
|       QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; | ||||
|     } | ||||
|   } | ||||
|   // Keep a private duplicate of MPI_COMM_WORLD for this library. | ||||
|   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| { | ||||
|   _ndimension = processors.size(); | ||||
|   _processors = processors; | ||||
|   _processor_coor.resize(_ndimension); | ||||
|  | ||||
|   // Every direction of the process grid is periodic; MPI may reorder ranks (reorder flag = 1). | ||||
|   std::vector<int> wrap(_ndimension,1); | ||||
|   MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&wrap[0],1,&communicator); | ||||
|   MPI_Comm_rank(communicator,&_processor); | ||||
|   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||
|  | ||||
|   // Rank count implied by the requested decomposition. | ||||
|   _Nprocessors=1; | ||||
|   for(int d=0;d<_ndimension;d++){ | ||||
|     _Nprocessors*=_processors[d]; | ||||
|   } | ||||
|  | ||||
|   // One duplicated communicator per halo direction (two per dimension). | ||||
|   communicator_halo.resize (2*_ndimension); | ||||
|   for(int h=0;h<_ndimension*2;h++){ | ||||
|     MPI_Comm_dup(communicator,&communicator_halo[h]); | ||||
|   } | ||||
|  | ||||
|   // The cartesian communicator must contain exactly that many ranks. | ||||
|   int comm_size; | ||||
|   MPI_Comm_size(communicator,&comm_size); | ||||
|   assert(comm_size==_Nprocessors); | ||||
| } | ||||
| // Global reductions over the cartesian communicator; each overwrites its argument in place. | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(uint64_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| // Bitwise XOR across ranks. | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| // Floating-point sums, scalar and N-element array forms. | ||||
| void CartesianCommunicator::GlobalSum(float &f){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSumVector(float *f,int N) | ||||
| { | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(double &d) | ||||
| { | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSumVector(double *d,int N) | ||||
| { | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
| // Look up the source and destination ranks for a displacement of `shift` along `dim`. | ||||
| void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) | ||||
| { | ||||
|   const int status = MPI_Cart_shift(communicator,dim,shift,&source,&dest); | ||||
|   assert(status==0); | ||||
| } | ||||
| // Translate process-grid coordinates into a rank of the cartesian communicator. | ||||
| int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) | ||||
| { | ||||
|   int found_rank; | ||||
|   const int status = MPI_Cart_rank(communicator, &coor[0], &found_rank); | ||||
|   assert(status==0); | ||||
|   return found_rank; | ||||
| } | ||||
| // Translate a rank of the cartesian communicator into process-grid coordinates. | ||||
| void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor) | ||||
| { | ||||
|   // Make room for one coordinate per dimension before MPI fills them in. | ||||
|   coor.resize(_ndimension); | ||||
|   const int status = MPI_Cart_coords(communicator, rank, _ndimension, &coor[0]); | ||||
|   assert(status==0); | ||||
| } | ||||
|  | ||||
| // Basic Halo comms primitive | ||||
| void CartesianCommunicator::SendToRecvFrom(void *xmit, | ||||
|                                            int dest, | ||||
|                                            void *recv, | ||||
|                                            int from, | ||||
|                                            int bytes) | ||||
| { | ||||
|   // Post the exchange, then wait on whatever requests it produced. | ||||
|   std::vector<CommsRequest_t> pending; | ||||
|   SendToRecvFromBegin(pending,xmit,dest,recv,from,bytes); | ||||
|   SendToRecvFromComplete(pending); | ||||
| } | ||||
|  | ||||
| // Blocking point-to-point transfer of `bytes` bytes from rank `sender` to rank `receiver`. | ||||
| // Ranks that are neither sender nor receiver do nothing. | ||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||
|                                            void *recv, | ||||
|                                            int sender, | ||||
|                                            int receiver, | ||||
|                                            int bytes) | ||||
| { | ||||
|   assert(sender != receiver); | ||||
|   // The message is tagged with the sending rank. | ||||
|   const int tag = sender; | ||||
|   if ( _processor == sender ) { | ||||
|     MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); | ||||
|   } | ||||
|   if ( _processor == receiver ) { | ||||
|     MPI_Status stat; | ||||
|     MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Basic Halo comms primitive | ||||
| void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
|                                                 void *xmit, | ||||
|                                                 int dest, | ||||
|                                                 void *recv, | ||||
|                                                 int from, | ||||
|                                                 int bytes) | ||||
| { | ||||
|   if ( CommunicatorPolicy != CommunicatorPolicyConcurrent ) { | ||||
|     // Give the CPU to MPI immediately; can use threads to overlap optionally | ||||
|     const int status = MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,_processor, | ||||
|                                     recv,bytes,MPI_CHAR,from, from, | ||||
|                                     communicator,MPI_STATUS_IGNORE); | ||||
|     assert(status==0); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // Concurrent policy: post both halves non-blocking and hand the requests to the caller. | ||||
|   MPI_Request send_rq; | ||||
|   MPI_Request recv_rq; | ||||
|  | ||||
|   int status = MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&recv_rq); | ||||
|   status    |= MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&send_rq); | ||||
|   assert(status==0); | ||||
|  | ||||
|   list.push_back(send_rq); | ||||
|   list.push_back(recv_rq); | ||||
| } | ||||
| // Wait for all requests in `list` when the concurrent communicator policy is active. | ||||
| // Under any other policy, or when `list` is empty, this does nothing. | ||||
| void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) | ||||
| { | ||||
|   if ( CommunicatorPolicy != CommunicatorPolicyConcurrent ) return; | ||||
|   // Nothing was posted: indexing element 0 of an empty vector would be undefined behaviour. | ||||
|   if ( list.empty() ) return; | ||||
|  | ||||
|   int nreq=list.size(); | ||||
|   std::vector<MPI_Status> status(nreq); | ||||
|   int ierr = MPI_Waitall(nreq,&list[0],&status[0]); | ||||
|   assert(ierr==0); | ||||
| } | ||||
|  | ||||
| // Synchronise all ranks of the cartesian communicator. | ||||
| void CartesianCommunicator::Barrier(void) | ||||
| { | ||||
|   const int status = MPI_Barrier(communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
|  | ||||
| // Broadcast `bytes` raw bytes at `data` from rank `root` over the cartesian communicator. | ||||
| void CartesianCommunicator::Broadcast(int root,void* data, int bytes) | ||||
| { | ||||
|   const int status = MPI_Bcast(data, bytes, MPI_BYTE, root, communicator); | ||||
|   assert(status==0); | ||||
| } | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Should only be used prior to Grid Init finished. | ||||
|   // Check for this? | ||||
|   /////////////////////////////////////////////////////// | ||||
| int CartesianCommunicator::RankWorld(void){ | ||||
|   // Rank of this process within the duplicated world communicator. | ||||
|   int world_rank; | ||||
|   MPI_Comm_rank(communicator_world,&world_rank); | ||||
|   return world_rank; | ||||
| } | ||||
| // Broadcast `bytes` raw bytes at `data` from rank `root` over the world communicator. | ||||
| void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | ||||
| { | ||||
|   const int status = MPI_Bcast(data, bytes, MPI_BYTE, root, communicator_world); | ||||
|   assert(status==0); | ||||
| } | ||||
|  | ||||
| // Post a non-blocking exchange on the communicator reserved for direction `dir`: | ||||
| // receive `bytes` bytes from `recv_from_rank` into `recv` and send `bytes` bytes from | ||||
| // `xmit` to `xmit_to_rank`.  Both requests are appended to `list` (send first, then | ||||
| // receive).  Returns 2*bytes, i.e. the sent plus received byte count. | ||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
|                                                          void *xmit, | ||||
|                                                          int xmit_to_rank, | ||||
|                                                          void *recv, | ||||
|                                                          int recv_from_rank, | ||||
|                                                          int bytes,int dir) | ||||
| { | ||||
|   int myrank = _processor; | ||||
|   // `dir` indexes communicator_halo, so it must lie in [0, size). | ||||
|   assert(dir >= 0); | ||||
|   assert(dir < static_cast<int>(communicator_halo.size())); | ||||
|  | ||||
|   // Messages are tagged with the sending rank on both sides of the exchange. | ||||
|   MPI_Request req[2]; | ||||
|   int ierr; | ||||
|   ierr = MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); | ||||
|   assert(ierr==0); | ||||
|   ierr = MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); | ||||
|   assert(ierr==0); | ||||
|  | ||||
|   list.push_back(req[0]); | ||||
|   list.push_back(req[1]); | ||||
|   return 2.0*bytes; | ||||
| } | ||||
| // Wait for every request in `waitall`.  `dir` is accepted for interface compatibility | ||||
| // and is not used here.  An empty request list is a no-op. | ||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | ||||
| { | ||||
|   // Indexing element 0 of an empty vector would be undefined behaviour. | ||||
|   if ( waitall.empty() ) return; | ||||
|  | ||||
|   int nreq=waitall.size(); | ||||
|   int ierr = MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| // Blocking exchange on the communicator reserved for direction `dir`: posts a | ||||
| // non-blocking receive and send of `bytes` bytes each, then waits for both. | ||||
| // Returns 2*bytes, i.e. the sent plus received byte count. | ||||
| double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, | ||||
|                                                     int xmit_to_rank, | ||||
|                                                     void *recv, | ||||
|                                                     int recv_from_rank, | ||||
|                                                     int bytes,int dir) | ||||
| { | ||||
|   int myrank = _processor; | ||||
|   // `dir` indexes communicator_halo, so it must lie in [0, size). | ||||
|   assert(dir >= 0); | ||||
|   assert(dir < static_cast<int>(communicator_halo.size())); | ||||
|  | ||||
|   // Messages are tagged with the sending rank on both sides of the exchange. | ||||
|   MPI_Request req[2]; | ||||
|   int ierr; | ||||
|   ierr = MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); | ||||
|   assert(ierr==0); | ||||
|   ierr = MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); | ||||
|   assert(ierr==0); | ||||
|   ierr = MPI_Waitall(2, req, MPI_STATUSES_IGNORE); | ||||
|   assert(ierr==0); | ||||
|   return 2.0*bytes; | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| @@ -59,6 +59,8 @@ void CartesianCommunicator::GlobalSum(double &){} | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &){} | ||||
| void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &){} | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &){} | ||||
|  | ||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||
| 					   void *recv, | ||||
|   | ||||
| @@ -42,7 +42,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/cshift/Cshift_mpi.h> | ||||
| #endif  | ||||
|  | ||||
| #ifdef GRID_COMMS_MPI3L | ||||
| #ifdef GRID_COMMS_MPIT | ||||
| #include <Grid/cshift/Cshift_mpi.h> | ||||
| #endif  | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
|  /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|     Source file: ./lib/lattice/Lattice_reduction.h | ||||
|     Copyright (C) 2015 | ||||
| @@ -328,6 +328,8 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|   typedef typename vobj::tensor_reduced tensor_reduced; | ||||
|    | ||||
|   scalar_type zscale(scale); | ||||
|  | ||||
|   GridBase *grid  = X._grid; | ||||
|  | ||||
|   int Nsimd  =grid->Nsimd(); | ||||
| @@ -353,7 +355,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | ||||
|       grid->iCoorFromIindex(icoor,l); | ||||
|       int ldx =r+icoor[orthogdim]*rd; | ||||
|       scalar_type *as =(scalar_type *)&av; | ||||
|       as[l] = scalar_type(a[ldx])*scale; | ||||
|       as[l] = scalar_type(a[ldx])*zscale; | ||||
|     } | ||||
|  | ||||
|     tensor_reduced at; at=av; | ||||
| @@ -367,71 +369,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| /* | ||||
| template<class vobj> | ||||
| static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y, | ||||
| 			     int Orthog,RealD scale=1.0)  | ||||
| {     | ||||
|   // FIXME: Implementation is slow | ||||
|   // Best base the linear combination by constructing a  | ||||
|   // set of vectors of size grid->_rdimensions[Orthog]. | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   typedef typename vobj::scalar_type scalar_type; | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|    | ||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||
|    | ||||
|   GridBase *FullGrid  = X._grid; | ||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|    | ||||
|   Lattice<vobj> Xslice(SliceGrid); | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|   // If we based this on Cshift it would work for spread out | ||||
|   // but it would be even slower | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     ExtractSlice(Rslice,Y,i,Orthog); | ||||
|     ExtractSlice(Xslice,X,i,Orthog); | ||||
|     Rslice = Rslice + Xslice*(scale*a[i]); | ||||
|     InsertSlice(Rslice,R,i,Orthog); | ||||
|   } | ||||
| }; | ||||
| template<class vobj> | ||||
| static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||
|   { | ||||
|     // FIXME: Implementation is slow | ||||
|     // Look at localInnerProduct implementation, | ||||
|     // and do inside a site loop with block strided iterators | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     typedef typename vobj::scalar_type scalar_type; | ||||
|     typedef typename vobj::vector_type vector_type; | ||||
|     typedef typename vobj::tensor_reduced scalar; | ||||
|     typedef typename scalar::scalar_object  scomplex; | ||||
|    | ||||
|     int Nblock = lhs._grid->GlobalDimensions()[Orthog]; | ||||
|     vec.resize(Nblock); | ||||
|     std::vector<scomplex> sip(Nblock); | ||||
|     Lattice<scalar> IP(lhs._grid);  | ||||
|     IP=localInnerProduct(lhs,rhs); | ||||
|     sliceSum(IP,sip,Orthog); | ||||
|    | ||||
|     for(int ss=0;ss<Nblock;ss++){ | ||||
|       vec[ss] = TensorRemove(sip[ss]); | ||||
|     } | ||||
|   } | ||||
| */ | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // FIXME: Implementation is slow | ||||
| // If we based this on Cshift it would work for spread out | ||||
| // but it would be even slower | ||||
| // | ||||
| // Repeated extract slice is inefficient | ||||
| // | ||||
| // Best base the linear combination by constructing a  | ||||
| // set of vectors of size grid->_rdimensions[Orthog]. | ||||
| ////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) | ||||
| { | ||||
|   int NN    = BlockSolverGrid->_ndimension; | ||||
| @@ -450,7 +388,7 @@ inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or | ||||
|   } | ||||
|   return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);  | ||||
| } | ||||
|  | ||||
| */ | ||||
|  | ||||
| template<class vobj> | ||||
| static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)  | ||||
| @@ -460,57 +398,170 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|  | ||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||
|    | ||||
|  | ||||
|   GridBase *FullGrid  = X._grid; | ||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|    | ||||
|   Lattice<vobj> Xslice(SliceGrid); | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|    | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     ExtractSlice(Rslice,Y,i,Orthog); | ||||
|     for(int j=0;j<Nblock;j++){ | ||||
|       ExtractSlice(Xslice,X,j,Orthog); | ||||
|       Rslice = Rslice + Xslice*(scale*aa(j,i)); | ||||
|     } | ||||
|     InsertSlice(Rslice,R,i,Orthog); | ||||
|   //  GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|  | ||||
|   //  Lattice<vobj> Xslice(SliceGrid); | ||||
|   //  Lattice<vobj> Rslice(SliceGrid); | ||||
|  | ||||
|   assert( FullGrid->_simd_layout[Orthog]==1); | ||||
|   int nh =  FullGrid->_ndimension; | ||||
|   //  int nl = SliceGrid->_ndimension; | ||||
|   int nl = nh-1; | ||||
|  | ||||
|   //FIXME package in a convenient iterator | ||||
|   //Should loop over a plane orthogonal to direction "Orthog" | ||||
|   int stride=FullGrid->_slice_stride[Orthog]; | ||||
|   int block =FullGrid->_slice_block [Orthog]; | ||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||
|   int ostride=FullGrid->_ostride[Orthog]; | ||||
| #pragma omp parallel  | ||||
|   { | ||||
|     std::vector<vobj> s_x(Nblock); | ||||
|  | ||||
| #pragma omp for collapse(2) | ||||
|     for(int n=0;n<nblock;n++){ | ||||
|     for(int b=0;b<block;b++){ | ||||
|       int o  = n*stride + b; | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	s_x[i] = X[o+i*ostride]; | ||||
|       } | ||||
|  | ||||
|       vobj dot; | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	dot = Y[o+i*ostride]; | ||||
| 	for(int j=0;j<Nblock;j++){ | ||||
| 	  dot = dot + s_x[j]*(scale*aa(j,i)); | ||||
| 	} | ||||
| 	R[o+i*ostride]=dot; | ||||
|       } | ||||
|     }} | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // For every site of the plane orthogonal to direction Orthog, computes | ||||
| //   R(slice i) = sum_j X(slice j) * (scale*aa(j,i))      for i,j in [0,Nblock). | ||||
| // Direction Orthog must not be SIMD-decomposed (asserted below). | ||||
| // NOTE(review): slices are addressed with the global extent of Orthog and the local | ||||
| // outer stride, which presumably requires Orthog not to be split across ranks - confirm. | ||||
| template<class vobj> | ||||
| static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)  | ||||
| { | ||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||
|  | ||||
|   GridBase *FullGrid  = X._grid; | ||||
|   assert( FullGrid->_simd_layout[Orthog]==1); | ||||
|  | ||||
|   //FIXME package in a convenient iterator | ||||
|   //Should loop over a plane orthogonal to direction "Orthog" | ||||
|   int stride=FullGrid->_slice_stride[Orthog]; | ||||
|   int block =FullGrid->_slice_block [Orthog]; | ||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||
|   int ostride=FullGrid->_ostride[Orthog]; | ||||
| #pragma omp parallel  | ||||
|   { | ||||
|     // Per-thread scratch holding the Nblock slices of X at the current site. | ||||
|     std::vector<vobj> s_x(Nblock); | ||||
|  | ||||
| #pragma omp for collapse(2) | ||||
|     for(int n=0;n<nblock;n++){ | ||||
|     for(int b=0;b<block;b++){ | ||||
|       int o  = n*stride + b; | ||||
|  | ||||
|       // Gather all of X at this site before any element of R is written. | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	s_x[i] = X[o+i*ostride]; | ||||
|       } | ||||
|  | ||||
|       vobj dot; | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	// Start from the j=0 term so that dot needs no separate zero initialisation. | ||||
| 	dot = s_x[0]*(scale*aa(0,i)); | ||||
| 	for(int j=1;j<Nblock;j++){ | ||||
| 	  dot = dot + s_x[j]*(scale*aa(j,i)); | ||||
| 	} | ||||
| 	R[o+i*ostride]=dot; | ||||
|       } | ||||
|     }} | ||||
|   } | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||
| { | ||||
|   // FIXME: Implementation is slow | ||||
|   // Not sure of best solution.. think about it | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   typedef typename vobj::scalar_type scalar_type; | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|    | ||||
|   GridBase *FullGrid  = lhs._grid; | ||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|   //  GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|    | ||||
|   int Nblock = FullGrid->GlobalDimensions()[Orthog]; | ||||
|    | ||||
|   Lattice<vobj> Lslice(SliceGrid); | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|   //  Lattice<vobj> Lslice(SliceGrid); | ||||
|   //  Lattice<vobj> Rslice(SliceGrid); | ||||
|    | ||||
|   mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|    | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     ExtractSlice(Lslice,lhs,i,Orthog); | ||||
|     for(int j=0;j<Nblock;j++){ | ||||
|       ExtractSlice(Rslice,rhs,j,Orthog); | ||||
|       mat(i,j) = innerProduct(Lslice,Rslice); | ||||
|     } | ||||
|  | ||||
|   assert( FullGrid->_simd_layout[Orthog]==1); | ||||
|   int nh =  FullGrid->_ndimension; | ||||
|   //  int nl = SliceGrid->_ndimension; | ||||
|   int nl = nh-1; | ||||
|  | ||||
|   //FIXME package in a convenient iterator | ||||
|   //Should loop over a plane orthogonal to direction "Orthog" | ||||
|   int stride=FullGrid->_slice_stride[Orthog]; | ||||
|   int block =FullGrid->_slice_block [Orthog]; | ||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||
|   int ostride=FullGrid->_ostride[Orthog]; | ||||
|  | ||||
|   typedef typename vobj::vector_typeD vector_typeD; | ||||
|  | ||||
| #pragma omp parallel  | ||||
|   { | ||||
|     std::vector<vobj> Left(Nblock); | ||||
|     std::vector<vobj> Right(Nblock); | ||||
|     Eigen::MatrixXcd  mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
| #pragma omp for collapse(2) | ||||
|     for(int n=0;n<nblock;n++){ | ||||
|     for(int b=0;b<block;b++){ | ||||
|  | ||||
|       int o  = n*stride + b; | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	Left [i] = lhs[o+i*ostride]; | ||||
| 	Right[i] = rhs[o+i*ostride]; | ||||
|       } | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
|       for(int j=0;j<Nblock;j++){ | ||||
| 	auto tmp = innerProduct(Left[i],Right[j]); | ||||
| 	//	vector_typeD rtmp = TensorRemove(tmp); | ||||
| 	auto rtmp = TensorRemove(tmp); | ||||
| 	mat_thread(i,j) += Reduce(rtmp); | ||||
|       }} | ||||
|     }} | ||||
| #pragma omp critical | ||||
|     { | ||||
|       mat += mat_thread; | ||||
|     }   | ||||
|   } | ||||
| #undef FORCE_DIAG | ||||
| #ifdef FORCE_DIAG | ||||
|  | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     for(int j=0;j<Nblock;j++){ | ||||
|       if ( i != j ) mat(i,j)=0.0; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|   for(int j=0;j<Nblock;j++){ | ||||
|     ComplexD sum = mat(i,j); | ||||
|     FullGrid->GlobalSum(sum); | ||||
|     mat(i,j)=sum; | ||||
|   }} | ||||
|  | ||||
|   return; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -551,7 +551,10 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
|  | ||||
| //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | ||||
| template<typename vobj, typename sobj> | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){ | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type  | ||||
| unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in) | ||||
| { | ||||
|  | ||||
|   typedef typename vobj::vector_type vtype; | ||||
|    | ||||
|   GridBase* in_grid = in._grid; | ||||
| @@ -590,6 +593,54 @@ typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj> | ||||
|     extract1(in_vobj, out_ptrs, 0); | ||||
|   } | ||||
| } | ||||
| //Copy array of scalar objects in lexicographic order into a SIMD-vectorized lattice | ||||
| template<typename vobj, typename sobj> | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value  | ||||
|                     && !isSIMDvectorized<sobj>::value, void>::type  | ||||
| vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) | ||||
| { | ||||
|  | ||||
|   typedef typename vobj::vector_type vtype; | ||||
|    | ||||
|   GridBase* grid = out._grid; | ||||
|   assert(in.size()==grid->lSites()); | ||||
|    | ||||
|   int ndim     = grid->Nd(); | ||||
|   int nsimd    = vtype::Nsimd(); | ||||
|  | ||||
|   std::vector<std::vector<int> > icoor(nsimd); | ||||
|        | ||||
|   for(int lane=0; lane < nsimd; lane++){ | ||||
|     icoor[lane].resize(ndim); | ||||
|     grid->iCoorFromIindex(icoor[lane],lane); | ||||
|   } | ||||
|    | ||||
|   parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index | ||||
|     //Assemble vector of pointers to the input elements, one per SIMD lane | ||||
|     std::vector<sobj*> ptrs(nsimd); | ||||
|  | ||||
|     std::vector<int> ocoor(ndim); | ||||
|     grid->oCoorFromOindex(ocoor, oidx); | ||||
|  | ||||
|     std::vector<int> lcoor(grid->Nd()); | ||||
|        | ||||
|     for(int lane=0; lane < nsimd; lane++){ | ||||
|  | ||||
|       for(int mu=0;mu<ndim;mu++){ | ||||
| 	lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu]; | ||||
|       } | ||||
|  | ||||
|       int lex; | ||||
|       Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); | ||||
|       ptrs[lane] = &in[lex]; | ||||
|     } | ||||
|      | ||||
|     //pack the objects behind those ptrs into one vector object and store it at this outer site | ||||
|     vobj vecobj; | ||||
|     merge1(vecobj, ptrs, 0); | ||||
|     out._odata[oidx] = vecobj;  | ||||
|   } | ||||
| } | ||||
|  | ||||
| //Convert a Lattice from one precision to another | ||||
| template<class VobjOut, class VobjIn> | ||||
| @@ -615,7 +666,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | ||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||
|      | ||||
|   parallel_for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||
|   parallel_for(uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||
|     std::vector<int> out_ocoor(ndim); | ||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||
|  | ||||
|   | ||||
| @@ -62,14 +62,20 @@ namespace Grid { | ||||
|     return ret; | ||||
|   } | ||||
|  | ||||
|   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ | ||||
|   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ | ||||
|     Lattice<obj> ret(rhs._grid); | ||||
|     ret.checkerboard = rhs.checkerboard; | ||||
|     conformable(ret,rhs); | ||||
|     parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ | ||||
|       ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); | ||||
|     } | ||||
|  | ||||
|     return ret; | ||||
|  | ||||
|      | ||||
|      | ||||
|  | ||||
|      | ||||
|   } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -95,7 +95,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
| //////////////////////////////////////////////////////////// | ||||
| void Grid_quiesce_nodes(void) { | ||||
|   int me = 0; | ||||
| #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L) | ||||
| #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) | ||||
|   MPI_Comm_rank(MPI_COMM_WORLD, &me); | ||||
| #endif | ||||
| #ifdef GRID_COMMS_SHMEM | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -27,6 +27,7 @@ directory | ||||
| #ifndef GRID_ILDG_IO_H | ||||
| #define GRID_ILDG_IO_H | ||||
|  | ||||
| #ifdef HAVE_LIME | ||||
| #include <algorithm> | ||||
| #include <fstream> | ||||
| #include <iomanip> | ||||
| @@ -37,213 +38,677 @@ directory | ||||
| #include <sys/utsname.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| #ifdef HAVE_LIME | ||||
|  | ||||
| extern "C" {  // for linkage | ||||
| //C-Lime is a must have for this functionality | ||||
| extern "C" {   | ||||
| #include "lime.h" | ||||
| } | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| inline void ILDGGrid(GridBase *grid, ILDGField &header) { | ||||
|   assert(grid->_ndimension == 4);  // emit error if not | ||||
|   header.dimension.resize(4); | ||||
|   header.boundary.resize(4); | ||||
|   for (int d = 0; d < 4; d++) { | ||||
|     header.dimension[d] = grid->_fdimensions[d]; | ||||
|     // Read boundary conditions from ... ? | ||||
|     header.boundary[d] = std::string("periodic"); | ||||
|   } | ||||
| } | ||||
|   ///////////////////////////////// | ||||
|   // Encode word types as strings | ||||
|   ///////////////////////////////// | ||||
|  template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<double>  (void){ return std::string("D"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<float>   (void){ return std::string("F"); } | ||||
|  template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); } | ||||
|  template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); } | ||||
|  | ||||
| inline void ILDGChecksum(uint32_t *buf, uint32_t buf_size_bytes, | ||||
|                          uint32_t &csum) { | ||||
|   BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum); | ||||
| } | ||||
|   ///////////////////////////////////////// | ||||
|   // Encode a generic tensor as a string | ||||
|   ///////////////////////////////////////// | ||||
|  template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| // Utilities ; these are QCD aware | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| template <class GaugeField> | ||||
| inline void ILDGStatistics(GaugeField &data, ILDGField &header) { | ||||
|   // How to convert data precision etc... | ||||
|   header.link_trace = Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data); | ||||
|   header.plaquette = Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data); | ||||
|   // header.polyakov = | ||||
| } | ||||
|    typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||
|  | ||||
| // Forcing QCD here | ||||
| template <class fobj, class sobj> | ||||
| struct ILDGMunger { | ||||
|   void operator()(fobj &in, sobj &out, uint32_t &csum) { | ||||
|     for (int mu = 0; mu < 4; mu++) { | ||||
|       for (int i = 0; i < 3; i++) { | ||||
|         for (int j = 0; j < 3; j++) { | ||||
|           out(mu)()(i, j) = in(mu)()(i, j); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     ILDGChecksum((uint32_t *)&in, sizeof(in), csum); | ||||
|   }; | ||||
| }; | ||||
|    int _ColourN       = indexRank<ColourIndex,vobj>(); | ||||
|    int _ColourScalar  =  isScalar<ColourIndex,vobj>(); | ||||
|    int _ColourVector  =  isVector<ColourIndex,vobj>(); | ||||
|    int _ColourMatrix  =  isMatrix<ColourIndex,vobj>(); | ||||
|  | ||||
| template <class fobj, class sobj> | ||||
| struct ILDGUnmunger { | ||||
|   void operator()(sobj &in, fobj &out, uint32_t &csum) { | ||||
|     for (int mu = 0; mu < 4; mu++) { | ||||
|       for (int i = 0; i < 3; i++) { | ||||
|         for (int j = 0; j < 3; j++) { | ||||
|           out(mu)()(i, j) = in(mu)()(i, j); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     ILDGChecksum((uint32_t *)&out, sizeof(out), csum); | ||||
|   }; | ||||
| }; | ||||
|    int _SpinN       = indexRank<SpinIndex,vobj>(); | ||||
|    int _SpinScalar  =  isScalar<SpinIndex,vobj>(); | ||||
|    int _SpinVector  =  isVector<SpinIndex,vobj>(); | ||||
|    int _SpinMatrix  =  isMatrix<SpinIndex,vobj>(); | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // Write and read from fstream; compute header offset for payload | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| enum ILDGstate {ILDGread, ILDGwrite}; | ||||
|    int _LorentzN       = indexRank<LorentzIndex,vobj>(); | ||||
|    int _LorentzScalar  =  isScalar<LorentzIndex,vobj>(); | ||||
|    int _LorentzVector  =  isVector<LorentzIndex,vobj>(); | ||||
|    int _LorentzMatrix  =  isMatrix<LorentzIndex,vobj>(); | ||||
|  | ||||
| class ILDGIO : public BinaryIO { | ||||
|   FILE *File; | ||||
|   LimeWriter *LimeW; | ||||
|   LimeRecordHeader *LimeHeader; | ||||
|   LimeReader *LimeR; | ||||
|   std::string filename; | ||||
|    std::stringstream stream; | ||||
|  | ||||
|    stream << "GRID_"; | ||||
|    stream << ScidacWordMnemonic<stype>(); | ||||
|  | ||||
|    //   std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl; | ||||
|    //   std::cout << " Spin    N/S/V/M : " << _SpinN   <<" "<<_SpinScalar   <<"/"<<_SpinVector   <<"/"<<_SpinMatrix<<std::endl; | ||||
|    //   std::cout << " Colour  N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl; | ||||
|  | ||||
|    if ( _LorentzVector )   stream << "_LorentzVector"<<_LorentzN; | ||||
|    if ( _LorentzMatrix )   stream << "_LorentzMatrix"<<_LorentzN; | ||||
|  | ||||
|    if ( _SpinVector )   stream << "_SpinVector"<<_SpinN; | ||||
|    if ( _SpinMatrix )   stream << "_SpinMatrix"<<_SpinN; | ||||
|  | ||||
|    if ( _ColourVector )   stream << "_ColourVector"<<_ColourN; | ||||
|    if ( _ColourMatrix )   stream << "_ColourMatrix"<<_ColourN; | ||||
|  | ||||
|    if ( _ColourScalar && _LorentzScalar && _SpinScalar )   stream << "_Complex"; | ||||
|  | ||||
|  | ||||
|    typesize = sizeof(typename vobj::scalar_type); | ||||
|  | ||||
|    if ( _ColourMatrix ) typesize*= _ColourN*_ColourN; | ||||
|    else                 typesize*= _ColourN; | ||||
|  | ||||
|    if ( _SpinMatrix )   typesize*= _SpinN*_SpinN; | ||||
|    else                 typesize*= _SpinN; | ||||
|  | ||||
|    colors    = _ColourN; | ||||
|    spins     = _SpinN; | ||||
|    datacount = _LorentzN; | ||||
|  | ||||
|    return stream.str(); | ||||
|  } | ||||
|   | ||||
|  template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {  | ||||
|    return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount); | ||||
|  }; | ||||
|  | ||||
|  | ||||
|  //////////////////////////////////////////////////////////// | ||||
|  // Helper to fill out metadata | ||||
|  //////////////////////////////////////////////////////////// | ||||
|  template<class vobj> void ScidacMetaData(Lattice<vobj> & field, | ||||
| 					  FieldMetaData &header, | ||||
| 					  scidacRecord & _scidacRecord, | ||||
| 					  scidacFile   & _scidacFile)  | ||||
|  { | ||||
|    typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||
|  | ||||
|    ///////////////////////////////////// | ||||
|    // Pull Grid's metadata | ||||
|    ///////////////////////////////////// | ||||
|    PrepareMetaData(field,header); | ||||
|  | ||||
|    ///////////////////////////////////// | ||||
|    // Scidac Private File structure | ||||
|    ///////////////////////////////////// | ||||
|    _scidacFile              = scidacFile(field._grid); | ||||
|  | ||||
|    ///////////////////////////////////// | ||||
|    // Scidac Private Record structure | ||||
|    ///////////////////////////////////// | ||||
|    scidacRecord sr; | ||||
|    sr.datatype   = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount); | ||||
|    sr.date       = header.creation_date; | ||||
|    sr.precision  = ScidacWordMnemonic<stype>(); | ||||
|    sr.recordtype = GRID_IO_FIELD; | ||||
|  | ||||
|    _scidacRecord = sr; | ||||
|  | ||||
|    std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; | ||||
|  } | ||||
|   | ||||
|  /////////////////////////////////////////////////////// | ||||
|  // Scidac checksum | ||||
|  /////////////////////////////////////////////////////// | ||||
|  static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb) | ||||
|  { | ||||
|    uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16); | ||||
|    uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||
|    if ( scidac_csuma !=scidac_checksuma) return 0; | ||||
|    if ( scidac_csumb !=scidac_checksumb) return 0; | ||||
|     return 1; | ||||
|  } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////// | ||||
| // Lime, ILDG and Scidac I/O classes | ||||
| //////////////////////////////////////////////////////////////////////////////////// | ||||
| class GridLimeReader : public BinaryIO { | ||||
|  public: | ||||
|   ILDGIO(std::string file, ILDGstate RW) { | ||||
|       filename = file; | ||||
|     if (RW == ILDGwrite){ | ||||
|       File = fopen(file.c_str(), "w"); | ||||
|       // check if opened correctly | ||||
|    /////////////////////////////////////////////////// | ||||
|    // FIXME: format for RNG? Now just binary out instead | ||||
|    /////////////////////////////////////////////////// | ||||
|  | ||||
|       LimeW = limeCreateWriter(File); | ||||
|     } else { | ||||
|       File = fopen(file.c_str(), "r"); | ||||
|       // check if opened correctly | ||||
|    FILE       *File; | ||||
|    LimeReader *LimeR; | ||||
|    std::string filename; | ||||
|  | ||||
|       LimeR = limeCreateReader(File); | ||||
|    ///////////////////////////////////////////// | ||||
|    // Open the file | ||||
|    ///////////////////////////////////////////// | ||||
|    void open(std::string &_filename)  | ||||
|    { | ||||
|      filename= _filename; | ||||
|      File = fopen(filename.c_str(), "r"); | ||||
|      LimeR = limeCreateReader(File); | ||||
|    } | ||||
|    ///////////////////////////////////////////// | ||||
|    // Close the file | ||||
|    ///////////////////////////////////////////// | ||||
|    void close(void){ | ||||
|      fclose(File); | ||||
|      //     limeDestroyReader(LimeR); | ||||
|    } | ||||
|  | ||||
|   //////////////////////////////////////////// | ||||
|   // Read a generic lattice field and verify checksum | ||||
|   //////////////////////////////////////////// | ||||
|   template<class vobj> | ||||
|   void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) | ||||
|   { | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     scidacChecksum scidacChecksum_; | ||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|  | ||||
|     std::string format = getFormatString<vobj>(); | ||||
|  | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|  | ||||
|       std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl; | ||||
| 	 | ||||
|       if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||
|  | ||||
|  | ||||
| 	off_t offset= ftell(File); | ||||
| 	BinarySimpleMunger<sobj,sobj> munge; | ||||
| 	BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Insist checksum is next record | ||||
| 	///////////////////////////////////////////// | ||||
| 	readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Verify checksums | ||||
| 	///////////////////////////////////////////// | ||||
| 	scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||
| 	return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Read a generic serialisable object | ||||
|   //////////////////////////////////////////// | ||||
|   template<class serialisable_object> | ||||
|   void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) | ||||
|   { | ||||
|     std::string xmlstring; | ||||
|     // should this be a do while; can we miss a first record?? | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|  | ||||
|   ~ILDGIO() { fclose(File); } | ||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|   int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L){ | ||||
|       if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||
| 	std::vector<char> xmlc(nbytes+1,'\0'); | ||||
| 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||
| 	XmlReader RD(&xmlc[0],""); | ||||
| 	read(RD,object_name,object); | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|     }   | ||||
|     assert(0); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| class GridLimeWriter : public BinaryIO { | ||||
|  public: | ||||
|    /////////////////////////////////////////////////// | ||||
|    // FIXME: format for RNG? Now just binary out instead | ||||
|    /////////////////////////////////////////////////// | ||||
|  | ||||
|    FILE       *File; | ||||
|    LimeWriter *LimeW; | ||||
|    std::string filename; | ||||
|  | ||||
|    void open(std::string &_filename) {  | ||||
|      filename= _filename; | ||||
|      File = fopen(filename.c_str(), "w"); | ||||
|      LimeW = limeCreateWriter(File); assert(LimeW != NULL ); | ||||
|    } | ||||
|    ///////////////////////////////////////////// | ||||
|    // Close the file | ||||
|    ///////////////////////////////////////////// | ||||
|    void close(void) { | ||||
|      fclose(File); | ||||
|      //  limeDestroyWriter(LimeW); | ||||
|    } | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Lime utility functions | ||||
|   /////////////////////////////////////////////////////// | ||||
|   int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) | ||||
|   { | ||||
|     LimeRecordHeader *h; | ||||
|     h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize); | ||||
|     int status = limeWriteRecordHeader(h, L); | ||||
|     if (status < 0) { | ||||
|       std::cerr << "ILDG Header error\n"; | ||||
|       return status; | ||||
|     } | ||||
|     assert(limeWriteRecordHeader(h, LimeW) >= 0); | ||||
|     limeDestroyHeader(h); | ||||
|     return LIME_SUCCESS; | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Write a generic serialisable object | ||||
|   //////////////////////////////////////////// | ||||
|   template<class serialisable_object> | ||||
|   void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) | ||||
|   { | ||||
|     std::string xmlstring; | ||||
|     { | ||||
|       XmlWriter WR("",""); | ||||
|       write(WR,object_name,object); | ||||
|       xmlstring = WR.XmlString(); | ||||
|     } | ||||
|     uint64_t nbytes = xmlstring.size(); | ||||
|     int err; | ||||
|     LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL); | ||||
|  | ||||
|   unsigned int writeHeader(ILDGField &header) { | ||||
|     // write header in LIME | ||||
|     n_uint64_t nbytes; | ||||
|     int MB_flag = 1, ME_flag = 0; | ||||
|     err=limeWriteRecordHeader(h, LimeW);                    assert(err>=0); | ||||
|     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); | ||||
|     err=limeWriterCloseRecord(LimeW);                       assert(err>=0); | ||||
|     limeDestroyHeader(h); | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Write a generic lattice field and csum | ||||
|   //////////////////////////////////////////// | ||||
|   template<class vobj> | ||||
|   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) | ||||
|   { | ||||
|     //////////////////////////////////////////// | ||||
|     // Create record header | ||||
|     //////////////////////////////////////////// | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     int err; | ||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|     uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; | ||||
|     createLimeRecordHeader(record_name, 0, 0, PayloadSize); | ||||
|  | ||||
|     char message[] = "ildg-format"; | ||||
|     nbytes = strlen(message); | ||||
|     LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes); | ||||
|     limeWriteRecordHeader(LimeHeader, LimeW); | ||||
|     limeDestroyHeader(LimeHeader); | ||||
|     // save the xml header here | ||||
|     // use the xml_writer to c++ streams in pugixml | ||||
|     // and convert to char message | ||||
|     limeWriteRecordData(message, &nbytes, LimeW); | ||||
|     limeWriterCloseRecord(LimeW); | ||||
|     //////////////////////////////////////////////////////////////////// | ||||
|     // NB: FILE and iostream are jointly writing disjoint sequences in the | ||||
|     // same file through different file handles (integer units). | ||||
|     //  | ||||
|     // These are both buffered, so why I think this code is right is as follows. | ||||
|     // | ||||
|     // i)  write record header to FILE *File, telegraphing the size.  | ||||
|     // ii) ftell reads the offset from FILE *File . | ||||
|     // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. | ||||
|     //      Closes iostream and flushes. | ||||
|     // iv) fseek on FILE * to end of this disjoint section. | ||||
|     //  v) Continue writing scidac record. | ||||
|     //////////////////////////////////////////////////////////////////// | ||||
|     off_t offset = ftell(File); | ||||
|     std::string format = getFormatString<vobj>(); | ||||
|     BinarySimpleMunger<sobj,sobj> munge; | ||||
|     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|     err=limeWriterCloseRecord(LimeW);  assert(err>=0); | ||||
|     //////////////////////////////////////// | ||||
|     // Write checksum element, propagating forward from the BinaryIO | ||||
|     // Always pair a checksum with a binary object, and close message | ||||
|     //////////////////////////////////////// | ||||
|     scidacChecksum checksum; | ||||
|     std::stringstream streama; streama << std::hex << scidac_csuma; | ||||
|     std::stringstream streamb; streamb << std::hex << scidac_csumb; | ||||
|     checksum.suma= streama.str(); | ||||
|     checksum.sumb= streamb.str(); | ||||
|     std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; | ||||
|     writeLimeObject(0,1,checksum,std::string("scidacChecksum"    ),std::string(SCIDAC_CHECKSUM)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|     return 0; | ||||
|   } | ||||
|  | ||||
|   unsigned int readHeader(ILDGField &header) { | ||||
|     return 0; | ||||
| class ScidacWriter : public GridLimeWriter { | ||||
|  public: | ||||
|  | ||||
|    template<class SerialisableUserFile> | ||||
|    void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) | ||||
|    { | ||||
|      scidacFile    _scidacFile(grid); | ||||
|      writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||
|      writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||
|    } | ||||
|   //////////////////////////////////////////////// | ||||
|   // Write generic lattice field in scidac format | ||||
|   //////////////////////////////////////////////// | ||||
|    template <class vobj, class userRecord> | ||||
|   void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)  | ||||
|   { | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     uint64_t nbytes; | ||||
|     GridBase * grid = field._grid; | ||||
|  | ||||
|     //////////////////////////////////////// | ||||
|     // fill the Grid header | ||||
|     //////////////////////////////////////// | ||||
|     FieldMetaData header; | ||||
|     scidacRecord  _scidacRecord; | ||||
|     scidacFile    _scidacFile; | ||||
|  | ||||
|     ScidacMetaData(field,header,_scidacRecord,_scidacFile); | ||||
|  | ||||
|     ////////////////////////////////////////////// | ||||
|     // Fill the Lime file record by record | ||||
|     ////////////////////////////////////////////// | ||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||
|     writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); | ||||
|     writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||
|     writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));      // Closes message with checksum | ||||
|   } | ||||
| }; | ||||
|  | ||||
| class IldgWriter : public ScidacWriter { | ||||
|  public: | ||||
|  | ||||
|   /////////////////////////////////// | ||||
|   // A little helper | ||||
|   /////////////////////////////////// | ||||
|   void writeLimeIldgLFN(std::string &LFN) | ||||
|   { | ||||
|     uint64_t PayloadSize = LFN.size(); | ||||
|     int err; | ||||
|     createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize); | ||||
|     err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0); | ||||
|     err=limeWriterCloseRecord(LimeW); assert(err>=0); | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Special ILDG operations ; gauge configs only. | ||||
|   // Don't require scidac records EXCEPT checksum | ||||
|   // Use Grid MetaData object if present. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   template <class vsimd> | ||||
|   uint32_t readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu) { | ||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|     typedef LorentzColourMatrixD sobjd; | ||||
|     typedef LorentzColourMatrixF sobjf; | ||||
|     typedef iLorentzColourMatrix<vsimd> itype; | ||||
|     typedef LorentzColourMatrix sobj; | ||||
|     GridBase *grid = Umu._grid; | ||||
|  | ||||
|     ILDGField header; | ||||
|     readHeader(header); | ||||
|  | ||||
|     // now just the conf, ignore the header | ||||
|     std::string format = std::string("IEEE64BIG"); | ||||
|     do {limeReaderNextRecord(LimeR);} | ||||
|     while (strncmp(limeReaderType(LimeR), "ildg-binary-data",16)); | ||||
|  | ||||
|     n_uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|  | ||||
|     ILDGtype ILDGt(true, LimeR); | ||||
|     // this is special for double prec data, just for the moment | ||||
|     uint32_t csum = BinaryIO::readObjectParallel< itype, sobjd >( | ||||
|        Umu, filename, ILDGMunger<sobjd, sobj>(), 0, format, ILDGt); | ||||
|  | ||||
|     // Check configuration  | ||||
|     // todo | ||||
|  | ||||
|     return csum; | ||||
|   } | ||||
|  | ||||
|   template <class vsimd> | ||||
|   uint32_t writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, std::string format) { | ||||
|   void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)  | ||||
|   { | ||||
|     GridBase * grid = Umu._grid; | ||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|     typedef iLorentzColourMatrix<vsimd> vobj; | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     typedef LorentzColourMatrixD fobj; | ||||
|  | ||||
|     ILDGField header; | ||||
|     // fill the header | ||||
|     header.floating_point = format; | ||||
|     uint64_t nbytes; | ||||
|  | ||||
|     ILDGUnmunger<fobj, sobj> munge; | ||||
|     unsigned int offset = writeHeader(header); | ||||
|     //////////////////////////////////////// | ||||
|     // fill the Grid header | ||||
|     //////////////////////////////////////// | ||||
|     FieldMetaData header; | ||||
|     scidacRecord  _scidacRecord; | ||||
|     scidacFile    _scidacFile; | ||||
|  | ||||
|     BinaryIO::Uint32Checksum<vobj, fobj>(Umu, munge, header.checksum); | ||||
|     ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); | ||||
|  | ||||
|     // Write data record header | ||||
|     n_uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites; | ||||
|     createHeader("ildg-binary-data", 0, 1, PayloadSize, LimeW); | ||||
|     std::string format = header.floating_point; | ||||
|     header.ensemble_id    = description; | ||||
|     header.ensemble_label = description; | ||||
|     header.sequence_number = sequence; | ||||
|     header.ildg_lfn = LFN; | ||||
|  | ||||
|     ILDGtype ILDGt(true, LimeW); | ||||
|     uint32_t csum = BinaryIO::writeObjectParallel<vobj, fobj>( | ||||
|        Umu, filename, munge, 0, header.floating_point, ILDGt); | ||||
|     assert ( (format == std::string("IEEE32BIG"))   | ||||
|            ||(format == std::string("IEEE64BIG")) ); | ||||
|  | ||||
|     limeWriterCloseRecord(LimeW); | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Fill ILDG header data struct | ||||
|     ////////////////////////////////////////////////////// | ||||
|     ildgFormat ildgfmt ; | ||||
|     ildgfmt.field     = std::string("su3gauge"); | ||||
|  | ||||
|     // Last record | ||||
|     // the logical file name LNF | ||||
|     // look into documentation on how to generate this string | ||||
|     std::string LNF = "empty";  | ||||
|     if ( format == std::string("IEEE32BIG") ) {  | ||||
|       ildgfmt.precision = 32; | ||||
|     } else {  | ||||
|       ildgfmt.precision = 64; | ||||
|     } | ||||
|     ildgfmt.version = 1.0; | ||||
|     ildgfmt.lx = header.dimension[0]; | ||||
|     ildgfmt.ly = header.dimension[1]; | ||||
|     ildgfmt.lz = header.dimension[2]; | ||||
|     ildgfmt.lt = header.dimension[3]; | ||||
|     assert(header.nd==4); | ||||
|     assert(header.nd==header.dimension.size()); | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     // Fill the USQCD info field | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     usqcdInfo info; | ||||
|     info.version=1.0; | ||||
|     info.plaq   = header.plaquette; | ||||
|     info.linktr = header.link_trace; | ||||
|  | ||||
|     PayloadSize = sizeof(LNF); | ||||
|     createHeader("ildg-binary-lfn", 1 , 1, PayloadSize, LimeW); | ||||
|     limeWriteRecordData(const_cast<char*>(LNF.c_str()), &PayloadSize, LimeW); | ||||
|  | ||||
|     limeWriterCloseRecord(LimeW); | ||||
|  | ||||
|     return csum; | ||||
|     std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl; | ||||
|     ////////////////////////////////////////////// | ||||
|     // Fill the Lime file record by record | ||||
|     ////////////////////////////////////////////// | ||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||
|     writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||
|     writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||
|     writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||
|     writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); | ||||
|     writeLimeObject(0,0,ildgfmt,std::string("ildgFormat")   ,std::string(ILDG_FORMAT)); // rec | ||||
|     writeLimeIldgLFN(header.ildg_lfn);                                                 // rec | ||||
|     writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA));      // Closes message with checksum | ||||
|     //    limeDestroyWriter(LimeW); | ||||
|     fclose(File); | ||||
|   } | ||||
|  | ||||
|   // format for RNG? Now just binary out | ||||
| }; | ||||
| } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // IldgReader | ||||
| // | ||||
| // Walks every record of an already opened LIME file, parses the XML metadata | ||||
| // records it recognises (ILDG format, ILDG LFN, Grid FieldMetaData, USQCD info, | ||||
| // SciDAC checksum) and reads the ILDG binary record into a gauge field. | ||||
| // LimeR, File and filename are not declared here; presumably they are members | ||||
| // of GridLimeReader -- confirm against the base class. | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| class IldgReader : public GridLimeReader { | ||||
|  public: | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Read either Grid/SciDAC/ILDG configuration | ||||
|   // Don't require scidac records EXCEPT checksum | ||||
|   // Use Grid MetaData object if present. | ||||
|   // Else use ILDG MetaData object if present. | ||||
|   // Else use SciDAC MetaData object if present. | ||||
|   // | ||||
|   // Umu            : destination gauge field; its grid must be four dimensional | ||||
|   //                  and match the lattice extents recorded in the file. | ||||
|   // FieldMetaData_ : filled from the Grid record when present, otherwise | ||||
|   //                  populated from the ILDG (and optional USQCD) records. | ||||
|   // | ||||
|   // Every consistency check is an assert, so a malformed file aborts rather | ||||
|   // than returning an error. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   template <class vsimd> | ||||
|   void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) { | ||||
|  | ||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|     typedef typename GaugeField::vector_object  vobj; | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
|     typedef LorentzColourMatrixF fobj; | ||||
|     typedef LorentzColourMatrixD dobj; | ||||
|  | ||||
|     GridBase *grid = Umu._grid; | ||||
|  | ||||
|     std::vector<int> dims = grid->FullDimensions(); | ||||
|  | ||||
|     assert(dims.size()==4); | ||||
|  | ||||
|     // Metadata holders | ||||
|     ildgFormat     ildgFormat_    ; | ||||
|     std::string    ildgLFN_       ; | ||||
|     scidacChecksum scidacChecksum_; | ||||
|     usqcdInfo      usqcdInfo_     ; | ||||
|  | ||||
|     // track what we read from file | ||||
|     int found_ildgFormat    =0; | ||||
|     int found_ildgLFN       =0; | ||||
|     int found_scidacChecksum=0; | ||||
|     int found_usqcdInfo     =0; | ||||
|     int found_ildgBinary    =0; | ||||
|     int found_FieldMetaData =0; | ||||
|  | ||||
|     // Checksums computed while reading the binary record. Zero initialised so | ||||
|     // that they never hold indeterminate values if no binary record is seen. | ||||
|     uint32_t nersc_csum   = 0; | ||||
|     uint32_t scidac_csuma = 0; | ||||
|     uint32_t scidac_csumb = 0; | ||||
|  | ||||
|     // Binary format | ||||
|     std::string format; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////// | ||||
|     // Loop over all records | ||||
|     // -- Order is poorly guaranteed except ILDG header precedes binary section. | ||||
|     // -- Run like an event loop. | ||||
|     // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing | ||||
|     //    that Scidac. | ||||
|     // -- Insist on Scidac checksum record. | ||||
|     ////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { | ||||
|  | ||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////////// | ||||
|       // If not BINARY_DATA read a string and parse | ||||
|       ////////////////////////////////////////////////////////////////// | ||||
|       if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) )  ) { | ||||
|  | ||||
|         // Copy out the string; the extra byte keeps the buffer NUL terminated | ||||
|         std::vector<char> xmlc(nbytes+1,'\0'); | ||||
|         limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); | ||||
|         std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; | ||||
|  | ||||
|         ////////////////////////////////// | ||||
|         // ILDG format record | ||||
|         if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) { | ||||
|  | ||||
|           XmlReader RD(&xmlc[0],""); | ||||
|           read(RD,"ildgFormat",ildgFormat_); | ||||
|  | ||||
|           if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG"); | ||||
|           if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG"); | ||||
|  | ||||
|           assert( ildgFormat_.lx == dims[0]); | ||||
|           assert( ildgFormat_.ly == dims[1]); | ||||
|           assert( ildgFormat_.lz == dims[2]); | ||||
|           assert( ildgFormat_.lt == dims[3]); | ||||
|  | ||||
|           found_ildgFormat = 1; | ||||
|         } | ||||
|  | ||||
|         // ILDG logical file name record: stored verbatim | ||||
|         if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) { | ||||
|           FieldMetaData_.ildg_lfn = std::string(&xmlc[0]); | ||||
|           found_ildgLFN = 1; | ||||
|         } | ||||
|  | ||||
|         // Grid's own metadata record. Compare over the length of GRID_FORMAT | ||||
|         // itself rather than that of an unrelated record name. | ||||
|         if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(GRID_FORMAT)) ) { | ||||
|  | ||||
|           XmlReader RD(&xmlc[0],""); | ||||
|           read(RD,"FieldMetaData",FieldMetaData_); | ||||
|  | ||||
|           format = FieldMetaData_.floating_point; | ||||
|  | ||||
|           assert(FieldMetaData_.dimension[0] == dims[0]); | ||||
|           assert(FieldMetaData_.dimension[1] == dims[1]); | ||||
|           assert(FieldMetaData_.dimension[2] == dims[2]); | ||||
|           assert(FieldMetaData_.dimension[3] == dims[3]); | ||||
|  | ||||
|           found_FieldMetaData = 1; | ||||
|         } | ||||
|  | ||||
|         if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) { | ||||
|           std::string xmls(&xmlc[0]); | ||||
|           // is it a USQCD info field | ||||
|           if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) { | ||||
|             std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; | ||||
|             XmlReader RD(&xmlc[0],""); | ||||
|             read(RD,"usqcdInfo",usqcdInfo_); | ||||
|             found_usqcdInfo = 1; | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) { | ||||
|           XmlReader RD(&xmlc[0],""); | ||||
|           read(RD,"scidacChecksum",scidacChecksum_); | ||||
|           found_scidacChecksum = 1; | ||||
|         } | ||||
|  | ||||
|       } else { | ||||
|         ///////////////////////////////// | ||||
|         // Binary data | ||||
|         ///////////////////////////////// | ||||
|         std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; | ||||
|         // The payload starts at the current position of the underlying FILE | ||||
|         off_t offset= ftell(File); | ||||
|  | ||||
|         // Any format other than IEEE64BIG is read as single precision | ||||
|         if ( format == std::string("IEEE64BIG") ) { | ||||
|           GaugeSimpleMunger<dobj, sobj> munge; | ||||
|           BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|         } else { | ||||
|           GaugeSimpleMunger<fobj, sobj> munge; | ||||
|           BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|         } | ||||
|  | ||||
|         found_ildgBinary = 1; | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Minimally must find binary segment and checksum | ||||
|     // Since this is an ILDG reader require ILDG format | ||||
|     ////////////////////////////////////////////////////// | ||||
|     assert(found_ildgBinary); | ||||
|     assert(found_ildgFormat); | ||||
|     assert(found_scidacChecksum); | ||||
|  | ||||
|     // Must find something with the lattice dimensions | ||||
|     assert(found_FieldMetaData||found_ildgFormat); | ||||
|  | ||||
|     if ( found_FieldMetaData ) { | ||||
|  | ||||
|       std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl; | ||||
|  | ||||
|     } else { | ||||
|  | ||||
|       assert(found_ildgFormat); | ||||
|       assert ( ildgFormat_.field == std::string("su3gauge") ); | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Populate our Grid metadata as best we can | ||||
|       /////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       std::ostringstream vers; vers << ildgFormat_.version; | ||||
|       FieldMetaData_.hdr_version = vers.str(); | ||||
|       FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); | ||||
|  | ||||
|       FieldMetaData_.nd=4; | ||||
|       FieldMetaData_.dimension.resize(4); | ||||
|  | ||||
|       FieldMetaData_.dimension[0] = ildgFormat_.lx ; | ||||
|       FieldMetaData_.dimension[1] = ildgFormat_.ly ; | ||||
|       FieldMetaData_.dimension[2] = ildgFormat_.lz ; | ||||
|       FieldMetaData_.dimension[3] = ildgFormat_.lt ; | ||||
|  | ||||
|       if ( found_usqcdInfo ) { | ||||
|         FieldMetaData_.plaquette = usqcdInfo_.plaq; | ||||
|         FieldMetaData_.link_trace= usqcdInfo_.linktr; | ||||
|         std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl; | ||||
|         std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl; | ||||
|         std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl; | ||||
|       } else { | ||||
|         FieldMetaData_.plaquette = 0.0; | ||||
|         FieldMetaData_.link_trace= 0.0; | ||||
|         std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Really really want to mandate a scidac checksum | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     if ( found_scidacChecksum ) { | ||||
|       // The checksum record carries both sums as hexadecimal strings | ||||
|       FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16); | ||||
|       FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||
|       scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||
|       assert( scidac_csuma ==FieldMetaData_.scidac_checksuma); | ||||
|       assert( scidac_csumb ==FieldMetaData_.scidac_checksumb); | ||||
|       std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl; | ||||
|     } else { | ||||
|       // Only reachable when asserts are compiled out (see the checks above) | ||||
|       std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl; | ||||
|       assert(0); // Can I insist always checksum ? | ||||
|     } | ||||
|  | ||||
|     // Physics-level check: recompute plaquette and link trace from the field | ||||
|     // just read and compare with the values recorded in the file. | ||||
|     if ( found_FieldMetaData || found_usqcdInfo ) { | ||||
|       FieldMetaData checker; | ||||
|       GaugeStatistics(Umu,checker); | ||||
|       assert(fabs(checker.plaquette  - FieldMetaData_.plaquette )<1.0e-5); | ||||
|       assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5); | ||||
|       std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl; | ||||
|     } | ||||
|   } | ||||
|  }; | ||||
|  | ||||
| }} | ||||
|  | ||||
| //HAVE_LIME | ||||
| #endif | ||||
|   | ||||
| @@ -34,47 +34,198 @@ extern "C" { // for linkage | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| struct ILDGtype { | ||||
|   bool is_ILDG; | ||||
|   LimeWriter* LW; | ||||
|   LimeReader* LR; | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // Data representation of records that enter ILDG and SciDac formats | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   ILDGtype(bool is, LimeWriter* L) : is_ILDG(is), LW(L), LR(NULL) {} | ||||
|   ILDGtype(bool is, LimeReader* L) : is_ILDG(is), LW(NULL), LR(L) {} | ||||
|   ILDGtype() : is_ILDG(false), LW(NULL), LR(NULL) {} | ||||
| }; | ||||
| // LIME record type names. The reader compares limeReaderType() against these | ||||
| // strings with strncmp to decide how each record is interpreted. | ||||
| #define GRID_FORMAT      "grid-format" | ||||
| #define ILDG_FORMAT      "ildg-format" | ||||
| #define ILDG_BINARY_DATA "ildg-binary-data" | ||||
| #define ILDG_DATA_LFN    "ildg-data-lfn" | ||||
| #define SCIDAC_CHECKSUM           "scidac-checksum" | ||||
| #define SCIDAC_PRIVATE_FILE_XML   "scidac-private-file-xml" | ||||
| #define SCIDAC_FILE_XML           "scidac-file-xml" | ||||
| #define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml" | ||||
| #define SCIDAC_RECORD_XML         "scidac-record-xml" | ||||
| #define SCIDAC_BINARY_DATA        "scidac-binary-data" | ||||
| // Unused SCIDAC records names; could move to support this functionality | ||||
| #define SCIDAC_SITELIST           "scidac-sitelist" | ||||
|  | ||||
| class ILDGField { | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat | ||||
|   const int GRID_IO_MULTIFILE  = 1; // hardcode lift from QIO compat | ||||
|   const int GRID_IO_FIELD      = 0; // hardcode lift from QIO compat | ||||
|   const int GRID_IO_GLOBAL     = 1; // hardcode lift from QIO compat | ||||
|   //////////////////////////////////////////////////////////// | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // QIO uses mandatory "private" records fixed format | ||||
| // Private is in principle "opaque" however it can't be changed now because that would break existing  | ||||
| // file compatability, so should be correct to assume the undocumented but defacto file structure. | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| //////////////////////// | ||||
| // Scidac private file xml | ||||
| // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile> | ||||
| //////////////////////// | ||||
| struct scidacFile : Serializable { | ||||
|  public: | ||||
|   // header strings (not in order) | ||||
|   std::vector<int> dimension; | ||||
|   std::vector<std::string> boundary; | ||||
|   int data_start; | ||||
|   std::string hdr_version; | ||||
|   std::string storage_format; | ||||
|   // Checks on data | ||||
|   double link_trace; | ||||
|   double plaquette; | ||||
|   uint32_t checksum; | ||||
|   unsigned int sequence_number; | ||||
|   std::string data_type; | ||||
|   std::string ensemble_id; | ||||
|   std::string ensemble_label; | ||||
|   std::string creator; | ||||
|   std::string creator_hardware; | ||||
|   std::string creation_date; | ||||
|   std::string archive_date; | ||||
|   std::string floating_point; | ||||
| }; | ||||
| } | ||||
| #else | ||||
| namespace Grid { | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, | ||||
|                                   double, version, | ||||
|                                   int, spacetime, | ||||
| 				  std::string, dims, // must convert to int | ||||
|                                   int, volfmt); | ||||
|  | ||||
| struct ILDGtype { | ||||
|   bool is_ILDG; | ||||
|   ILDGtype() : is_ILDG(false) {} | ||||
| }; | ||||
| } | ||||
|   // Parse the whitespace separated `dims` string into a list of integers. | ||||
|   // Parsing stops at the first token that is not an integer. | ||||
|   std::vector<int> getDimensions(void) { | ||||
|     std::vector<int> parsed; | ||||
|     std::stringstream tokens(dims); | ||||
|     for (int extent; tokens >> extent; ) { | ||||
|       parsed.push_back(extent); | ||||
|     } | ||||
|     return parsed; | ||||
|   } | ||||
|  | ||||
|   // Serialise the lattice extents into `dims` as a single line of integers | ||||
|   // separated by one space (e.g. "16 16 16 32"), the layout shown in the | ||||
|   // scidacFile example above. No line breaks are emitted between entries. | ||||
|   // An empty vector yields an empty string. | ||||
|   void setDimensions(std::vector<int> dimensions) { | ||||
|     const char delimiter = ' '; | ||||
|     std::stringstream stream; | ||||
|     for (std::size_t i = 0; i < dimensions.size(); i++) { | ||||
|       if ( i != 0 ) { | ||||
|         stream << delimiter; | ||||
|       } | ||||
|       stream << dimensions[i]; | ||||
|     } | ||||
|     dims = stream.str(); | ||||
|   } | ||||
|  | ||||
|   // Constructor provides Grid | ||||
|   // The default constructor leaves every field to its default initialisation. | ||||
|   scidacFile() =default; // default constructor | ||||
|   // Fill the record from a grid: dimension count, full lattice extents | ||||
|   // (serialised through setDimensions) and the single-file volume format. | ||||
|   scidacFile(GridBase * grid){ | ||||
|     version      = 1.0; | ||||
|     spacetime    = grid->_ndimension; | ||||
|     setDimensions(grid->FullDimensions());  | ||||
|     volfmt       = GRID_IO_SINGLEFILE; | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////// | ||||
| // scidac-private-record-xml : example | ||||
| // <scidacRecord> | ||||
| // <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype> | ||||
| // <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins> | ||||
| // <typesize>144</typesize><datacount>4</datacount> | ||||
| // </scidacRecord> | ||||
| /////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| // Serialisable image of the scidac-private-record-xml record; the field names | ||||
| // mirror the XML tags of the example above. Only `version` is given a value | ||||
| // by the constructor. | ||||
| struct scidacRecord : Serializable { | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord, | ||||
|                                   double, version, | ||||
|                                   std::string, date, | ||||
| 				  int, recordtype, | ||||
| 				  std::string, datatype, | ||||
| 				  std::string, precision, | ||||
| 				  int, colors, | ||||
| 				  int, spins, | ||||
| 				  int, typesize, | ||||
| 				  int, datacount); | ||||
|  | ||||
|   // Only the version is preset; all other fields are left for the caller. | ||||
|   scidacRecord() { version =1.0; } | ||||
|  | ||||
| }; | ||||
|  | ||||
| //////////////////////// | ||||
| // ILDG format | ||||
| //////////////////////// | ||||
| // Serialisable image of the ildg-format record. The reader in this file maps | ||||
| // precision 64/32 to IEEE64BIG/IEEE32BIG, checks lx,ly,lz,lt against the grid | ||||
| // dimensions and requires field == "su3gauge" when no Grid metadata is found. | ||||
| struct ildgFormat : Serializable { | ||||
| public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, | ||||
| 				  double, version, | ||||
| 				  std::string, field, | ||||
| 				  int, precision, | ||||
| 				  int, lx, | ||||
| 				  int, ly, | ||||
| 				  int, lz, | ||||
| 				  int, lt); | ||||
|   // Only the version is preset. | ||||
|   ildgFormat() { version=1.0; }; | ||||
| }; | ||||
| //////////////////////// | ||||
| // USQCD info | ||||
| //////////////////////// | ||||
| // Serialisable image of a usqcdInfo XML record. The reader copies plaq and | ||||
| // linktr into the FieldMetaData plaquette and link_trace fields. | ||||
| struct usqcdInfo : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, | ||||
| 				  double, version, | ||||
| 				  double, plaq, | ||||
| 				  double, linktr, | ||||
| 				  std::string, info); | ||||
|   // Only the version is preset. | ||||
|   usqcdInfo() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| //////////////////////// | ||||
| // Scidac Checksum | ||||
| //////////////////////// | ||||
| // Serialisable image of the scidac-checksum record. suma and sumb are kept as | ||||
| // strings; the reader converts them with stoull(...,0,16), i.e. as hexadecimal. | ||||
| struct scidacChecksum : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, | ||||
| 				  double, version, | ||||
| 				  std::string, suma, | ||||
| 				  std::string, sumb); | ||||
|   // Only the version is preset. | ||||
|   scidacChecksum() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Type:           scidac-file-xml         <title>MILC ILDG archival gauge configuration</title> | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Type:            | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| //////////////////////// | ||||
| // Scidac private file xml  | ||||
| // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>  | ||||
| ////////////////////////                                                                                                                                                                               | ||||
|  | ||||
| #if 0 | ||||
| //////////////////////////////////////////////////////////////////////////////////////// | ||||
| // From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf | ||||
| //////////////////////////////////////////////////////////////////////////////////////// | ||||
| struct usqcdPropFile : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, | ||||
| 				  double, version, | ||||
| 				  std::string, type, | ||||
| 				  std::string, info); | ||||
|   usqcdPropFile() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| struct usqcdSourceInfo : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo, | ||||
| 				  double, version, | ||||
| 				  std::string, info); | ||||
|   usqcdSourceInfo() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| struct usqcdPropInfo : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo, | ||||
| 				  double, version, | ||||
| 				  int, spin, | ||||
| 				  int, color, | ||||
| 				  std::string, info); | ||||
|   usqcdPropInfo() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| #endif | ||||
|  | ||||
| } | ||||
| #endif | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										325
									
								
								lib/parallelIO/MetaData.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										325
									
								
								lib/parallelIO/MetaData.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,325 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/parallelIO/MetaData.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
|  | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <fstream> | ||||
| #include <map> | ||||
| #include <unistd.h> | ||||
| #include <sys/utsname.h> | ||||
| #include <pwd.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Precision mapping | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Name of the floating point format matching the real scalar type of vobj: | ||||
|   // "IEEE32BIG" when it has the size of float, "IEEE64BIG" when it has the | ||||
|   // size of double, and an empty string otherwise. | ||||
|   template<class vobj> static std::string getFormatString (void) | ||||
|   { | ||||
|     const size_t width = sizeof(typename getPrecision<vobj>::real_scalar_type); | ||||
|     std::string name; | ||||
|     if ( width == sizeof(float)  ) name = std::string("IEEE32BIG"); | ||||
|     if ( width == sizeof(double) ) name = std::string("IEEE64BIG"); | ||||
|     return name; | ||||
|   } | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   // header specification/interpretation | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Header/metadata record describing a stored lattice field: geometry, | ||||
|     // format strings, physics checks (link trace, plaquette), checksums and | ||||
|     // provenance strings. The fields are declared through the serialisation macro. | ||||
|     // NOTE(review): declared with `class`, so the Serializable base is private, | ||||
|     // unlike the `struct ... : Serializable` records elsewhere -- confirm intended. | ||||
|     class FieldMetaData : Serializable { | ||||
|     public: | ||||
|  | ||||
|       GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData, | ||||
| 				      int, nd, | ||||
| 				      std::vector<int>, dimension, | ||||
| 				      std::vector<std::string>, boundary, | ||||
| 				      int, data_start, | ||||
| 				      std::string, hdr_version, | ||||
| 				      std::string, storage_format, | ||||
| 				      double, link_trace, | ||||
| 				      double, plaquette, | ||||
| 				      uint32_t, checksum, | ||||
| 				      uint32_t, scidac_checksuma, | ||||
| 				      uint32_t, scidac_checksumb, | ||||
| 				      unsigned int, sequence_number, | ||||
| 				      std::string, data_type, | ||||
| 				      std::string, ensemble_id, | ||||
| 				      std::string, ensemble_label, | ||||
| 				      std::string, ildg_lfn, | ||||
| 				      std::string, creator, | ||||
| 				      std::string, creator_hardware, | ||||
| 				      std::string, creation_date, | ||||
| 				      std::string, archive_date, | ||||
| 				      std::string, floating_point); | ||||
|       // Default to a four dimensional layout; the remaining fields are not set here. | ||||
|       FieldMetaData(void) {  | ||||
| 	nd=4; | ||||
| 	dimension.resize(4); | ||||
| 	boundary.resize(4); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|  | ||||
|  | ||||
|   namespace QCD { | ||||
|  | ||||
|     using namespace Grid; | ||||
|  | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Bit and Physical Checksumming and QA of data | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Copy the geometry of `grid` into `header`: number of dimensions, the | ||||
|     // full extent of each dimension, and a "PERIODIC" boundary label for each. | ||||
|     inline void GridMetaData(GridBase *grid,FieldMetaData &header) | ||||
|     { | ||||
|       const int ndim = grid->_ndimension; | ||||
|       header.nd = ndim; | ||||
|       header.dimension.resize(ndim); | ||||
|       header.boundary.resize(ndim); | ||||
|       for (int mu = 0; mu < ndim; ++mu) { | ||||
|         header.dimension[mu] = grid->_fdimensions[mu]; | ||||
|         header.boundary[mu]  = std::string("PERIODIC"); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Record provenance in `header`: the creating user (when the password | ||||
|     // database entry is available), the creation/archive date as local time | ||||
|     // formatted with "%c %Z", and a "nodename-machine-sysname-release" | ||||
|     // hardware string taken from uname(). | ||||
|     inline void MachineCharacteristics(FieldMetaData &header) | ||||
|     { | ||||
|       // Who | ||||
|       struct passwd *pw = getpwuid (getuid()); | ||||
|       if (pw) header.creator = std::string(pw->pw_name); | ||||
|  | ||||
|       // When | ||||
|       // strftime is used in place of std::put_time so the date is actually | ||||
|       // filled in; it is left empty if the time cannot be converted or does | ||||
|       // not fit in the buffer. | ||||
|       std::time_t t = std::time(nullptr); | ||||
|       std::tm *tm_ = std::localtime(&t); | ||||
|       char stamp[128]; | ||||
|       stamp[0] = '\0'; | ||||
|       if ( tm_ != nullptr ) { | ||||
|         if ( std::strftime(stamp, sizeof(stamp), "%c %Z", tm_) == 0 ) { | ||||
|           stamp[0] = '\0'; // buffer contents are unspecified on failure | ||||
|         } | ||||
|       } | ||||
|       header.creation_date = std::string(stamp); | ||||
|       header.archive_date  = header.creation_date; | ||||
|  | ||||
|       // What | ||||
|       // Only use the utsname fields when uname() succeeded; otherwise the | ||||
|       // structure is not filled in and creator_hardware is left untouched. | ||||
|       struct utsname name; | ||||
|       if ( uname(&name) == 0 ) { | ||||
|         header.creator_hardware = std::string(name.nodename)+"-"; | ||||
|         header.creator_hardware+= std::string(name.machine)+"-"; | ||||
|         header.creator_hardware+= std::string(name.sysname)+"-"; | ||||
|         header.creator_hardware+= std::string(name.release); | ||||
|       } | ||||
|     } | ||||
|  | ||||
| // Write the fields of `field` to the stream `s` as "KEY = value" lines | ||||
| // bracketed by BEGIN_HEADER / END_HEADER. Exactly four dimensions and four | ||||
| // boundaries are written; the checksums are written in hexadecimal. | ||||
| // The expansion is a sequence of statements, not a single expression. | ||||
| #define dump_meta_data(field, s)					\ | ||||
|       s << "BEGIN_HEADER"      << std::endl;				\ | ||||
|       s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\ | ||||
|       s << "DATATYPE = "       << field.data_type      << std::endl;	\ | ||||
|       s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ | ||||
|       }									\ | ||||
|       s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ | ||||
|       s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\ | ||||
|       }									\ | ||||
| 									\ | ||||
|       s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \ | ||||
|       s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \ | ||||
|       s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \ | ||||
|       s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\ | ||||
|       s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\ | ||||
|       s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\ | ||||
|       s << "CREATOR = "         << field.creator          << std::endl;	\ | ||||
|       s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\ | ||||
|       s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\ | ||||
|       s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\ | ||||
|       s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\ | ||||
|       s << "END_HEADER"         << std::endl; | ||||
|  | ||||
| // Generic header preparation: floating point format string, zeroed NERSC | ||||
| // checksum, grid geometry and machine provenance. No gauge statistics are | ||||
| // computed in this generic version. | ||||
| template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header) | ||||
| { | ||||
|   header.floating_point = getFormatString<vobj>(); | ||||
|   header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||
|   GridBase *fieldGrid = field._grid; | ||||
|   GridMetaData(fieldGrid,header); | ||||
|   MachineCharacteristics(header); | ||||
| } | ||||
|  // Single precision gauge field: store its link trace and average plaquette | ||||
|  // (computed with the periodic single precision gauge implementation) in header. | ||||
|  inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header) | ||||
|  { | ||||
|    typedef Grid::QCD::WilsonLoops<PeriodicGimplF> Loops; | ||||
|    // How to convert data precision etc... | ||||
|    header.link_trace = Loops::linkTrace(data); | ||||
|    header.plaquette  = Loops::avgPlaquette(data); | ||||
|  } | ||||
|  // Double precision gauge field: store its link trace and average plaquette | ||||
|  // (computed with the periodic double precision gauge implementation) in header. | ||||
|  inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header) | ||||
|  { | ||||
|    typedef Grid::QCD::WilsonLoops<PeriodicGimplD> Loops; | ||||
|    // How to convert data precision etc... | ||||
|    header.link_trace = Loops::linkTrace(data); | ||||
|    header.plaquette  = Loops::avgPlaquette(data); | ||||
|  } | ||||
|  // Single precision gauge field specialisation: as the generic version, plus | ||||
|  // link trace and plaquette computed from the field. | ||||
|  template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header) | ||||
|  { | ||||
|    header.floating_point = getFormatString<vLorentzColourMatrixF>(); | ||||
|    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||
|    GridBase *fieldGrid = field._grid; | ||||
|    GridMetaData(fieldGrid,header); | ||||
|    GaugeStatistics(field,header); | ||||
|    MachineCharacteristics(header); | ||||
|  } | ||||
|  // Double precision gauge field specialisation: as the generic version, plus | ||||
|  // link trace and plaquette computed from the field. | ||||
|  template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header) | ||||
|  { | ||||
|    header.floating_point = getFormatString<vLorentzColourMatrixD>(); | ||||
|    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||
|    GridBase *fieldGrid = field._grid; | ||||
|    GridMetaData(fieldGrid,header); | ||||
|    GaugeStatistics(field,header); | ||||
|    MachineCharacteristics(header); | ||||
|  } | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Utilities ; these are QCD aware | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // For every direction mu, overwrite row 2 of the colour matrix with the | ||||
|     // adjoint of the cross product of rows 0 and 1. | ||||
|     inline void reconstruct3(LorentzColourMatrix & cm) | ||||
|     { | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
|         auto &U = cm(mu)(); | ||||
|         U(2,0) = adj(U(0,1)*U(1,2) - U(0,2)*U(1,1)); // x = yz-zy | ||||
|         U(2,1) = adj(U(0,2)*U(1,0) - U(0,0)*U(1,2)); // y = zx-xz | ||||
|         U(2,2) = adj(U(0,0)*U(1,1) - U(0,1)*U(1,0)); // z = xy-yx | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Some data types for intermediate storage | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Nested vector layout: Nd entries, each holding 2 vectors of Nc elements, | ||||
|     // i.e. two rows of a colour matrix per direction. | ||||
|     template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; | ||||
|  | ||||
|     // Default, single and double precision complex instantiations. | ||||
|     typedef iLorentzColour2x3<Complex>  LorentzColour2x3; | ||||
|     typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; | ||||
|     typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D; | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // Simple classes for precision conversion | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // Converts an in-memory object (sobj) to its file representation (fobj) by | ||||
| // copying it one real scalar word at a time; the assignment converts between | ||||
| // the two scalar types. Both objects must contain the same number of words. | ||||
| template <class fobj, class sobj> | ||||
| struct BinarySimpleUnmunger { | ||||
|   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; | ||||
|   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; | ||||
|  | ||||
|   void operator()(sobj &in, fobj &out) { | ||||
|     // take word by word and transform according to the status | ||||
|     fobj_stype *out_buffer = (fobj_stype *)&out; | ||||
|     sobj_stype *in_buffer = (sobj_stype *)&in; | ||||
|     size_t fobj_words = sizeof(out) / sizeof(fobj_stype); | ||||
|     size_t sobj_words = sizeof(in) / sizeof(sobj_stype); | ||||
|     assert(fobj_words == sobj_words); | ||||
|  | ||||
|     for (unsigned int word = 0; word < sobj_words; word++) | ||||
|       out_buffer[word] = in_buffer[word];  // type conversion on the fly | ||||
|  | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Functor copying an fobj into an sobj one real scalar word at a time. | ||||
| // Each word is converted from fobj_stype to sobj_stype by plain assignment. | ||||
| template <class fobj, class sobj> | ||||
| struct BinarySimpleMunger { | ||||
|   // Real scalar types of the two objects, as reported by getPrecision. | ||||
|   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; | ||||
|   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; | ||||
|  | ||||
|   // in  : source object, read as an array of fobj_stype words | ||||
|   // out : destination object, written as an array of sobj_stype words | ||||
|   // Both objects must contain the same number of words (asserted). | ||||
|   void operator()(fobj &in, sobj &out) { | ||||
|     // take word by word and transform according to the status | ||||
|     fobj_stype *in_buffer = (fobj_stype *)&in; | ||||
|     sobj_stype *out_buffer = (sobj_stype *)&out; | ||||
|     size_t fobj_words = sizeof(in) / sizeof(fobj_stype); | ||||
|     size_t sobj_words = sizeof(out) / sizeof(sobj_stype); | ||||
|     assert(fobj_words == sobj_words); | ||||
|  | ||||
|     // size_t index matches the type of the word counts | ||||
|     for (size_t word = 0; word < sobj_words; word++) | ||||
|       out_buffer[word] = in_buffer[word];  // type conversion on the fly | ||||
|  | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|     // Functor copying every (mu; i, j) element of `in` into `out`, | ||||
|     // for mu in [0,Nd) and i, j in [0,Nc). Any scalar conversion is | ||||
|     // whatever the element assignment performs. | ||||
|     template<class fobj,class sobj> | ||||
|     struct GaugeSimpleMunger{ | ||||
|       void operator()(fobj &in, sobj &out) { | ||||
|         for (int dir = 0; dir < Nd; ++dir) | ||||
|           for (int row = 0; row < Nc; ++row) | ||||
|             for (int col = 0; col < Nc; ++col) | ||||
|               out(dir)()(row, col) = in(dir)()(row, col); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Functor copying every (mu; i, j) element of the sobj `in` into the | ||||
|     // fobj `out`, for mu in [0,Nd) and i, j in [0,Nc). | ||||
|     template <class fobj, class sobj> | ||||
|     struct GaugeSimpleUnmunger { | ||||
|       void operator()(sobj &in, fobj &out) { | ||||
|         for (int dir = 0; dir < Nd; ++dir) | ||||
|           for (int row = 0; row < Nc; ++row) | ||||
|             for (int col = 0; col < Nc; ++col) | ||||
|               out(dir)()(row, col) = in(dir)()(row, col); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Functor copying the two stored rows (i = 0,1; j = 0..2) of each of the | ||||
|     // Nd entries of `in` into `out`, then calling reconstruct3(out) to fill | ||||
|     // in row 2. | ||||
|     template<class fobj,class sobj> | ||||
|     struct Gauge3x2munger{ | ||||
|       void operator() (fobj &in,sobj &out){ | ||||
|         for(int dir=0; dir<Nd; ++dir){ | ||||
|           for(int row=0; row<2; ++row){ | ||||
|             for(int col=0; col<3; ++col){ | ||||
|               out(dir)()(row,col) = in(dir)(row)(col); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         // rows 0 and 1 are now in place; derive row 2 from them | ||||
|         reconstruct3(out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Functor copying rows 0 and 1 (columns 0..2) of each of the Nd entries | ||||
|     // of `in` into the two-row object `out`; row 2 of `in` is not copied. | ||||
|     template<class fobj,class sobj> | ||||
|     struct Gauge3x2unmunger{ | ||||
|       void operator() (sobj &in,fobj &out){ | ||||
|         for(int dir=0; dir<Nd; ++dir){ | ||||
|           for(int row=0; row<2; ++row){ | ||||
|             for(int col=0; col<3; ++col){ | ||||
|               out(dir)(row)(col) = in(dir)()(row,col); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }; | ||||
|   } | ||||
|  | ||||
|  | ||||
| } | ||||
| @@ -30,182 +30,11 @@ | ||||
| #ifndef GRID_NERSC_IO_H | ||||
| #define GRID_NERSC_IO_H | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <fstream> | ||||
| #include <map> | ||||
|  | ||||
| #include <unistd.h> | ||||
| #include <sys/utsname.h> | ||||
| #include <pwd.h> | ||||
|  | ||||
| namespace Grid { | ||||
|   namespace QCD { | ||||
|  | ||||
|     using namespace Grid; | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Some data types for intermediate storage | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Nd entries, each holding 2 vectors of Nc elements of vtype. | ||||
|     // The outer extent uses Nd rather than a literal 4, matching the Nd loops | ||||
|     // used by the mungers that operate on the full gauge object. | ||||
|     template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; | ||||
|  | ||||
|     typedef iLorentzColour2x3<Complex>  LorentzColour2x3; | ||||
|     typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; | ||||
|     typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D; | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // header specification/interpretation | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Plain record of the header fields handled by the NERSC reader/writer. | ||||
|     // Numeric members are zero-initialised so that a default-constructed | ||||
|     // header can be copied or printed before every field has been filled in. | ||||
|     class NerscField { | ||||
|     public: | ||||
|       // header strings (not in order) | ||||
|       int dimension[4] = {0, 0, 0, 0}; | ||||
|       std::string boundary[4]; | ||||
|       int data_start = 0;            // set by writeHeader from the stream position after the header | ||||
|       std::string hdr_version; | ||||
|       std::string storage_format; | ||||
|       // Checks on data | ||||
|       double link_trace = 0.0; | ||||
|       double plaquette = 0.0; | ||||
|       uint32_t checksum = 0; | ||||
|       unsigned int sequence_number = 0; | ||||
|       std::string data_type; | ||||
|       std::string ensemble_id ; | ||||
|       std::string ensemble_label ; | ||||
|       std::string creator ; | ||||
|       std::string creator_hardware ; | ||||
|       std::string creation_date ; | ||||
|       std::string archive_date ; | ||||
|       std::string floating_point; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Bit and Physical Checksumming and QA of data | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     // Copies the four full-lattice dimensions of `grid` into `header` and | ||||
|     // labels every boundary "PERIODIC". Asserts that the grid is 4-dimensional. | ||||
|     inline void NerscGrid(GridBase *grid,NerscField &header) | ||||
|     { | ||||
|       assert(grid->_ndimension==4); | ||||
|       const std::string periodic("PERIODIC"); | ||||
|       for(int dir=0; dir<4; ++dir) { | ||||
|         header.dimension[dir] = grid->_fdimensions[dir]; | ||||
|         header.boundary[dir]  = periodic; | ||||
|       } | ||||
|     } | ||||
|     // Stores the link trace and average plaquette of `data`, as computed by | ||||
|     // WilsonLoops<PeriodicGimplR>, in `header`. | ||||
|     // Open question carried over from the original: how to convert data precision etc. | ||||
|     template<class GaugeField> | ||||
|     inline void NerscStatistics(GaugeField & data,NerscField &header) | ||||
|     { | ||||
|       typedef Grid::QCD::WilsonLoops<PeriodicGimplR> Loops; | ||||
|       header.link_trace = Loops::linkTrace(data); | ||||
|       header.plaquette  = Loops::avgPlaquette(data); | ||||
|     } | ||||
|  | ||||
|     // Fills the provenance fields of `header`: | ||||
|     //   creator                     - user name from the password database | ||||
|     //   creation_date, archive_date - local time formatted as "%c %Z" | ||||
|     //   creator_hardware            - "nodename-machine-sysname-release" from uname() | ||||
|     // A field is left as it was when the corresponding system query fails. | ||||
|     inline void NerscMachineCharacteristics(NerscField &header) | ||||
|     { | ||||
|       // Who | ||||
|       struct passwd *pw = getpwuid (getuid()); | ||||
|       if (pw) header.creator = std::string(pw->pw_name); | ||||
|  | ||||
|       // When | ||||
|       // strftime stands in for the std::put_time call that was commented out, | ||||
|       // which left the date strings permanently empty. localtime may return | ||||
|       // null, so it is checked before use. | ||||
|       std::time_t t = std::time(nullptr); | ||||
|       const std::tm *tm = std::localtime(&t); | ||||
|       std::string stamp; | ||||
|       char buf[128]; | ||||
|       if (tm != nullptr && std::strftime(buf, sizeof(buf), "%c %Z", tm) > 0) { | ||||
|         stamp = buf; | ||||
|       } | ||||
|       header.creation_date = stamp; | ||||
|       header.archive_date  = header.creation_date; | ||||
|  | ||||
|       // What | ||||
|       // Only read the utsname fields when uname() reports success. | ||||
|       struct utsname name; | ||||
|       if (uname(&name) >= 0) { | ||||
|         header.creator_hardware = std::string(name.nodename)+"-"; | ||||
|         header.creator_hardware+= std::string(name.machine)+"-"; | ||||
|         header.creator_hardware+= std::string(name.sysname)+"-"; | ||||
|         header.creator_hardware+= std::string(name.release); | ||||
|       } | ||||
|  | ||||
|     } | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Utilities ; these are QCD aware | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Thin forwarder: hands the buffer, its size in bytes and the checksum | ||||
|     // reference straight to BinaryIO::Uint32Checksum. | ||||
|     inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) | ||||
|     { | ||||
|       BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum); | ||||
|     } | ||||
|     // For each of the Nd entries of `cm`, overwrites row 2 with adj() applied | ||||
|     // to the component-wise cross product of rows 0 and 1. Rows 0 and 1 are | ||||
|     // only read. The loop bound is Nd rather than a literal 4, in line with | ||||
|     // the other loops over the same object. | ||||
|     inline void reconstruct3(LorentzColourMatrix & cm) | ||||
|     { | ||||
|       const int x=0; | ||||
|       const int y=1; | ||||
|       const int z=2; | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
|         cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy | ||||
|         cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz | ||||
|         cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Functor copying every (mu; i, j) element of `in` into `out`, then | ||||
|     // passing the bytes of `in` to NerscChecksum together with `csum`. | ||||
|     template<class fobj,class sobj> | ||||
|     struct NerscSimpleMunger{ | ||||
|       void operator()(fobj &in, sobj &out, uint32_t &csum) { | ||||
|         for (int dir = 0; dir < Nd; ++dir) | ||||
|           for (int row = 0; row < Nc; ++row) | ||||
|             for (int col = 0; col < Nc; ++col) | ||||
|               out(dir)()(row, col) = in(dir)()(row, col); | ||||
|  | ||||
|         // the checksum is taken over the source object | ||||
|         NerscChecksum(reinterpret_cast<uint32_t *>(&in), sizeof(in), csum); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Functor copying every (mu; i, j) element of the sobj `in` into the fobj | ||||
|     // `out`, then passing the bytes of `out` to NerscChecksum together with `csum`. | ||||
|     template <class fobj, class sobj> | ||||
|     struct NerscSimpleUnmunger { | ||||
|       void operator()(sobj &in, fobj &out, uint32_t &csum) { | ||||
|         for (int dir = 0; dir < Nd; ++dir) | ||||
|           for (int row = 0; row < Nc; ++row) | ||||
|             for (int col = 0; col < Nc; ++col) | ||||
|               out(dir)()(row, col) = in(dir)()(row, col); | ||||
|  | ||||
|         // the checksum is taken over the freshly written destination object | ||||
|         NerscChecksum(reinterpret_cast<uint32_t *>(&out), sizeof(out), csum); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Functor that first passes the bytes of the two-row object `in` to | ||||
|     // NerscChecksum, then copies rows 0 and 1 (columns 0..2) of each of the | ||||
|     // Nd entries into `out` and calls reconstruct3(out) to fill in row 2. | ||||
|     // The outer loop runs to Nd rather than a literal 4, matching the other | ||||
|     // mungers. | ||||
|     template<class fobj,class sobj> | ||||
|     struct Nersc3x2munger{ | ||||
|       void operator() (fobj &in,sobj &out,uint32_t &csum){ | ||||
|  | ||||
|         // the checksum is taken over the source object before it is expanded | ||||
|         NerscChecksum((uint32_t *)&in,sizeof(in),csum); | ||||
|  | ||||
|         for(int mu=0;mu<Nd;mu++){ | ||||
|           for(int i=0;i<2;i++){ | ||||
|             for(int j=0;j<3;j++){ | ||||
|               out(mu)()(i,j) = in(mu)(i)(j); | ||||
|             }} | ||||
|         } | ||||
|         reconstruct3(out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Functor copying rows 0 and 1 (columns 0..2) of each of the Nd entries | ||||
|     // of `in` into the two-row object `out`, then passing the bytes of `out` | ||||
|     // to NerscChecksum. The outer loop runs to Nd rather than a literal 4, | ||||
|     // matching the other mungers. | ||||
|     template<class fobj,class sobj> | ||||
|     struct Nersc3x2unmunger{ | ||||
|  | ||||
|       void operator() (sobj &in,fobj &out,uint32_t &csum){ | ||||
|  | ||||
|         for(int mu=0;mu<Nd;mu++){ | ||||
|           for(int i=0;i<2;i++){ | ||||
|             for(int j=0;j<3;j++){ | ||||
|               out(mu)(i)(j) = in(mu)()(i,j); | ||||
|             }} | ||||
|         } | ||||
|  | ||||
|         // the checksum is taken over the freshly written destination object | ||||
|         NerscChecksum((uint32_t *)&out,sizeof(out),csum); | ||||
|  | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Write and read from fstream; compute header offset for payload | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -216,42 +45,17 @@ namespace Grid { | ||||
| 	std::ofstream fout(file,std::ios::out); | ||||
|       } | ||||
|    | ||||
| #define dump_nersc_header(field, s)					\ | ||||
|       s << "BEGIN_HEADER"      << std::endl;				\ | ||||
|       s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\ | ||||
|       s << "DATATYPE = "       << field.data_type      << std::endl;	\ | ||||
|       s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ | ||||
|       }									\ | ||||
|       s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ | ||||
|       s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\ | ||||
|       }									\ | ||||
| 									\ | ||||
|       s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \ | ||||
|       s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\ | ||||
|       s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\ | ||||
|       s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\ | ||||
|       s << "CREATOR = "         << field.creator          << std::endl;	\ | ||||
|       s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\ | ||||
|       s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\ | ||||
|       s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\ | ||||
|       s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\ | ||||
|       s << "END_HEADER"         << std::endl; | ||||
|    | ||||
|       static inline unsigned int writeHeader(NerscField &field,std::string file) | ||||
|       static inline unsigned int writeHeader(FieldMetaData &field,std::string file) | ||||
|       { | ||||
|       std::ofstream fout(file,std::ios::out|std::ios::in); | ||||
|       fout.seekp(0,std::ios::beg); | ||||
|       dump_nersc_header(field, fout); | ||||
|       dump_meta_data(field, fout); | ||||
|       field.data_start = fout.tellp(); | ||||
|       return field.data_start; | ||||
|     } | ||||
|  | ||||
|       // for the header-reader | ||||
|       static inline int readHeader(std::string file,GridBase *grid,  NerscField &field) | ||||
|       static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field) | ||||
|       { | ||||
|       int offset=0; | ||||
|       std::map<std::string,std::string> header; | ||||
| @@ -323,21 +127,21 @@ namespace Grid { | ||||
|       return field.data_start; | ||||
|     } | ||||
|  | ||||
|       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Now the meat: the object readers | ||||
|       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #define PARALLEL_READ | ||||
| #define PARALLEL_WRITE | ||||
|     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Now the meat: the object readers | ||||
|     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       template<class vsimd> | ||||
|       static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file) | ||||
|       { | ||||
|     template<class vsimd> | ||||
|     static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | ||||
| 					 FieldMetaData& header, | ||||
| 					 std::string file) | ||||
|     { | ||||
|       typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|  | ||||
|       GridBase *grid = Umu._grid; | ||||
|       int offset = readHeader(file,Umu._grid,header); | ||||
|  | ||||
|       NerscField clone(header); | ||||
|       FieldMetaData clone(header); | ||||
|  | ||||
|       std::string format(header.floating_point); | ||||
|  | ||||
| @@ -346,76 +150,78 @@ namespace Grid { | ||||
|       int ieee64big = (format == std::string("IEEE64BIG")); | ||||
|       int ieee64    = (format == std::string("IEEE64")); | ||||
|  | ||||
|       uint32_t csum; | ||||
|       uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|       // depending on datatype, set up munger; | ||||
|       // munger is a function of <floating point, Real, data_type> | ||||
|       if ( header.data_type == std::string("4D_SU3_GAUGE") ) { | ||||
|       if ( ieee32 || ieee32big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); | ||||
| #else | ||||
| 	csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); | ||||
| #endif | ||||
|       } | ||||
|       if ( ieee64 || ieee64big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); | ||||
| #else  | ||||
| 	csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); | ||||
| #endif | ||||
|       } | ||||
|       } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { | ||||
| 	if ( ieee32 || ieee32big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	  csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); | ||||
| #else | ||||
| 	  csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); | ||||
| #endif | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||
| 	    (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
| 	if ( ieee64 || ieee64big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	  csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); | ||||
| #else | ||||
| 	  csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); | ||||
| #endif | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||
| 	    (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
|       } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { | ||||
| 	if ( ieee32 || ieee32big ) { | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||
| 	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
| 	if ( ieee64 || ieee64big ) { | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||
| 	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
|       } else { | ||||
| 	assert(0); | ||||
|       } | ||||
|  | ||||
|       NerscStatistics<GaugeField>(Umu,clone); | ||||
|       GaugeStatistics(Umu,clone); | ||||
|  | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<            csum<< std::dec | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec | ||||
| 	       <<" header   "<<std::hex<<header.checksum<<std::dec <<std::endl; | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette | ||||
| 	       <<" header    "<<header.plaquette<<std::endl; | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace | ||||
| 	       <<" header    "<<header.link_trace<<std::endl; | ||||
|  | ||||
|       if ( fabs(clone.plaquette -header.plaquette ) >=  1.0e-5 ) {  | ||||
| 	std::cout << " Plaquette mismatch "<<std::endl; | ||||
| 	std::cout << Umu[0]<<std::endl; | ||||
| 	std::cout << Umu[1]<<std::endl; | ||||
|       } | ||||
|       if ( nersc_csum != header.checksum ) {  | ||||
| 	std::cerr << " checksum mismatch " << std::endl; | ||||
| 	std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl; | ||||
| 	std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl; | ||||
| 	std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; | ||||
| 	exit(0); | ||||
|       } | ||||
|       assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||
|       assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||
|       assert(csum == header.checksum ); | ||||
|  | ||||
|       assert(nersc_csum == header.checksum ); | ||||
|        | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|       template<class vsimd> | ||||
|       static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32) | ||||
|       static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | ||||
| 					    std::string file,  | ||||
| 					    int two_row, | ||||
| 					    int bits32) | ||||
|       { | ||||
| 	typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|  | ||||
| 	typedef iLorentzColourMatrix<vsimd> vobj; | ||||
| 	typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
| 	FieldMetaData header; | ||||
| 	/////////////////////////////////////////// | ||||
| 	// Following should become arguments | ||||
| 	NerscField header; | ||||
| 	/////////////////////////////////////////// | ||||
| 	header.sequence_number = 1; | ||||
| 	header.ensemble_id     = "UKQCD"; | ||||
| 	header.ensemble_label  = "DWF"; | ||||
| @@ -425,45 +231,32 @@ namespace Grid { | ||||
|    | ||||
| 	GridBase *grid = Umu._grid; | ||||
|  | ||||
| 	NerscGrid(grid,header); | ||||
| 	NerscStatistics<GaugeField>(Umu,header); | ||||
| 	NerscMachineCharacteristics(header); | ||||
| 	GridMetaData(grid,header); | ||||
| 	assert(header.nd==4); | ||||
| 	GaugeStatistics(Umu,header); | ||||
| 	MachineCharacteristics(header); | ||||
|  | ||||
| 	uint32_t csum; | ||||
| 	int offset; | ||||
|    | ||||
| 	truncate(file); | ||||
|  | ||||
| 	if ( two_row ) {  | ||||
| 	// Sod it -- always write 3x3 double | ||||
| 	header.floating_point = std::string("IEEE64BIG"); | ||||
| 	header.data_type      = std::string("4D_SU3_GAUGE_3x3"); | ||||
| 	GaugeSimpleUnmunger<fobj3D,sobj> munge; | ||||
| 	offset = writeHeader(header,file); | ||||
|  | ||||
| 	  header.floating_point = std::string("IEEE64BIG"); | ||||
| 	  header.data_type      = std::string("4D_SU3_GAUGE"); | ||||
| 	  Nersc3x2unmunger<fobj2D,sobj> munge; | ||||
| 	  BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum); | ||||
| 	  offset = writeHeader(header,file); | ||||
| #ifdef PARALLEL_WRITE | ||||
| 	  csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); | ||||
| #else | ||||
| 	  csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); | ||||
| #endif | ||||
| 	} else {  | ||||
| 	  header.floating_point = std::string("IEEE64BIG"); | ||||
| 	  header.data_type      = std::string("4D_SU3_GAUGE_3x3"); | ||||
| 	  NerscSimpleUnmunger<fobj3D,sobj> munge; | ||||
| 	  BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum); | ||||
| 	  offset = writeHeader(header,file); | ||||
| #ifdef PARALLEL_WRITE | ||||
| 	  csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); | ||||
| #else | ||||
| 	  csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); | ||||
| #endif | ||||
| 	} | ||||
| 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
| 	BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point, | ||||
| 								  nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	header.checksum = nersc_csum; | ||||
| 	writeHeader(header,file); | ||||
|  | ||||
| 	std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl; | ||||
| 	std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum " | ||||
| 		 <<std::hex<<header.checksum | ||||
| 		 <<std::dec<<" plaq "<< header.plaquette <<std::endl; | ||||
|  | ||||
|       } | ||||
|  | ||||
|  | ||||
|       /////////////////////////////// | ||||
|       // RNG state | ||||
|       /////////////////////////////// | ||||
| @@ -472,19 +265,19 @@ namespace Grid { | ||||
| 	typedef typename GridParallelRNG::RngStateType RngStateType; | ||||
|  | ||||
| 	// Following should become arguments | ||||
| 	NerscField header; | ||||
| 	FieldMetaData header; | ||||
| 	header.sequence_number = 1; | ||||
| 	header.ensemble_id     = "UKQCD"; | ||||
| 	header.ensemble_label  = "DWF"; | ||||
|  | ||||
| 	GridBase *grid = parallel._grid; | ||||
|  | ||||
| 	NerscGrid(grid,header); | ||||
| 	GridMetaData(grid,header); | ||||
| 	assert(header.nd==4); | ||||
| 	header.link_trace=0.0; | ||||
| 	header.plaquette=0.0; | ||||
| 	NerscMachineCharacteristics(header); | ||||
| 	MachineCharacteristics(header); | ||||
|  | ||||
| 	uint32_t csum; | ||||
| 	int offset; | ||||
|    | ||||
| #ifdef RNG_RANLUX | ||||
| @@ -502,15 +295,19 @@ namespace Grid { | ||||
|  | ||||
| 	truncate(file); | ||||
| 	offset = writeHeader(header,file); | ||||
| 	csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset); | ||||
| 	header.checksum = csum; | ||||
| 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
| 	BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	header.checksum = nersc_csum; | ||||
| 	offset = writeHeader(header,file); | ||||
|  | ||||
| 	std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl; | ||||
| 	std::cout<<GridLogMessage  | ||||
| 		 <<"Written NERSC RNG STATE "<<file<< " checksum " | ||||
| 		 <<std::hex<<header.checksum | ||||
| 		 <<std::dec<<std::endl; | ||||
|  | ||||
|       } | ||||
|      | ||||
|       static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file) | ||||
|       static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file) | ||||
|       { | ||||
| 	typedef typename GridParallelRNG::RngStateType RngStateType; | ||||
|  | ||||
| @@ -518,7 +315,7 @@ namespace Grid { | ||||
|  | ||||
| 	int offset = readHeader(file,grid,header); | ||||
|  | ||||
| 	NerscField clone(header); | ||||
| 	FieldMetaData clone(header); | ||||
|  | ||||
| 	std::string format(header.floating_point); | ||||
| 	std::string data_type(header.data_type); | ||||
| @@ -538,15 +335,19 @@ namespace Grid { | ||||
|  | ||||
| 	// depending on datatype, set up munger; | ||||
| 	// munger is a function of <floating point, Real, data_type> | ||||
| 	uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); | ||||
| 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
| 	BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|  | ||||
| 	assert(csum == header.checksum ); | ||||
| 	if ( nersc_csum != header.checksum ) {  | ||||
| 	  std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl; | ||||
| 	  exit(0); | ||||
| 	} | ||||
| 	assert(nersc_csum == header.checksum ); | ||||
|  | ||||
| 	std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl; | ||||
|       } | ||||
|  | ||||
|     }; | ||||
|  | ||||
|  | ||||
|   }} | ||||
| #endif | ||||
|   | ||||
| @@ -40,7 +40,7 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." , INSTRUCTIONS}, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." , CPUCYCLES   }, | ||||
|     // 4 | ||||
| #ifdef AVX512 | ||||
| #ifdef KNL | ||||
|     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES    }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS  }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS    }, | ||||
|   | ||||
| @@ -414,7 +414,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | ||||
|   for(int i=0; i < Ls; i++){ | ||||
|     as[i] = 1.0; | ||||
|     omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code | ||||
|     //    assert(fabs(omega[i])>0.0); | ||||
|     assert(omega[i]!=Coeff_t(0.0)); | ||||
|     bs[i] = 0.5*(bpc/omega[i] + bmc); | ||||
|     cs[i] = 0.5*(bpc/omega[i] - bmc); | ||||
|   } | ||||
| @@ -429,7 +429,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | ||||
|    | ||||
|   for(int i=0;i<Ls;i++){ | ||||
|     bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);      | ||||
|     //    assert(fabs(bee[i])>0.0); | ||||
|     assert(bee[i]!=Coeff_t(0.0)); | ||||
|     cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); | ||||
|     beo[i]=as[i]*bs[i]; | ||||
|     ceo[i]=-as[i]*cs[i]; | ||||
| @@ -455,11 +455,17 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | ||||
|     dee[i] = bee[i]; | ||||
|      | ||||
|     if ( i < Ls-1 ) { | ||||
|  | ||||
|       assert(bee[i]!=Coeff_t(0.0)); | ||||
|       assert(bee[0]!=Coeff_t(0.0)); | ||||
|        | ||||
|       lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column | ||||
|        | ||||
|       leem[i]=mass*cee[Ls-1]/bee[0]; | ||||
|       for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1]; | ||||
|       for(int j=0;j<i;j++) { | ||||
| 	assert(bee[j+1]!=Coeff_t(0.0)); | ||||
| 	leem[i]*= aee[j]/bee[j+1]; | ||||
|       } | ||||
|        | ||||
|       uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row | ||||
|        | ||||
| @@ -478,7 +484,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | ||||
|   {  | ||||
|     Coeff_t delta_d=mass*cee[Ls-1]; | ||||
|     for(int j=0;j<Ls-1;j++) { | ||||
|       //      assert(fabs(bee[j])>0.0); | ||||
|       assert(bee[j] != Coeff_t(0.0)); | ||||
|       delta_d *= cee[j]/bee[j]; | ||||
|     } | ||||
|     dee[Ls-1] += delta_d; | ||||
|   | ||||
| @@ -237,4 +237,11 @@ typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermion | ||||
|  | ||||
|   }} | ||||
|  | ||||
| //////////////////// | ||||
| // Scalar QED actions | ||||
| // TODO: this needs to move to another header after rename to Fermion.h | ||||
| //////////////////// | ||||
| #include <Grid/qcd/action/scalar/Scalar.h> | ||||
| #include <Grid/qcd/action/gauge/Photon.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -644,19 +644,16 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation: | ||||
|  | ||||
|     INHERIT_GIMPL_TYPES(Gimpl); | ||||
|        | ||||
|     template <typename vtype> using iImplScalar            = iScalar<iScalar<iScalar<vtype> > >; | ||||
|     template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplHalfSpinor        = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; | ||||
|     template <typename vtype> using iImplPropagator        = iScalar<iScalar<iMatrix<vtype, Dimension> > >; | ||||
|      | ||||
|     typedef iImplScalar<Simd>            SiteComplex; | ||||
|     typedef iImplSpinor<Simd>            SiteSpinor; | ||||
|     typedef iImplHalfSpinor<Simd>        SiteHalfSpinor; | ||||
|     typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; | ||||
|     typedef iImplPropagator<Simd>        SitePropagator; | ||||
|      | ||||
|     typedef Lattice<SiteComplex>           ComplexField; | ||||
|     typedef Lattice<SiteSpinor>            FermionField; | ||||
|     typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|     typedef Lattice<SitePropagator> PropagatorField; | ||||
| @@ -775,7 +772,6 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation: | ||||
|  | ||||
|     INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|     template <typename vtype> using iImplScalar            = iScalar<iScalar<iScalar<vtype> > >; | ||||
|     template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplHalfSpinor        = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; | ||||
| @@ -792,12 +788,10 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation: | ||||
|     typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|     typedef Lattice<SitePropagator> PropagatorField; | ||||
|      | ||||
|     typedef iImplScalar<Simd>            SiteComplex; | ||||
|     typedef iImplSpinor<Simd>            SiteSpinor; | ||||
|     typedef iImplHalfSpinor<Simd>        SiteHalfSpinor; | ||||
|  | ||||
|      | ||||
|     typedef Lattice<SiteComplex>           ComplexField; | ||||
|     typedef Lattice<SiteSpinor>            FermionField; | ||||
|      | ||||
|     typedef SimpleCompressor<SiteSpinor> Compressor; | ||||
|   | ||||
| @@ -230,8 +230,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr | ||||
| { | ||||
|   Compressor compressor; | ||||
|   int LLs = in._grid->_rdimensions[0]; | ||||
|  | ||||
|  | ||||
|  | ||||
|   DhopTotalTime -= usecond(); | ||||
|   DhopCommTime -= usecond(); | ||||
|   st.HaloExchange(in,compressor); | ||||
|   DhopCommTime += usecond(); | ||||
|    | ||||
|   DhopComputeTime -= usecond(); | ||||
|   // Dhop takes the 4d grid from U, and makes a 5d index for fermion | ||||
|   if (dag == DaggerYes) { | ||||
|     parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { | ||||
| @@ -244,12 +251,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr | ||||
| 	Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); | ||||
|     } | ||||
|   } | ||||
|   DhopComputeTime += usecond(); | ||||
|   DhopTotalTime   += usecond(); | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|   DhopCalls+=1; | ||||
|   conformable(in._grid,FermionRedBlackGrid());    // verifies half grid | ||||
|   conformable(in._grid,out._grid); // drops the cb check | ||||
|  | ||||
| @@ -261,6 +271,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionFie | ||||
| template<class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|   DhopCalls+=1; | ||||
|   conformable(in._grid,FermionRedBlackGrid());    // verifies half grid | ||||
|   conformable(in._grid,out._grid); // drops the cb check | ||||
|  | ||||
| @@ -272,6 +283,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionFie | ||||
| template<class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|   DhopCalls+=2; | ||||
|   conformable(in._grid,FermionGrid()); // verifies full grid | ||||
|   conformable(in._grid,out._grid); | ||||
|  | ||||
| @@ -280,6 +292,54 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField | ||||
|   DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag); | ||||
| } | ||||
|  | ||||
| // Print the Dhop performance counters accumulated since the last ZeroCounters(): | ||||
| // per-call timings, flop-rate estimates, and the reports of the three stencils. | ||||
| // The counters themselves are left unmodified, so Report() may be called repeatedly. | ||||
| template<class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::Report(void)  | ||||
| { | ||||
|   RealD NP     = _FourDimGrid->_Nprocessors; | ||||
|   RealD NN     = _FourDimGrid->NodeCount(); | ||||
|  | ||||
|   // Volume of the grid this operator is defined on (Ls times the global 4d extents), | ||||
|   // rather than the command-line default lattice. | ||||
|   RealD volume = Ls; | ||||
|   std::vector<int> latt = _FourDimGrid->GlobalDimensions(); | ||||
|   for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu]; | ||||
|  | ||||
|   // Every figure below is normalised by DhopCalls; skip them when there were no calls | ||||
|   // so that no nan/inf values are printed. | ||||
|   if ( DhopCalls > 0 ) { | ||||
|     std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; | ||||
|  | ||||
|     std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls   : "  | ||||
|               << DhopCalls   << std::endl; | ||||
|     std::cout << GridLogMessage << "ImprovedStaggeredFermion5D TotalTime   /Calls       : "  | ||||
|               << DhopTotalTime   / DhopCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime    /Calls       : "  | ||||
|               << DhopCommTime    / DhopCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls        : "  | ||||
|               << DhopComputeTime / DhopCalls << " us" << std::endl; | ||||
|  | ||||
|     // Average the compute time over ranks. Work on a copy so the member counter keeps | ||||
|     // this rank's own accumulated time. | ||||
|     // NOTE(review): GlobalSum is presumably a collective; this assumes DhopCalls is the | ||||
|     // same on every rank so all ranks take this branch together. | ||||
|     RealD avgComputeTime = DhopComputeTime; | ||||
|     _FourDimGrid->GlobalSum(avgComputeTime); | ||||
|     avgComputeTime/=NP; | ||||
|  | ||||
|     RealD mflops = 1154*volume*DhopCalls/avgComputeTime/2; // 2 for red black counting | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call                : " << mflops << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call per rank       : " << mflops/NP << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call per node       : " << mflops/NN << std::endl; | ||||
|  | ||||
|     RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call (full)         : " << Fullmflops << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; | ||||
|   } | ||||
|  | ||||
|   std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Stencil"    <<std::endl;  Stencil.Report(); | ||||
|   std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilEven"<<std::endl;  StencilEven.Report(); | ||||
|   std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilOdd" <<std::endl;  StencilOdd.Report(); | ||||
| } | ||||
| // Reset the Dhop call/timing counters of this operator and of its three stencils. | ||||
| template<class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)  | ||||
| { | ||||
|   // Operator-level counters (all stored as double) | ||||
|   DhopCalls = DhopTotalTime = DhopCommTime = DhopComputeTime = 0; | ||||
|  | ||||
|   // Per-stencil counters | ||||
|   Stencil.ZeroCounters(); | ||||
|   StencilEven.ZeroCounters(); | ||||
|   StencilOdd.ZeroCounters(); | ||||
| } | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////// | ||||
| // Implement the general interface. Here we use SAME mass on all slices | ||||
|   | ||||
| @@ -55,6 +55,16 @@ namespace QCD { | ||||
|       FermionField _tmp; | ||||
|       FermionField &tmp(void) { return _tmp; } | ||||
|  | ||||
|       //////////////////////////////////////// | ||||
|       // Performance monitoring | ||||
|       //////////////////////////////////////// | ||||
|       void Report(void); | ||||
|       void ZeroCounters(void); | ||||
|       double DhopTotalTime; | ||||
|       double DhopCalls; | ||||
|       double DhopCommTime; | ||||
|       double DhopComputeTime; | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Implement the abstract base | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -238,7 +238,33 @@ template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCom | ||||
| template<class vobj,class cobj> | ||||
| class WilsonStencil : public CartesianStencil<vobj,cobj> { | ||||
| public: | ||||
|  | ||||
|   double timer0; | ||||
|   double timer1; | ||||
|   double timer2; | ||||
|   double timer3; | ||||
|   double timer4; | ||||
|   double timer5; | ||||
|   double timer6; | ||||
|   uint64_t callsi; | ||||
|   // Reset the seven local timers and the local call counter to zero. | ||||
|   void ZeroCountersi(void) | ||||
|   { | ||||
|     timer0 = timer1 = timer2 = timer3 = timer4 = timer5 = timer6 = 0; | ||||
|     callsi = 0; | ||||
|   } | ||||
|   // Print the non-zero local timers. timer0..timer3 are reported divided by | ||||
|   // `calls`; timer4 is reported as the raw accumulated value. | ||||
|   void Reporti(int calls) | ||||
|   { | ||||
|     // Emit one "label value-per-call" line, skipping timers that never ran | ||||
|     auto perCall = [calls](const char *label, double t) { | ||||
|       if ( t ) std::cout << GridLogMessage << label << t/calls << std::endl; | ||||
|     }; | ||||
|  | ||||
|     perCall(" timer0 (HaloGatherOpt) ", timer0); | ||||
|     perCall(" timer1 (Communicate)   ", timer1); | ||||
|     perCall(" timer2 (CommsMerge )   ", timer2); | ||||
|     perCall(" timer3 (commsMergeShm) ", timer3); | ||||
|  | ||||
|     if ( timer4 ) { | ||||
|       std::cout << GridLogMessage << " timer4 " << timer4 << std::endl; | ||||
|     } | ||||
|   } | ||||
|   typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; | ||||
|  | ||||
|   std::vector<int> same_node; | ||||
| @@ -252,6 +278,7 @@ public: | ||||
|     : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances) , | ||||
|     same_node(npoints) | ||||
|   {  | ||||
|     ZeroCountersi(); | ||||
|     surface_list.resize(0); | ||||
|   }; | ||||
|  | ||||
| @@ -261,7 +288,6 @@ public: | ||||
|     // Here we know the distance is 1 for WilsonStencil | ||||
|     for(int point=0;point<this->_npoints;point++){ | ||||
|       same_node[point] = this->SameNode(point); | ||||
|       //      std::cout << " dir " <<point<<" same_node " <<same_node[point]<<std::endl; | ||||
|     } | ||||
|      | ||||
|     for(int site = 0 ;site< vol4;site++){ | ||||
| @@ -282,17 +308,28 @@ public: | ||||
|   { | ||||
|     std::vector<std::vector<CommsRequest_t> > reqs; | ||||
|     this->HaloExchangeOptGather(source,compress); | ||||
|     this->CommunicateBegin(reqs); | ||||
|     this->CommunicateComplete(reqs); | ||||
|     double t1=usecond(); | ||||
|     // Asynchronous MPI calls multidirectional, Isend etc... | ||||
|     //    this->CommunicateBegin(reqs); | ||||
|     //    this->CommunicateComplete(reqs); | ||||
|     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. | ||||
|     this->Communicate(); | ||||
|     double t2=usecond(); timer1 += t2-t1; | ||||
|     this->CommsMerge(compress); | ||||
|     double t3=usecond(); timer2 += t3-t2; | ||||
|     this->CommsMergeSHM(compress); | ||||
|     double t4=usecond(); timer3 += t4-t3; | ||||
|   } | ||||
|    | ||||
|   // Prepare the stencil, run the optimised halo gather on `source`, and | ||||
|   // account the gather time in timer0 and the invocation in callsi. | ||||
|   template <class compressor> | ||||
|   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  | ||||
|   { | ||||
|     this->Prepare(); | ||||
|  | ||||
|     // Only the gather itself is timed; Prepare() is outside the measured span | ||||
|     const double gatherStart = usecond(); | ||||
|     this->HaloGatherOpt(source,compress); | ||||
|     timer0 += usecond() - gatherStart; | ||||
|  | ||||
|     ++callsi; | ||||
|   } | ||||
|  | ||||
|   template <class compressor> | ||||
| @@ -304,7 +341,9 @@ public: | ||||
|     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; | ||||
|     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; | ||||
|  | ||||
|     this->mpi3synctime_g-=usecond(); | ||||
|     this->_grid->StencilBarrier(); | ||||
|     this->mpi3synctime_g+=usecond(); | ||||
|  | ||||
|     assert(source._grid==this->_grid); | ||||
|     this->halogtime-=usecond(); | ||||
| @@ -323,7 +362,6 @@ public: | ||||
|     int dag = compress.dag; | ||||
|     int face_idx=0; | ||||
|     if ( dag ) {  | ||||
|       //	std::cout << " Optimised Dagger compress " <<std::endl; | ||||
|       assert(same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx)); | ||||
|       assert(same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx)); | ||||
|       assert(same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx)); | ||||
|   | ||||
| @@ -123,22 +123,24 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, | ||||
|   int vol4; | ||||
|   vol4=FourDimGrid.oSites(); | ||||
|   Stencil.BuildSurfaceList(LLs,vol4); | ||||
|  | ||||
|   vol4=FourDimRedBlackGrid.oSites(); | ||||
|   StencilEven.BuildSurfaceList(LLs,vol4); | ||||
|    StencilOdd.BuildSurfaceList(LLs,vol4); | ||||
|  | ||||
|   std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size() | ||||
|                        <<" " << StencilEven.surface_list.size()<<std::endl; | ||||
|    //  std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size() | ||||
|    //                       <<" " << StencilEven.surface_list.size()<<std::endl; | ||||
|  | ||||
| } | ||||
|       | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::Report(void) | ||||
| { | ||||
|     std::vector<int> latt = GridDefaultLatt();           | ||||
|     RealD volume = Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu]; | ||||
|     RealD NP = _FourDimGrid->_Nprocessors; | ||||
|     RealD NN = _FourDimGrid->NodeCount(); | ||||
|   RealD NP     = _FourDimGrid->_Nprocessors; | ||||
|   RealD NN     = _FourDimGrid->NodeCount(); | ||||
|   RealD volume = Ls;   | ||||
|   std::vector<int> latt = _FourDimGrid->GlobalDimensions(); | ||||
|   for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu]; | ||||
|  | ||||
|   if ( DhopCalls > 0 ) { | ||||
|     std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; | ||||
| @@ -184,6 +186,11 @@ void WilsonFermion5D<Impl>::Report(void) | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl;  StencilEven.Report(); | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D StencilOdd" <<std::endl;  StencilOdd.Report(); | ||||
|   } | ||||
|   if ( DhopCalls > 0){ | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Stencil     Reporti()"    <<std::endl;  Stencil.Reporti(DhopCalls); | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D StencilEven Reporti()"<<std::endl;  StencilEven.Reporti(DhopCalls); | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D StencilOdd  Reporti()" <<std::endl;  StencilOdd.Reporti(DhopCalls); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template<class Impl> | ||||
| @@ -203,6 +210,9 @@ void WilsonFermion5D<Impl>::ZeroCounters(void) { | ||||
|   Stencil.ZeroCounters(); | ||||
|   StencilEven.ZeroCounters(); | ||||
|   StencilOdd.ZeroCounters(); | ||||
|   Stencil.ZeroCountersi(); | ||||
|   StencilEven.ZeroCountersi(); | ||||
|   StencilOdd.ZeroCountersi(); | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -379,7 +389,6 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | ||||
| { | ||||
| #ifdef GRID_OMP | ||||
|   //  assert((dag==DaggerNo) ||(dag==DaggerYes)); | ||||
|   typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; | ||||
|  | ||||
|   Compressor compressor(dag); | ||||
|  | ||||
| @@ -388,46 +397,70 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | ||||
|  | ||||
|   DhopFaceTime-=usecond(); | ||||
|   st.HaloExchangeOptGather(in,compressor); | ||||
|   DhopFaceTime+=usecond(); | ||||
|   std::vector<std::vector<CommsRequest_t> > reqs; | ||||
|  | ||||
|   // Rely on async comms; start comms before merge of local data | ||||
|   DhopCommTime-=usecond(); | ||||
|   st.CommunicateBegin(reqs); | ||||
|  | ||||
|   DhopFaceTime-=usecond(); | ||||
|   st.CommsMergeSHM(compressor); | ||||
|   st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms | ||||
|   DhopFaceTime+=usecond(); | ||||
|  | ||||
|   // Perhaps use omp task and region | ||||
| #pragma omp parallel  | ||||
|   double ctime=0; | ||||
|   double ptime=0; | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Ugly explicit thread mapping introduced for OPA reasons. | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #pragma omp parallel reduction(max:ctime) reduction(max:ptime) | ||||
|   {  | ||||
|     int tid = omp_get_thread_num(); | ||||
|     int nthreads = omp_get_num_threads(); | ||||
|     int me = omp_get_thread_num(); | ||||
|     int myoff, mywork; | ||||
|  | ||||
|     GridThread::GetWork(len,me-1,mywork,myoff,nthreads-1); | ||||
|     int sF = LLs * myoff; | ||||
|  | ||||
|     if ( me == 0 ) { | ||||
|       st.CommunicateComplete(reqs); | ||||
|       DhopCommTime+=usecond(); | ||||
|     } else {  | ||||
|       // Interior links in stencil | ||||
|       if ( me==1 ) DhopComputeTime-=usecond(); | ||||
|       if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,1,0); | ||||
|       else      	    Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,1,0); | ||||
|       if ( me==1 ) DhopComputeTime+=usecond(); | ||||
|     int ncomms = CartesianCommunicator::nCommThreads; | ||||
|     if (ncomms == -1) ncomms = 1; | ||||
|     assert(nthreads > ncomms); | ||||
|     if (tid >= ncomms) { | ||||
|       double start = usecond(); | ||||
|       nthreads -= ncomms; | ||||
|       int ttid = tid - ncomms; | ||||
|       int n = U._grid->oSites(); | ||||
|       int chunk = n / nthreads; | ||||
|       int rem = n % nthreads; | ||||
|       int myblock, myn; | ||||
|       if (ttid < rem) { | ||||
| 	myblock = ttid * chunk + ttid; | ||||
| 	myn = chunk+1; | ||||
|       } else { | ||||
| 	myblock = ttid*chunk + rem; | ||||
| 	myn = chunk; | ||||
|       } | ||||
|        | ||||
|       // do the compute | ||||
|       if (dag == DaggerYes) { | ||||
| 	for (int ss = myblock; ss < myblock+myn; ++ss) { | ||||
| 	  int sU = ss; | ||||
| 	  int sF = LLs * sU; | ||||
| 	  Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); | ||||
| 	} | ||||
|       } else { | ||||
| 	for (int ss = myblock; ss < myblock+myn; ++ss) { | ||||
| 	  int sU = ss; | ||||
| 	  int sF = LLs * sU; | ||||
| 	  Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); | ||||
| 	} | ||||
|       } | ||||
| 	ptime = usecond() - start; | ||||
|     } | ||||
|     { | ||||
|       double start = usecond(); | ||||
|       st.CommunicateThreaded(); | ||||
|       ctime = usecond() - start; | ||||
|     } | ||||
|   } | ||||
|   DhopCommTime += ctime; | ||||
|   DhopComputeTime+=ptime; | ||||
|  | ||||
|   // First to enter, last to leave timing | ||||
|   st.CollateThreads(); | ||||
|  | ||||
|   DhopFaceTime-=usecond(); | ||||
|   st.CommsMerge(compressor); | ||||
|   DhopFaceTime+=usecond(); | ||||
|  | ||||
|   // Load imbalance alert. Should use dynamic schedule OMP for loop | ||||
|   // Perhaps create a list of only those sites with face work, and  | ||||
|   // load balance process the list. | ||||
|   DhopComputeTime2-=usecond(); | ||||
|   if (dag == DaggerYes) { | ||||
|     int sz=st.surface_list.size(); | ||||
| @@ -448,11 +481,9 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | ||||
| #else  | ||||
|   assert(0); | ||||
| #endif | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | ||||
| 					 DoubledGaugeField & U, | ||||
|   | ||||
| @@ -40,12 +40,15 @@ namespace QCD { | ||||
|   typedef typename GImpl::Simd Simd;                \ | ||||
|   typedef typename GImpl::LinkField GaugeLinkField; \ | ||||
|   typedef typename GImpl::Field GaugeField;         \ | ||||
|   typedef typename GImpl::ComplexField ComplexField;\ | ||||
|   typedef typename GImpl::SiteField SiteGaugeField; \ | ||||
|   typedef typename GImpl::SiteComplex SiteComplex;  \ | ||||
|   typedef typename GImpl::SiteLink SiteGaugeLink; | ||||
|  | ||||
| #define INHERIT_FIELD_TYPES(Impl)             \ | ||||
|   typedef typename Impl::Simd Simd;           \ | ||||
|   typedef typename Impl::SiteField SiteField; \ | ||||
| #define INHERIT_FIELD_TYPES(Impl)		    \ | ||||
|   typedef typename Impl::Simd Simd;		    \ | ||||
|   typedef typename Impl::ComplexField ComplexField; \ | ||||
|   typedef typename Impl::SiteField SiteField;	    \ | ||||
|   typedef typename Impl::Field Field; | ||||
|  | ||||
| // hardcodes the exponential approximation in the template | ||||
| @@ -53,14 +56,17 @@ template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplType | ||||
| public: | ||||
|   typedef S Simd; | ||||
|  | ||||
|   template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>; | ||||
|   template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>; | ||||
|   template <typename vtype> using iImplScalar     = iScalar<iScalar<iScalar<vtype> > >; | ||||
|   template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; | ||||
|   template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>; | ||||
|  | ||||
|   typedef iImplScalar<Simd>     SiteComplex; | ||||
|   typedef iImplGaugeLink<Simd>  SiteLink; | ||||
|   typedef iImplGaugeField<Simd> SiteField; | ||||
|  | ||||
|   typedef Lattice<SiteLink>  LinkField;  | ||||
|   typedef Lattice<SiteField> Field; | ||||
|   typedef Lattice<SiteComplex> ComplexField; | ||||
|   typedef Lattice<SiteLink>    LinkField;  | ||||
|   typedef Lattice<SiteField>   Field; | ||||
|  | ||||
|   // Guido: we can probably separate the types from the HMC functions | ||||
|   // this will create 2 kind of implementations | ||||
| @@ -80,7 +86,7 @@ public: | ||||
|  | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   // Move these to another class | ||||
|   // HMC auxiliary functions  | ||||
|   // HMC auxiliary functions | ||||
|   static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { | ||||
|     // specific for SU gauge fields | ||||
|     LinkField Pmu(P._grid); | ||||
| @@ -92,14 +98,19 @@ public: | ||||
|   } | ||||
|  | ||||
|   static inline Field projectForce(Field &P) { return Ta(P); } | ||||
|    | ||||
|  | ||||
|   static inline void update_field(Field& P, Field& U, double ep){ | ||||
|     for (int mu = 0; mu < Nd; mu++) { | ||||
|       auto Umu = PeekIndex<LorentzIndex>(U, mu); | ||||
|       auto Pmu = PeekIndex<LorentzIndex>(P, mu); | ||||
|       Umu = expMat(Pmu, ep, Nexp) * Umu; | ||||
|       PokeIndex<LorentzIndex>(U, ProjectOnGroup(Umu), mu); | ||||
|     //static std::chrono::duration<double> diff; | ||||
|  | ||||
|     //auto start = std::chrono::high_resolution_clock::now(); | ||||
|     parallel_for(int ss=0;ss<P._grid->oSites();ss++){ | ||||
|       for (int mu = 0; mu < Nd; mu++)  | ||||
|         U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]); | ||||
|     } | ||||
|      | ||||
|     //auto end = std::chrono::high_resolution_clock::now(); | ||||
|    // diff += end - start; | ||||
|    // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n"; | ||||
|   } | ||||
|  | ||||
|   static inline RealD FieldSquareNorm(Field& U){ | ||||
|   | ||||
							
								
								
									
										286
									
								
								lib/qcd/action/gauge/Photon.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								lib/qcd/action/gauge/Photon.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
| /************************************************************************************* | ||||
|   | ||||
|  Grid physics library, www.github.com/paboyle/Grid | ||||
|   | ||||
|  Source file: ./lib/qcd/action/gauge/Photon.h | ||||
|   | ||||
|  Copyright (C) 2015 | ||||
|   | ||||
|  Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|   | ||||
|  This program is free software; you can redistribute it and/or modify | ||||
|  it under the terms of the GNU General Public License as published by | ||||
|  the Free Software Foundation; either version 2 of the License, or | ||||
|  (at your option) any later version. | ||||
|   | ||||
|  This program is distributed in the hope that it will be useful, | ||||
|  but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  GNU General Public License for more details. | ||||
|   | ||||
|  You should have received a copy of the GNU General Public License along | ||||
|  with this program; if not, write to the Free Software Foundation, Inc., | ||||
|  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|   | ||||
|  See the full license in the file "LICENSE" in the top level distribution directory | ||||
|  *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef QCD_PHOTON_ACTION_H | ||||
| #define QCD_PHOTON_ACTION_H | ||||
|  | ||||
| namespace Grid{ | ||||
| namespace QCD{ | ||||
|   // Gauge-implementation type bundle for a field with no internal matrix | ||||
|   // structure: a link is a triply nested scalar of the SIMD type S, and the | ||||
|   // gauge field is an Nd-component vector of such scalars. | ||||
|   template <class S> | ||||
|   class QedGimpl | ||||
|   { | ||||
|   public: | ||||
|     using Simd = S; | ||||
|      | ||||
|     // Per-site tensor shapes | ||||
|     template <typename vtype> | ||||
|     using iImplGaugeLink  = iScalar<iScalar<iScalar<vtype>>>; | ||||
|     template <typename vtype> | ||||
|     using iImplGaugeField = iVector<iScalar<iScalar<vtype>>, Nd>; | ||||
|      | ||||
|     // Site types; note SiteComplex is an alias of the full Nd-vector site type | ||||
|     using SiteLink    = iImplGaugeLink<Simd>; | ||||
|     using SiteField   = iImplGaugeField<Simd>; | ||||
|     using SiteComplex = SiteField; | ||||
|      | ||||
|     // Lattice-wide types; ComplexField is likewise an alias of Field | ||||
|     using LinkField    = Lattice<SiteLink>; | ||||
|     using Field        = Lattice<SiteField>; | ||||
|     using ComplexField = Field; | ||||
|   }; | ||||
|    | ||||
|   typedef QedGimpl<vComplex> QedGimplR; | ||||
|    | ||||
|   // Photon field helper, templated on a gauge implementation Gimpl that | ||||
|   // supplies the GaugeField / GaugeLinkField types. Provides the momentum-space | ||||
|   // and position-space propagator application and generation of stochastic | ||||
|   // fields, with a selectable treatment of the zero mode. | ||||
|   template<class Gimpl> | ||||
|   class Photon | ||||
|   { | ||||
|   public: | ||||
|     INHERIT_GIMPL_TYPES(Gimpl); | ||||
|     // Gauge choice. Stored by the constructor; none of the member functions | ||||
|     // defined in this header read it. | ||||
|     GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3); | ||||
|     // Zero-mode scheme, consumed by zmSub(): qedTL zeroes the single site at | ||||
|     // coordinate 0 in every direction; qedL zeroes every site whose coordinates | ||||
|     // in all but the last direction are 0. | ||||
|     GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2); | ||||
|   public: | ||||
|     Photon(Gauge gauge, ZmScheme zmScheme); | ||||
|     virtual ~Photon(void) = default; | ||||
|     // Position-space application of the propagator, implemented with FFTs | ||||
|     // around MomentumSpacePropagator. | ||||
|     void FreePropagator(const GaugeField &in, GaugeField &out); | ||||
|     // out = in * (1/khat^2), after zero-mode subtraction of the 1/khat^2 factor. | ||||
|     void MomentumSpacePropagator(const GaugeField &in, GaugeField &out); | ||||
|     // Fills `weight` with sqrt(volume * Re(1/khat^2)), then applies zmSub(). | ||||
|     void StochasticWeight(GaugeLinkField &weight); | ||||
|     // Generates a stochastic field into `out`; the two-argument form computes | ||||
|     // the weight itself, the three-argument form uses a caller-supplied weight. | ||||
|     void StochasticField(GaugeField &out, GridParallelRNG &rng); | ||||
|     void StochasticField(GaugeField &out, GridParallelRNG &rng, | ||||
|                          const GaugeLinkField &weight); | ||||
|   private: | ||||
|     // out = 1/khat^2 with the zero-momentum site set to 0. | ||||
|     void invKHatSquared(GaugeLinkField &out); | ||||
|     // Zeroes the sites of `out` selected by zmScheme_. | ||||
|     void zmSub(GaugeLinkField &out); | ||||
|   private: | ||||
|     Gauge    gauge_; | ||||
|     ZmScheme zmScheme_; | ||||
|   }; | ||||
|  | ||||
|   typedef Photon<QedGimplR>  PhotonR; | ||||
|    | ||||
|   // Store the requested gauge and zero-mode scheme; no other work is done. | ||||
|   template<class Gimpl> | ||||
|   Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme) | ||||
|     : gauge_(gauge) | ||||
|     , zmScheme_(zmScheme) | ||||
|   { | ||||
|   } | ||||
|    | ||||
|   // Apply the free photon propagator to `in` and write the result to `out`: | ||||
|   // forward FFT of `in`, multiplication by the momentum-space propagator, | ||||
|   // backward FFT into `out`. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out) | ||||
|   { | ||||
|     FFT theFFT(in._grid); | ||||
|      | ||||
|     GaugeField in_k(in._grid); | ||||
|     GaugeField prop_k(in._grid); | ||||
|      | ||||
|     theFFT.FFT_all_dim(in_k,in,FFT::forward); | ||||
|     // MomentumSpacePropagator takes (input, output): read the transformed | ||||
|     // source in_k and write the product into prop_k. | ||||
|     MomentumSpacePropagator(in_k,prop_k); | ||||
|     theFFT.FFT_all_dim(out,prop_k,FFT::backward); | ||||
|   } | ||||
|    | ||||
|   // Fill `out` with 1/khat^2, where khat^2 = sum_mu [2 sin(pi n_mu / L_mu)]^2, | ||||
|   // n_mu being the value LatticeCoordinate yields in direction mu and L_mu the | ||||
|   // full lattice extent. The site with all coordinates 0 is set to 0. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out) | ||||
|   { | ||||
|     GridBase               *grid = out._grid; | ||||
|     const unsigned int     nd    = grid->_ndimension; | ||||
|     const std::vector<int> &dims = grid->_fdimensions; | ||||
|     std::vector<int>       origin(nd,0); | ||||
|     GaugeLinkField         khatMu(grid), unity(grid); | ||||
|     TComplex               siteOne  = Complex(1.0,0.0); | ||||
|     TComplex               siteZero = Complex(0.0,0.0); | ||||
|      | ||||
|     unity = Complex(1.0,0.0); | ||||
|     out   = zero; | ||||
|      | ||||
|     // Accumulate khat^2 direction by direction | ||||
|     for(int mu = 0; mu < nd; mu++) | ||||
|     { | ||||
|       Real twoPiOverL = M_PI*2./dims[mu]; | ||||
|        | ||||
|       LatticeCoordinate(khatMu,mu); | ||||
|       khatMu = 2.*sin(.5*twoPiOverL*khatMu); | ||||
|       out    = out + khatMu*khatMu; | ||||
|     } | ||||
|      | ||||
|     // khat^2 vanishes at the origin: put a 1 there so the inversion is finite, | ||||
|     // then overwrite the inverted value with 0. | ||||
|     pokeSite(siteOne, out, origin); | ||||
|     out = unity/out; | ||||
|     pokeSite(siteZero, out, origin); | ||||
|   } | ||||
|    | ||||
|   // Zero the sites of `out` selected by the zero-mode scheme zmScheme_: | ||||
|   //  - qedTL: only the site with coordinate 0 in every direction; | ||||
|   //  - qedL : every site whose coordinates in all but the last direction are 0; | ||||
|   //  - any other value: `out` is left unchanged. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::zmSub(GaugeLinkField &out) | ||||
|   { | ||||
|     GridBase *grid = out._grid; | ||||
|      | ||||
|     switch (zmScheme_) | ||||
|     { | ||||
|       case ZmScheme::qedTL: | ||||
|       { | ||||
|         const unsigned int nd = grid->_ndimension; | ||||
|         std::vector<int>   zm(nd,0); | ||||
|         TComplex           Tzero = Complex(0.0,0.0); | ||||
|          | ||||
|         pokeSite(Tzero, out, zm); | ||||
|          | ||||
|         break; | ||||
|       } | ||||
|       case ZmScheme::qedL: | ||||
|       { | ||||
|         LatticeInteger spNrm(grid), coor(grid); | ||||
|          | ||||
|         // spNrm = sum of squared coordinates over all but the last direction; | ||||
|         // it is 0 exactly on the sites that have to be removed. | ||||
|         spNrm = zero; | ||||
|         for(int d = 0; d < grid->_ndimension - 1; d++) | ||||
|         { | ||||
|           LatticeCoordinate(coor,d); | ||||
|           spNrm = spNrm + coor*coor; | ||||
|         } | ||||
|         // 0.*out supplies a zero field of the same type as out | ||||
|         out = where(spNrm == Integer(0), 0.*out, out); | ||||
|          | ||||
|         break; | ||||
|       } | ||||
|       default: | ||||
|         break; | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   // Multiply the momentum-space field `in` by 1/khat^2 (after zero-mode | ||||
|   // subtraction) and store the product in `out`. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in, | ||||
|                                                GaugeField &out) | ||||
|   { | ||||
|     // The scalar factor lives on the grid of the output field | ||||
|     LatticeComplex invK2(out._grid); | ||||
|      | ||||
|     invKHatSquared(invK2); | ||||
|     zmSub(invK2); | ||||
|      | ||||
|     out = in*invK2; | ||||
|   } | ||||
|    | ||||
|   // Fill `weight` with sqrt(V * Re(1/khat^2)), V being the product of the full | ||||
|   // lattice extents, and then zero the sites selected by the zero-mode scheme. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight) | ||||
|   { | ||||
|     auto                   *grid = dynamic_cast<GridCartesian *>(weight._grid); | ||||
|     const unsigned int     nd    = grid->_ndimension; | ||||
|     const std::vector<int> &dims = grid->_fdimensions; | ||||
|      | ||||
|     // Lattice volume as an Integer | ||||
|     Integer vol = 1; | ||||
|     for(int d = 0; d < nd; d++) | ||||
|     { | ||||
|       vol = vol * dims[d]; | ||||
|     } | ||||
|      | ||||
|     invKHatSquared(weight); | ||||
|     weight = sqrt(vol*real(weight)); | ||||
|     zmSub(weight); | ||||
|   } | ||||
|    | ||||
|   // Convenience overload: compute the stochastic weight on the grid of `out` | ||||
|   // and forward to the three-argument StochasticField. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng) | ||||
|   { | ||||
|     auto *grid = dynamic_cast<GridCartesian *>(out._grid); | ||||
|      | ||||
|     GaugeLinkField w(grid); | ||||
|     StochasticWeight(w); | ||||
|      | ||||
|     StochasticField(out, rng, w); | ||||
|   } | ||||
|    | ||||
|   // Generate a stochastic field into `out`: for each direction draw Gaussian | ||||
|   // noise from `rng`, scale it by `weight`, store it as that Lorentz component, | ||||
|   // then backward-FFT the assembled field and keep its real part. | ||||
|   template<class Gimpl> | ||||
|   void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng, | ||||
|                                       const GaugeLinkField &weight) | ||||
|   { | ||||
|     auto               *grid = dynamic_cast<GridCartesian *>(out._grid); | ||||
|     const unsigned int nd    = grid->_ndimension; | ||||
|     GaugeLinkField     noise(grid); | ||||
|     GaugeField         momField(grid); | ||||
|     FFT                theFFT(grid); | ||||
|      | ||||
|     // One independent weighted noise draw per direction | ||||
|     for(int mu = 0; mu < nd; mu++) | ||||
|     { | ||||
|       gaussian(rng, noise); | ||||
|       noise = weight*noise; | ||||
|       pokeLorentz(momField, noise, mu); | ||||
|     } | ||||
|      | ||||
|     theFFT.FFT_all_dim(out, momField, FFT::backward); | ||||
|     out = real(out); | ||||
|   } | ||||
| //  template<class Gimpl> | ||||
| //  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, | ||||
| //                                                            const GaugeField &in) | ||||
| //  { | ||||
| //     | ||||
| //    FeynmanGaugeMomentumSpacePropagator_TL(out,in); | ||||
| //     | ||||
| //    GridBase *grid = out._grid; | ||||
| //    LatticeInteger     coor(grid); | ||||
| //    GaugeField zz(grid); zz=zero; | ||||
| //     | ||||
| //    // xyzt | ||||
| //    for(int d = 0; d < grid->_ndimension-1;d++){ | ||||
| //      LatticeCoordinate(coor,d); | ||||
| //      out = where(coor==Integer(0),zz,out); | ||||
| //    } | ||||
| //  } | ||||
| //   | ||||
| //  template<class Gimpl> | ||||
| //  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, | ||||
| //                                                             const GaugeField &in) | ||||
| //  { | ||||
| //     | ||||
| //    // what type LatticeComplex | ||||
| //    GridBase *grid = out._grid; | ||||
| //    int nd = grid->_ndimension; | ||||
| //     | ||||
| //    typedef typename GaugeField::vector_type vector_type; | ||||
| //    typedef typename GaugeField::scalar_type ScalComplex; | ||||
| //    typedef Lattice<iSinglet<vector_type> > LatComplex; | ||||
| //     | ||||
| //    std::vector<int> latt_size   = grid->_fdimensions; | ||||
| //     | ||||
| //    LatComplex denom(grid); denom= zero; | ||||
| //    LatComplex   one(grid); one = ScalComplex(1.0,0.0); | ||||
| //    LatComplex   kmu(grid); | ||||
| //     | ||||
| //    ScalComplex ci(0.0,1.0); | ||||
| //    // momphase = n * 2pi / L | ||||
| //    for(int mu=0;mu<Nd;mu++) { | ||||
| //       | ||||
| //      LatticeCoordinate(kmu,mu); | ||||
| //       | ||||
| //      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
| //       | ||||
| //      kmu = TwoPiL * kmu ; | ||||
| //       | ||||
| //      denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term | ||||
| //    } | ||||
| //    std::vector<int> zero_mode(nd,0); | ||||
| //    TComplexD Tone = ComplexD(1.0,0.0); | ||||
| //    TComplexD Tzero= ComplexD(0.0,0.0); | ||||
| //     | ||||
| //    pokeSite(Tone,denom,zero_mode); | ||||
| //     | ||||
| //    denom= one/denom; | ||||
| //     | ||||
| //    pokeSite(Tzero,denom,zero_mode); | ||||
| //     | ||||
| //    out = zero; | ||||
| //    out = in*denom; | ||||
| //  }; | ||||
|    | ||||
| }} | ||||
| #endif | ||||
| @@ -71,14 +71,18 @@ class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> { | ||||
|  | ||||
|     RealD factor = 0.5 * beta / RealD(Nc); | ||||
|  | ||||
|     GaugeLinkField Umu(U._grid); | ||||
|     //GaugeLinkField Umu(U._grid); | ||||
|     GaugeLinkField dSdU_mu(U._grid); | ||||
|     for (int mu = 0; mu < Nd; mu++) { | ||||
|       Umu = PeekIndex<LorentzIndex>(U, mu); | ||||
|       //Umu = PeekIndex<LorentzIndex>(U, mu); | ||||
|  | ||||
|       // Staple in direction mu | ||||
|       WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu); | ||||
|       dSdU_mu = Ta(Umu * dSdU_mu) * factor; | ||||
|       //WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu); | ||||
|       //dSdU_mu = Ta(Umu * dSdU_mu) * factor; | ||||
|  | ||||
|    | ||||
|       WilsonLoops<Gimpl>::StapleMult(dSdU_mu, U, mu); | ||||
|       dSdU_mu = Ta(dSdU_mu) * factor; | ||||
|  | ||||
|       PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu); | ||||
|     } | ||||
|   | ||||
| @@ -31,6 +31,7 @@ directory | ||||
|  | ||||
| #include <Grid/qcd/action/scalar/ScalarImpl.h> | ||||
| #include <Grid/qcd/action/scalar/ScalarAction.h> | ||||
| #include <Grid/qcd/action/scalar/ScalarInteractionAction.h> | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
| @@ -39,6 +40,10 @@ namespace QCD { | ||||
|   typedef ScalarAction<ScalarImplF>                 ScalarActionF; | ||||
|   typedef ScalarAction<ScalarImplD>                 ScalarActionD; | ||||
|  | ||||
|   template <int Colours, int Dimensions> using ScalarAdjActionR = ScalarInteractionAction<ScalarNxNAdjImplR<Colours>, Dimensions>; | ||||
|   template <int Colours, int Dimensions> using ScalarAdjActionF = ScalarInteractionAction<ScalarNxNAdjImplF<Colours>, Dimensions>; | ||||
|   template <int Colours, int Dimensions> using ScalarAdjActionD = ScalarInteractionAction<ScalarNxNAdjImplD<Colours>, Dimensions>; | ||||
|    | ||||
| } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -6,10 +6,10 @@ | ||||
|  | ||||
|   Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|   Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
|   Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|   Author: neo <cossu@post.kek.jp> | ||||
|   Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|   This program is free software; you can redistribute it and/or modify | ||||
|   it under the terms of the GNU General Public License as published by | ||||
| @@ -35,50 +35,49 @@ directory | ||||
|  | ||||
| namespace Grid { | ||||
|   // FIXME drop the QCD namespace everywhere here | ||||
|    | ||||
|   template <class Impl> | ||||
|   class ScalarAction : public QCD::Action<typename Impl::Field> { | ||||
|   public: | ||||
|  | ||||
| template <class Impl> | ||||
| class ScalarAction : public QCD::Action<typename Impl::Field> { | ||||
|  public: | ||||
|     INHERIT_FIELD_TYPES(Impl); | ||||
|      | ||||
|   private: | ||||
|  | ||||
|  private: | ||||
|     RealD mass_square; | ||||
|     RealD lambda; | ||||
|      | ||||
|   public: | ||||
|     ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l){}; | ||||
|  | ||||
|     virtual std::string LogParameters(){ | ||||
|  public: | ||||
|     ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} | ||||
|  | ||||
|     virtual std::string LogParameters() { | ||||
|       std::stringstream sstream; | ||||
|       sstream << GridLogMessage << "[ScalarAction] lambda      : " << lambda      << std::endl; | ||||
|       sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; | ||||
|       return sstream.str(); | ||||
|        | ||||
|     } | ||||
|      | ||||
|     virtual std::string action_name(){return "ScalarAction";} | ||||
|      | ||||
|     virtual void refresh(const Field &U, | ||||
| 			 GridParallelRNG &pRNG){};  // noop as no pseudoferms | ||||
|      | ||||
|     virtual std::string action_name() {return "ScalarAction";} | ||||
|  | ||||
|     virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}  // noop as no pseudoferms | ||||
|  | ||||
|     virtual RealD S(const Field &p) { | ||||
|       return (mass_square * 0.5 + QCD::Nd) * ScalarObs<Impl>::sumphisquared(p) + | ||||
| 	(lambda / 24.) * ScalarObs<Impl>::sumphifourth(p) + | ||||
| 	ScalarObs<Impl>::sumphider(p); | ||||
|     (lambda / 24.) * ScalarObs<Impl>::sumphifourth(p) + | ||||
|     ScalarObs<Impl>::sumphider(p); | ||||
|     }; | ||||
|      | ||||
|  | ||||
|     virtual void deriv(const Field &p, | ||||
| 		       Field &force) { | ||||
|                        Field &force) { | ||||
|       Field tmp(p._grid); | ||||
|       Field p2(p._grid); | ||||
|       ScalarObs<Impl>::phisquared(p2, p); | ||||
|       tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); | ||||
|       for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); | ||||
|        | ||||
|       force=+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp; | ||||
|     }; | ||||
|   }; | ||||
|    | ||||
| } // Grid | ||||
|  | ||||
|       force =+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp; | ||||
|     } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| }  // namespace Grid | ||||
|  | ||||
| #endif // SCALAR_ACTION_H | ||||
|   | ||||
| @@ -5,96 +5,158 @@ | ||||
| namespace Grid { | ||||
|   //namespace QCD { | ||||
|  | ||||
|   template <class S> | ||||
|   class ScalarImplTypes { | ||||
|   public: | ||||
| template <class S> | ||||
| class ScalarImplTypes { | ||||
|  public: | ||||
|     typedef S Simd; | ||||
|      | ||||
|  | ||||
|     template <typename vtype> | ||||
|     using iImplField = iScalar<iScalar<iScalar<vtype> > >; | ||||
|      | ||||
|  | ||||
|     typedef iImplField<Simd> SiteField; | ||||
|      | ||||
|     typedef SiteField        SitePropagator; | ||||
|     typedef SiteField        SiteComplex; | ||||
|      | ||||
|     typedef Lattice<SiteField> Field; | ||||
|     typedef Field              ComplexField; | ||||
|     typedef Field              FermionField; | ||||
|     typedef Field              PropagatorField; | ||||
|      | ||||
|     static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ | ||||
|       gaussian(pRNG, P); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline Field projectForce(Field& P){return P;} | ||||
|      | ||||
|     static inline void update_field(Field& P, Field& U, double ep){ | ||||
|  | ||||
|     static inline void update_field(Field& P, Field& U, double ep) { | ||||
|       U += P*ep; | ||||
|     } | ||||
|      | ||||
|     static inline RealD FieldSquareNorm(Field& U){ | ||||
|  | ||||
|     static inline RealD FieldSquareNorm(Field& U) { | ||||
|       return (- sum(trace(U*U))/2.0); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||
|       gaussian(pRNG, U); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||
|       gaussian(pRNG, U); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||
|       U = 1.0; | ||||
|     } | ||||
|      | ||||
|     // Fill `out` with 1/(m^2 + sum_mu khat_mu^2), where | ||||
|     // khat_mu = 2*sin(pi*n_mu/L_mu), L_mu = grid->_fdimensions[mu] and n_mu is | ||||
|     // the value LatticeCoordinate() writes for direction mu (presumably the | ||||
|     // site coordinate -- confirm). | ||||
|     // NOTE(review): with m == 0 the denominator is zero wherever every khat_mu | ||||
|     // vanishes, so callers presumably pass a non-zero mass -- confirm. | ||||
|     static void MomentumSpacePropagator(Field &out, RealD m) | ||||
|     { | ||||
|       GridBase           *grid = out._grid; | ||||
|       Field              kmu(grid), one(grid); | ||||
|       const unsigned int nd    = grid->_ndimension; | ||||
|       std::vector<int>   &l    = grid->_fdimensions; | ||||
|        | ||||
|       one = Complex(1.0,0.0); | ||||
|       out = m*m; | ||||
|       for(int mu = 0; mu < nd; mu++) | ||||
|       { | ||||
|         Real twoPiL = M_PI*2./l[mu]; | ||||
|          | ||||
|         LatticeCoordinate(kmu,mu); | ||||
|         // 0.5*twoPiL*kmu is pi*n_mu/L_mu, so kmu becomes khat_mu | ||||
|         kmu = 2.*sin(.5*twoPiL*kmu); | ||||
|         out = out + kmu*kmu; | ||||
|       } | ||||
|       // invert the accumulated denominator | ||||
|       out = one/out; | ||||
|     } | ||||
|      | ||||
|     // Apply a precomputed momentum-space kernel to `in`: forward FFT of `in` | ||||
|     // over all dimensions, multiplication by `momKernel`, then a backward FFT | ||||
|     // over all dimensions into `out`. | ||||
|     // NOTE(review): in._grid is converted to GridCartesian* with a C-style | ||||
|     // cast, so it is presumably always a GridCartesian -- confirm. | ||||
|     static void FreePropagator(const Field &in, Field &out, | ||||
|                                const Field &momKernel) | ||||
|     { | ||||
|       FFT   fft((GridCartesian *)in._grid); | ||||
|       Field inFT(in._grid); | ||||
|        | ||||
|       fft.FFT_all_dim(inFT, in, FFT::forward); | ||||
|       inFT = inFT*momKernel; | ||||
|       fft.FFT_all_dim(out, inFT, FFT::backward); | ||||
|     } | ||||
|      | ||||
|     // Convenience overload: builds the kernel for mass `m` on in's grid with | ||||
|     // MomentumSpacePropagator() and forwards to the kernel-based overload. | ||||
|     static void FreePropagator(const Field &in, Field &out, RealD m) | ||||
|     { | ||||
|       Field momKernel(in._grid); | ||||
|        | ||||
|       MomentumSpacePropagator(momKernel, m); | ||||
|       FreePropagator(in, out, momKernel); | ||||
|     } | ||||
|      | ||||
|   }; | ||||
|  | ||||
|   template <class S, unsigned int N> | ||||
|   class ScalarMatrixImplTypes { | ||||
|   class ScalarAdjMatrixImplTypes { | ||||
|   public: | ||||
|     typedef S Simd; | ||||
|     typedef QCD::SU<N> Group; | ||||
|      | ||||
|     template <typename vtype> | ||||
|     using iImplField = iScalar<iScalar<iMatrix<vtype, N> > >; | ||||
|     using iImplField   = iScalar<iScalar<iMatrix<vtype, N>>>; | ||||
|     template <typename vtype> | ||||
|     using iImplComplex = iScalar<iScalar<iScalar<vtype>>>; | ||||
|  | ||||
|     typedef iImplField<Simd>   SiteField; | ||||
|     typedef SiteField          SitePropagator; | ||||
|     typedef iImplComplex<Simd> SiteComplex; | ||||
|      | ||||
|     typedef iImplField<Simd> SiteField; | ||||
|      | ||||
|      | ||||
|     typedef Lattice<SiteField> Field; | ||||
|      | ||||
|     static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ | ||||
|       gaussian(pRNG, P); | ||||
|     typedef Lattice<SiteField>   Field; | ||||
|     typedef Lattice<SiteComplex> ComplexField; | ||||
|     typedef Field                FermionField; | ||||
|     typedef Field                PropagatorField; | ||||
|  | ||||
|     static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { | ||||
|       Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); | ||||
|     } | ||||
|      | ||||
|     static inline Field projectForce(Field& P){return P;} | ||||
|      | ||||
|     static inline void update_field(Field& P, Field& U, double ep){ | ||||
|  | ||||
|     static inline Field projectForce(Field& P) {return P;} | ||||
|  | ||||
|     static inline void update_field(Field& P, Field& U, double ep) { | ||||
|       U += P*ep; | ||||
|     } | ||||
|      | ||||
|     static inline RealD FieldSquareNorm(Field& U){ | ||||
|       return (TensorRemove(- sum(trace(U*U))*0.5).real()); | ||||
|  | ||||
|     static inline RealD FieldSquareNorm(Field& U) { | ||||
|       return (TensorRemove(sum(trace(U*U))).real()); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||
|       gaussian(pRNG, U); | ||||
|       Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||
|       gaussian(pRNG, U); | ||||
|       Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01); | ||||
|     } | ||||
|      | ||||
|  | ||||
|     static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||
|       U = 1.0; | ||||
|       U = zero; | ||||
|     } | ||||
|      | ||||
|  | ||||
|   }; | ||||
|  | ||||
|  | ||||
|    | ||||
|    | ||||
|  | ||||
|  | ||||
|   typedef ScalarImplTypes<vReal> ScalarImplR; | ||||
|   typedef ScalarImplTypes<vRealF> ScalarImplF; | ||||
|   typedef ScalarImplTypes<vRealD> ScalarImplD; | ||||
|   typedef ScalarImplTypes<vComplex> ScalarImplCR; | ||||
|   typedef ScalarImplTypes<vComplexF> ScalarImplCF; | ||||
|   typedef ScalarImplTypes<vComplexD> ScalarImplCD; | ||||
|      | ||||
|   // Hardcoding here the size of the matrices | ||||
|   typedef ScalarAdjMatrixImplTypes<vComplex,  QCD::Nc> ScalarAdjImplR; | ||||
|   typedef ScalarAdjMatrixImplTypes<vComplexF, QCD::Nc> ScalarAdjImplF; | ||||
|   typedef ScalarAdjMatrixImplTypes<vComplexD, QCD::Nc> ScalarAdjImplD; | ||||
|  | ||||
|   template <int Colours > using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes<vComplex,   Colours >; | ||||
|   template <int Colours > using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes<vComplexF,  Colours >; | ||||
|   template <int Colours > using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes<vComplexD,  Colours >; | ||||
|    | ||||
|   //}  | ||||
| }  | ||||
|   //} | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -6,10 +6,7 @@ | ||||
|  | ||||
|   Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|   Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
|   This program is free software; you can redistribute it and/or modify | ||||
|   it under the terms of the GNU General Public License as published by | ||||
| @@ -30,55 +27,122 @@ directory | ||||
|   *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef SCALAR_ACTION_H | ||||
| #define SCALAR_ACTION_H | ||||
| #ifndef SCALAR_INT_ACTION_H | ||||
| #define SCALAR_INT_ACTION_H | ||||
|  | ||||
|  | ||||
| // Note: this action can completely absorb the ScalarAction for real float fields | ||||
| // use the scalarObjs to generalise the structure | ||||
|  | ||||
| namespace Grid { | ||||
|   // FIXME drop the QCD namespace everywhere here | ||||
|    | ||||
|   template <class Impl> | ||||
|  | ||||
|   template <class Impl, int Ndim > | ||||
|   class ScalarInteractionAction : public QCD::Action<typename Impl::Field> { | ||||
|   public: | ||||
|     INHERIT_FIELD_TYPES(Impl); | ||||
|      | ||||
|   private: | ||||
|     RealD mass_square; | ||||
|     RealD lambda; | ||||
|      | ||||
|   public: | ||||
|     ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l){}; | ||||
|  | ||||
|     virtual std::string LogParameters(){ | ||||
|  | ||||
|     typedef typename Field::vector_object vobj; | ||||
|     typedef CartesianStencil<vobj,vobj> Stencil; | ||||
|  | ||||
|     SimpleCompressor<vobj> compressor; | ||||
|     int npoint = 2*Ndim; | ||||
|     std::vector<int> directions;//    = {0,1,2,3,0,1,2,3};  // forcing 4 dimensions | ||||
|     std::vector<int> displacements;//  = {1,1,1,1, -1,-1,-1,-1}; | ||||
|  | ||||
|  | ||||
|   public: | ||||
|  | ||||
|     ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2*Ndim,0), directions(2*Ndim,0){ | ||||
|       for (int mu = 0 ; mu < Ndim; mu++){ | ||||
| 		directions[mu]         = mu; directions[mu+Ndim]    = mu; | ||||
| 		displacements[mu]      =  1; displacements[mu+Ndim] = -1; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     virtual std::string LogParameters() { | ||||
|       std::stringstream sstream; | ||||
|       sstream << GridLogMessage << "[ScalarAction] lambda      : " << lambda      << std::endl; | ||||
|       sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; | ||||
|       return sstream.str(); | ||||
|        | ||||
|     } | ||||
|      | ||||
|     virtual std::string action_name(){return "ScalarAction";} | ||||
|      | ||||
|     virtual void refresh(const Field &U, | ||||
| 			 GridParallelRNG &pRNG){};  // noop as no pseudoferms | ||||
|      | ||||
|  | ||||
|     virtual std::string action_name() {return "ScalarAction";} | ||||
|  | ||||
|     virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} | ||||
|  | ||||
|     virtual RealD S(const Field &p) { | ||||
|       return (mass_square * 0.5 + QCD::Nd) * ScalarObs<Impl>::sumphisquared(p) + | ||||
| 	(lambda / 24.) * ScalarObs<Impl>::sumphifourth(p) + | ||||
| 	ScalarObs<Impl>::sumphider(p); | ||||
|       assert(p._grid->Nd() == Ndim); | ||||
|       static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); | ||||
|       phiStencil.HaloExchange(p, compressor); | ||||
|       Field action(p._grid), pshift(p._grid), phisquared(p._grid); | ||||
|       phisquared = p*p; | ||||
|       action = (2.0*Ndim + mass_square)*phisquared - lambda/24.*phisquared*phisquared; | ||||
|       for (int mu = 0; mu < Ndim; mu++) { | ||||
| 	//  pshift = Cshift(p, mu, +1);  // not efficient, implement with stencils | ||||
| 	parallel_for (int i = 0; i < p._grid->oSites(); i++) { | ||||
| 	  int permute_type; | ||||
| 	  StencilEntry *SE; | ||||
| 	  vobj temp2; | ||||
| 	  const vobj *temp, *t_p; | ||||
| 	     | ||||
| 	  SE = phiStencil.GetEntry(permute_type, mu, i); | ||||
| 	  t_p  = &p._odata[i]; | ||||
| 	  if ( SE->_is_local ) { | ||||
| 	    temp = &p._odata[SE->_offset]; | ||||
| 	    if ( SE->_permute ) { | ||||
| 	      permute(temp2, *temp, permute_type); | ||||
| 	      action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; | ||||
| 	    } else { | ||||
| 	      action._odata[i] -= (*temp)*(*t_p) + (*t_p)*(*temp); | ||||
| 	    } | ||||
| 	  } else { | ||||
| 	    action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; | ||||
| 	  } | ||||
| 	} | ||||
| 	//  action -= pshift*p + p*pshift; | ||||
|       } | ||||
|       // NB the trace in the algebra is normalised to 1/2 | ||||
|       // minus sign coming from the antihermitian fields | ||||
|       return -(TensorRemove(sum(trace(action)))).real(); | ||||
|     }; | ||||
|      | ||||
|     virtual void deriv(const Field &p, | ||||
| 		       Field &force) { | ||||
|       Field tmp(p._grid); | ||||
|       Field p2(p._grid); | ||||
|       ScalarObs<Impl>::phisquared(p2, p); | ||||
|       tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); | ||||
|       for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); | ||||
|  | ||||
|     virtual void deriv(const Field &p, Field &force) { | ||||
|       assert(p._grid->Nd() == Ndim); | ||||
|       force = (2.0*Ndim + mass_square)*p - lambda/12.*p*p*p; | ||||
|       // move this outside | ||||
|       static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); | ||||
|       phiStencil.HaloExchange(p, compressor); | ||||
|        | ||||
|       force=+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp; | ||||
|     }; | ||||
|       //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); | ||||
|       for (int point = 0; point < npoint; point++) { | ||||
| 	parallel_for (int i = 0; i < p._grid->oSites(); i++) { | ||||
| 	  const vobj *temp; | ||||
| 	  vobj temp2; | ||||
| 	  int permute_type; | ||||
| 	  StencilEntry *SE; | ||||
| 	  SE = phiStencil.GetEntry(permute_type, point, i); | ||||
| 	   | ||||
| 	  if ( SE->_is_local ) { | ||||
| 	    temp = &p._odata[SE->_offset]; | ||||
| 	    if ( SE->_permute ) { | ||||
| 	      permute(temp2, *temp, permute_type); | ||||
| 	      force._odata[i] -= temp2; | ||||
| 	    } else { | ||||
| 	      force._odata[i] -= *temp; | ||||
| 	    } | ||||
| 	  } else { | ||||
| 	    force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|   }; | ||||
|    | ||||
| } // Grid | ||||
| }  // namespace Grid | ||||
|  | ||||
| #endif // SCALAR_ACTION_H | ||||
| #endif  // SCALAR_INT_ACTION_H | ||||
|   | ||||
| @@ -207,6 +207,12 @@ using GenericHMCRunnerTemplate = HMCWrapperTemplate<Implementation, Integrator, | ||||
| typedef HMCWrapperTemplate<ScalarImplR, MinimumNorm2, ScalarFields> | ||||
|     ScalarGenericHMCRunner; | ||||
|  | ||||
| typedef HMCWrapperTemplate<ScalarAdjImplR, MinimumNorm2, ScalarMatrixFields> | ||||
|     ScalarAdjGenericHMCRunner; | ||||
|  | ||||
| template <int Colours>  | ||||
| using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, MinimumNorm2, ScalarNxNMatrixFields<Colours> >; | ||||
|  | ||||
| }  // namespace QCD | ||||
| }  // namespace Grid | ||||
|  | ||||
|   | ||||
| @@ -76,7 +76,7 @@ struct HMCparameters: Serializable { | ||||
|  | ||||
|   template < class ReaderClass >  | ||||
|   void initialize(Reader<ReaderClass> &TheReader){ | ||||
|   	std::cout << "Reading HMC\n"; | ||||
|   	std::cout << GridLogMessage << "Reading HMC\n"; | ||||
|   	read(TheReader, "HMC", *this); | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -165,7 +165,7 @@ class HMCResourceManager { | ||||
|   // Grids | ||||
|   ////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   void AddGrid(std::string s, GridModule& M) { | ||||
|   void AddGrid(const std::string s, GridModule& M) { | ||||
|     // Check for name clashes | ||||
|     auto search = Grids.find(s); | ||||
|     if (search != Grids.end()) { | ||||
| @@ -174,14 +174,24 @@ class HMCResourceManager { | ||||
|       exit(1); | ||||
|     } | ||||
|     Grids[s] = std::move(M); | ||||
|     std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" <<std::endl; | ||||
|     std::cout << GridLogMessage << "HMCResourceManager:" << std::endl; | ||||
|     std::cout << GridLogMessage << "Created grid set with name '" << s << "' and decomposition for the full cartesian " << std::endl; | ||||
|     Grids[s].show_full_decomposition(); | ||||
|     std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" <<std::endl; | ||||
|   } | ||||
|  | ||||
|   // Add a named grid set, 4d shortcut | ||||
|   void AddFourDimGrid(std::string s) { | ||||
|   void AddFourDimGrid(const std::string s) { | ||||
|     GridFourDimModule<vComplex> Mod; | ||||
|     AddGrid(s, Mod); | ||||
|   } | ||||
|  | ||||
|   // Add a named grid set, 4d shortcut + tweak simd lanes. | ||||
|   // Builds a GridFourDimModule<vComplex> from simd_decomposition and | ||||
|   // registers it under the name s through AddGrid(). | ||||
|   void AddFourDimGrid(const std::string s, const std::vector<int> simd_decomposition) { | ||||
|     GridFourDimModule<vComplex> Mod(simd_decomposition); | ||||
|     AddGrid(s, Mod); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   GridCartesian* GetCartesian(std::string s = "") { | ||||
| @@ -253,6 +263,7 @@ class HMCResourceManager { | ||||
|   // Construct an observable of type T from the forwarded Args, append it to | ||||
|   // ObservablesList as a std::unique_ptr<T>, then call print_parameters() | ||||
|   // on the element that was just added. | ||||
|   template<class T, class... Types> | ||||
|   void AddObservable(Types&&... Args){ | ||||
|     ObservablesList.push_back(std::unique_ptr<T>(new T(std::forward<Types>(Args)...))); | ||||
|     ObservablesList.back()->print_parameters(); | ||||
|   } | ||||
|  | ||||
|   std::vector<HmcObservable<typename ImplementationPolicy::Field>* > GetObservables(){ | ||||
| @@ -297,4 +308,4 @@ private: | ||||
| } | ||||
| } | ||||
|  | ||||
| #endif  // HMC_RESOURCE_MANAGER_H | ||||
| #endif  // HMC_RESOURCE_MANAGER_H | ||||
|   | ||||
| @@ -33,28 +33,29 @@ directory | ||||
| namespace Grid { | ||||
|  | ||||
| // Resources | ||||
| // Modules for grids  | ||||
| // Modules for grids | ||||
|  | ||||
| // Introduce another namespace HMCModules? | ||||
|  | ||||
| class GridModuleParameters: Serializable{    | ||||
| class GridModuleParameters: Serializable{ | ||||
| public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(GridModuleParameters, | ||||
|   std::string, lattice, | ||||
|   std::string, mpi); | ||||
|  | ||||
|   std::vector<int> getLattice(){return strToVec<int>(lattice);} | ||||
|   std::vector<int> getMpi()    {return strToVec<int>(mpi);} | ||||
|   std::vector<int> getLattice() const {return strToVec<int>(lattice);} | ||||
|   std::vector<int> getMpi()     const {return strToVec<int>(mpi);} | ||||
|  | ||||
|   void check(){ | ||||
|     if (getLattice().size() != getMpi().size()) { | ||||
|       std::cout << GridLogError  | ||||
|  | ||||
|   void check() const { | ||||
|     if (getLattice().size() != getMpi().size() ) { | ||||
|       std::cout << GridLogError | ||||
|                 << "Error in GridModuleParameters: lattice and mpi dimensions " | ||||
|                    "do not match" | ||||
|                 << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|   }     | ||||
|   } | ||||
|  | ||||
|   template <class ReaderClass> | ||||
|   GridModuleParameters(Reader<ReaderClass>& Reader, std::string n = "LatticeGrid"):name(n) { | ||||
| @@ -75,51 +76,94 @@ private: | ||||
| // Lower level class | ||||
| class GridModule { | ||||
|  public: | ||||
|   GridCartesian* get_full() {  | ||||
|   GridCartesian* get_full() { | ||||
|     std::cout << GridLogDebug << "Getting cartesian in module"<< std::endl; | ||||
|     return grid_.get(); } | ||||
|   GridRedBlackCartesian* get_rb() {  | ||||
|   GridRedBlackCartesian* get_rb() { | ||||
|     std::cout << GridLogDebug << "Getting rb-cartesian in module"<< std::endl; | ||||
|     return rbgrid_.get(); } | ||||
|  | ||||
|   void set_full(GridCartesian* grid) { grid_.reset(grid); } | ||||
|   void set_rb(GridRedBlackCartesian* rbgrid) { rbgrid_.reset(rbgrid); } | ||||
|   void show_full_decomposition(){ grid_->show_decomposition(); } | ||||
|   void show_rb_decomposition(){ rbgrid_->show_decomposition(); } | ||||
|  | ||||
|  protected: | ||||
|   std::unique_ptr<GridCartesian> grid_; | ||||
|   std::unique_ptr<GridRedBlackCartesian> rbgrid_; | ||||
|    | ||||
|  | ||||
| }; | ||||
|  | ||||
| //////////////////////////////////// | ||||
| // Classes for the user | ||||
| //////////////////////////////////// | ||||
| // Note: the space time grid should be out of the QCD namespace | ||||
| template< class vector_type> | ||||
| class GridFourDimModule : public GridModule { | ||||
|  public: | ||||
|   GridFourDimModule() { | ||||
| template <class vector_type> | ||||
| class GridFourDimModule : public GridModule | ||||
| { | ||||
| public: | ||||
|   GridFourDimModule() | ||||
|   { | ||||
|     using namespace QCD; | ||||
|     set_full(SpaceTimeGrid::makeFourDimGrid( | ||||
|         GridDefaultLatt(), GridDefaultSimd(4, vector_type::Nsimd()), | ||||
|         GridDefaultLatt(),  | ||||
|         GridDefaultSimd(4, vector_type::Nsimd()), | ||||
|         GridDefaultMpi())); | ||||
|     set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); | ||||
|   } | ||||
|  | ||||
|   GridFourDimModule(GridModuleParameters Params) { | ||||
|   // Build the 4d full and red-black grids with a caller-chosen SIMD layout. | ||||
|   // tweak_simd must hold exactly 4 entries whose product equals | ||||
|   // vector_type::Nsimd(). Either violation is reported on GridLogError and | ||||
|   // terminates the program with exit(1). | ||||
|   GridFourDimModule(const std::vector<int> tweak_simd) | ||||
|   { | ||||
|     using namespace QCD; | ||||
|     if (tweak_simd.size() != 4) | ||||
|     { | ||||
|       std::cout << GridLogError | ||||
|                 << "Error in GridFourDimModule: SIMD size different from 4" | ||||
|                 << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|  | ||||
|     // The per-dimension lane counts must multiply to the vector width | ||||
|     int simd_product = 1; | ||||
|     for (const auto &n : tweak_simd) | ||||
|       simd_product *= n; | ||||
|     std::cout << GridLogDebug << "TweakSIMD: " << tweak_simd << "  Product: " << simd_product << std::endl; | ||||
|  | ||||
|     if (simd_product != vector_type::Nsimd()) | ||||
|     { | ||||
|       std::cout << GridLogError | ||||
|                 << "Error in GridFourDimModule: product of SIMD lanes must equal " | ||||
|                 << vector_type::Nsimd() | ||||
|                 << std::endl; | ||||
|       // Abort like the size check above: returning here would leave grid_ | ||||
|       // and rbgrid_ unset, and they would be dereferenced as null later. | ||||
|       exit(1); | ||||
|     } | ||||
|  | ||||
|     set_full(SpaceTimeGrid::makeFourDimGrid( | ||||
|         GridDefaultLatt(), | ||||
|         tweak_simd, | ||||
|         GridDefaultMpi())); | ||||
|     set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); | ||||
|   } | ||||
|  | ||||
|   GridFourDimModule(const GridModuleParameters Params) | ||||
|   { | ||||
|     using namespace QCD; | ||||
|     Params.check(); | ||||
|     std::vector<int> lattice_v = Params.getLattice(); | ||||
|     std::vector<int> mpi_v = Params.getMpi(); | ||||
|     if (lattice_v.size() == 4) { | ||||
|     if (lattice_v.size() == 4) | ||||
|     { | ||||
|       set_full(SpaceTimeGrid::makeFourDimGrid( | ||||
|           lattice_v, GridDefaultSimd(4, vector_type::Nsimd()), | ||||
|           lattice_v,  | ||||
|           GridDefaultSimd(4, vector_type::Nsimd()), | ||||
|           mpi_v)); | ||||
|       set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); | ||||
|     } else { | ||||
|       std::cout << GridLogError  | ||||
|           << "Error in GridFourDimModule: lattice dimension different from 4" | ||||
|           << std::endl; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|       std::cout << GridLogError | ||||
|                 << "Error in GridFourDimModule: lattice dimension different from 4" | ||||
|                 << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|   } | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user