mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 03:54:33 +00:00 
			
		
		
		
	Compare commits
	
		
			242 Commits
		
	
	
		
			chulwoo-de
			...
			feature/lu
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | d68937654b | ||
|  | 7af9b87318 | ||
|  | 70f386f9c6 | ||
|  | 89cda5971a | ||
|  | c39ec3b607 | ||
|  | 8afcc8fb8b | ||
|  | 1abbe2fd0c | ||
|  | 4fb37ececd | ||
|  | 71eaa7c79e | ||
|  | 811ca45473 | ||
|  | bc1a4d40ba | ||
|  | c8079e6621 | ||
|  | 8b0d171c9a | ||
|  | 1f293b76b4 | ||
|  | 8bbd9ebc27 | ||
|  | 6472b431f0 | ||
|  | bd205a3293 | ||
|  | 496beffa88 | ||
|  | 9b63e97108 | ||
|  | 81f2aeaece | ||
|  | 2d4a45c758 | ||
|  | 0f182f033b | ||
|  | 7240d73184 | ||
|  | 42cd148f5e | ||
|  | 611b5d74ba | ||
|  | b56c9ffa52 | ||
| 70c32fa49b | |||
| 77c8a94dae | |||
|  | 2e453dfbf5 | ||
|  | 4089984431 | ||
| 98439847cf | |||
|  | c78bbd0f8c | ||
| 7ea4b959a4 | |||
| 536e2ff073 | |||
| 798ff34d7e | |||
|  | 04a437c92c | ||
|  | 5c190a1b8c | ||
|  | 15d8f5c88c | ||
|  | c4ac6e7e8f | ||
|  | 510e340e16 | ||
|  | 6ffadca153 | ||
|  | b6597b74e7 | ||
| d2573189d8 | |||
| 65ca174dbb | |||
|  | 0724f7af75 | ||
| 2e74520821 | |||
|  | 6dd75ad9e5 | ||
|  | fda408ee6f | ||
|  | b9c80318a2 | ||
|  | 5df5d52d41 | ||
|  | f76f281e58 | ||
|  | aa20cc8b52 | ||
|  | 0fd179fb33 | ||
|  | f45ef8d114 | ||
|  | fd5614738d | ||
|  | 005dcc51aa | ||
|  | 655c893f86 | ||
|  | 843f5783b4 | ||
|  | 8986c9fedd | ||
|  | c80a1d427c | ||
|  | ae57032500 | ||
|  | f75468728f | ||
|  | 5acd856663 | ||
|  | b0d3e4bb2c | ||
|  | b512ccbee6 | ||
|  | 8c89391c02 | ||
|  | bfac5195b8 | ||
|  | a782ca3238 | ||
|  | 744691097f | ||
|  | ff6da364e8 | ||
| 4d11a6f5f2 | |||
|  | 88be3b39bb | ||
|  | 8a02824e08 | ||
|  | 356e7940fd | ||
|  | 73ce476890 | ||
|  | 29c4ef41de | ||
|  | e423a09974 | ||
|  | 17097a93ec | ||
|  | 94a6373a7f | ||
|  | 4ab7dbfd57 | ||
|  | 90e70790f3 | ||
|  | 9c2e8d5e28 | ||
|  | 147e2025b9 | ||
| 573b8c6020 | |||
| 15218ec57f | |||
| ec68e08dd2 | |||
|  | fc25d2295c | ||
|  | 8dc2cfcedb | ||
| 836f93780c | |||
|  | 5a68715be3 | ||
|  | 32bc7a6ab8 | ||
| b65e72e521 | |||
| d1aaff65e8 | |||
| 93d29bb699 | |||
| 3b376ed54e | |||
| d5c1f614ba | |||
| 2edc24225d | |||
| 629283726b | |||
| 6adb66dd08 | |||
| 5be92bb708 | |||
| f4c049ea6d | |||
| bc092ad30f | |||
| dad642ed1b | |||
| 63ae39abc7 | |||
| 9e5b934d21 | |||
| a7b483d67a | |||
| bb99ce0680 | |||
| 83307df1af | |||
|  | 49b5c49851 | ||
| e9f30cab2c | |||
|  | 089f0ab582 | ||
|  | df6c9f55d1 | ||
|  | b93e18ed50 | ||
|  | 9c77bb69a5 | ||
|  | 27f3ecc833 | ||
|  | f9e90eeb1f | ||
|  | fad5c675eb | ||
|  | 4908b77d46 | ||
|  | f4dd5062d7 | ||
|  | da34d75841 | ||
|  | 980ff18956 | ||
|  | 7edf4c6c04 | ||
|  | 1a6c7204ac | ||
|  | 49310fbab3 | ||
|  | 6049d5ac47 | ||
|  | 35d0d35238 | ||
|  | c0e878705e | ||
|  | 5c0c8efb9e | ||
|  | dfd714e1ef | ||
|  | 79a8ca1a62 | ||
|  | fb45eb2eb2 | ||
|  | a307274c96 | ||
|  | 3f2c44a5fe | ||
|  | 48fb1cdc11 | ||
|  | 8a79e93cc2 | ||
|  | 3493b51879 | ||
|  | de3e79d300 | ||
|  | dd62a61c5c | ||
|  | 8f47d0b5ab | ||
|  | 42af132dab | ||
|  | 9db2c6525d | ||
|  | adbc7c1188 | ||
|  | 9dc345e8e8 | ||
|  | 8b9301a74c | ||
|  | 6f47fbb1e2 | ||
|  | a9ae30f868 | ||
|  | a3c0fb79b6 | ||
|  | 62601bb649 | ||
|  | ef97e32152 | ||
|  | daea5297ee | ||
|  | 5028969d4b | ||
|  | c667d9fdcc | ||
|  | 7dbb94bab2 | ||
|  | 236dcc820b | ||
|  | a42a441a6a | ||
|  | a0676beeb1 | ||
|  | c5106d0c03 | ||
|  | fbf96b1bbb | ||
|  | 3c49ddfaa4 | ||
|  | ffb8b3116c | ||
|  | 290493e162 | ||
|  | dd8cfff111 | ||
|  | 184642adb0 | ||
|  | 4774a3bcd2 | ||
|  | 25fafa9a89 | ||
|  | 713520d3d2 | ||
|  | 85ed8175cb | ||
|  | df5c788ef2 | ||
|  | 15f22425c8 | ||
|  | e87182cf98 | ||
|  | e3d5319470 | ||
|  | ffedeb1c58 | ||
|  | 3e3b367aa9 | ||
|  | 3e80947c2b | ||
|  | fdfbf11c6d | ||
|  | 9cb90f714e | ||
|  | 6ce174cd60 | ||
|  | 17ca5240f7 | ||
|  | 2daffdf95d | ||
|  | 149f826601 | ||
|  | cd8ee27080 | ||
|  | 0fa66e8f3c | ||
|  | 8dd099267d | ||
|  | 1a6d65c6a4 | ||
|  | fc4a043663 | ||
|  | 61ba50665e | ||
|  | bfe14000a9 | ||
|  | 092fa0d8da | ||
|  | 1ceff48133 | ||
|  | 680645f849 | ||
|  | 3fc6e03ad1 | ||
|  | 2d6614f3a1 | ||
|  | 4e041b5103 | ||
|  | 712b9a3489 | ||
|  | bdaa5b1767 | ||
|  | 8fcefc021a | ||
|  | 1445189361 | ||
|  | 05c884a62a | ||
|  | a25bec87d9 | ||
|  | 2d8bb4c594 | ||
|  | 51cb2d4328 | ||
|  | 6d58cb2a68 | ||
|  | c8b35d960c | ||
|  | 532f41dd61 | ||
|  | 661b0ab45d | ||
|  | 565e9329ba | ||
|  | 4bc08ed995 | ||
|  | b2933a0557 | ||
|  | db057cc276 | ||
|  | 22e88eaf54 | ||
|  | 09fe3caebd | ||
|  | 5e02392f9c | ||
|  | 17a8f51a9b | ||
|  | 1b7f88dd00 | ||
| d6737e4bd8 | |||
| d539888e57 | |||
|  | 86187d7cca | ||
|  | 87418e7df1 | ||
|  | 55f65b81b5 | ||
|  | d9408893b3 | ||
|  | 05acc22920 | ||
|  | 8ac021de73 | ||
|  | e503ef5590 | ||
|  | a7682b0060 | ||
|  | d4c9d71fc8 | ||
|  | 786ca52c43 | ||
|  | 048ac04abc | ||
|  | f78d89bcbe | ||
|  | 53d06046b0 | ||
|  | 5d3a1a025d | ||
|  | 139cc5f1ae | ||
| 1c0e922585 | |||
| 9d5f693cbe | |||
|  | 339be37dba | ||
|  | a87b744621 | ||
| 97d0d56bcb | |||
| 7c7ea35ffb | |||
| 4b1cf580e0 | |||
| 2d8bb356e3 | |||
| a7251f28c7 | |||
|  | c1b1b89d17 | ||
|  | 771235017d | 
							
								
								
									
										30
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -5,7 +5,6 @@ | ||||
| *.o | ||||
| *.obj | ||||
|  | ||||
|  | ||||
| # Editor files # | ||||
| ################ | ||||
| *~ | ||||
| @@ -48,6 +47,7 @@ Config.h.in | ||||
| config.log | ||||
| config.status | ||||
| .deps | ||||
| *.inc | ||||
|  | ||||
| # http://www.gnu.org/software/autoconf # | ||||
| ######################################## | ||||
| @@ -62,19 +62,8 @@ stamp-h1 | ||||
| config.sub | ||||
| config.guess | ||||
| INSTALL | ||||
|  | ||||
| # Packages # | ||||
| ############ | ||||
| # it's better to unpack these files and commit the raw source | ||||
| # git has its own built in compression methods | ||||
| *.7z | ||||
| *.dmg | ||||
| *.gz | ||||
| *.iso | ||||
| *.jar | ||||
| *.rar | ||||
| *.tar | ||||
| *.zip | ||||
| .dirstamp | ||||
| ltmain.sh | ||||
|   | ||||
| # Logs and databases # | ||||
| ###################### | ||||
| @@ -100,3 +89,16 @@ build*/* | ||||
| ##################### | ||||
| *.xcodeproj/* | ||||
| build.sh | ||||
|  | ||||
| # Eigen source # | ||||
| ################ | ||||
| lib/Eigen/* | ||||
|  | ||||
| # FFTW source # | ||||
| ################ | ||||
| lib/fftw/* | ||||
|  | ||||
| # libtool macros # | ||||
| ################## | ||||
| m4/lt* | ||||
| m4/libtool.m4 | ||||
							
								
								
									
										51
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										51
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -1,14 +1,14 @@ | ||||
| language: cpp | ||||
|  | ||||
| cache: | ||||
|   directories: | ||||
|     - clang | ||||
|  | ||||
| matrix: | ||||
|   include: | ||||
|     - os:        osx | ||||
|       osx_image: xcode7.2 | ||||
|       compiler: clang | ||||
|     - os:        osx | ||||
|       osx_image: xcode7.2 | ||||
|       compiler: gcc | ||||
|       env: VERSION=-5 | ||||
|     - compiler: gcc | ||||
|       addons: | ||||
|         apt: | ||||
| @@ -19,6 +19,8 @@ matrix: | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-4.9 | ||||
|     - compiler: gcc | ||||
| @@ -31,6 +33,8 @@ matrix: | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-5 | ||||
|     - compiler: clang | ||||
| @@ -38,42 +42,65 @@ matrix: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|             - llvm-toolchain-precise-3.7 | ||||
|           packages: | ||||
|             - clang-3.7 | ||||
|             - g++-4.8 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-3.7 | ||||
|       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||
|     - compiler: clang | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|             - llvm-toolchain-precise-3.8 | ||||
|           packages: | ||||
|             - clang-3.8 | ||||
|             - g++-4.8 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-3.8 | ||||
|       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||
|        | ||||
| before_install: | ||||
|     - export GRIDDIR=`pwd` | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi | ||||
|      | ||||
| install: | ||||
|     - export CC=$CC$VERSION | ||||
|     - export CXX=$CXX$VERSION | ||||
|     - echo $PATH | ||||
|     - which $CC | ||||
|     - $CC  --version | ||||
|     - which $CXX | ||||
|     - $CXX --version | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi | ||||
|      | ||||
| script: | ||||
|     - ./scripts/reconfigure_script | ||||
|     - ./bootstrap.sh | ||||
|     - mkdir build | ||||
|     - cd build | ||||
|     - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||
|     - make -j4  | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 | ||||
|     - echo make clean | ||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||
|     - make -j4 | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 | ||||
|     - echo make clean | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi | ||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto | ||||
|     - make -j4 | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| # additional include paths necessary to compile the C++ library | ||||
| AM_CXXFLAGS = -I$(top_srcdir)/ | ||||
| SUBDIRS = lib tests benchmarks | ||||
| SUBDIRS = lib benchmarks tests | ||||
|  | ||||
| filelist: $(SUBDIRS) | ||||
| AM_CXXFLAGS += -I$(top_builddir)/include | ||||
| ACLOCAL_AMFLAGS = -I m4 | ||||
|   | ||||
							
								
								
									
										110
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										110
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,8 +1,28 @@ | ||||
| # Grid [](https://travis-ci.org/paboyle/Grid) | ||||
| Data parallel C++ mathematical object library | ||||
| # Grid | ||||
| <table> | ||||
| <tr> | ||||
|     <td>Last stable release</td> | ||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> | ||||
|     </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td>Development branch</td> | ||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> | ||||
|     </td> | ||||
| </tr> | ||||
| </table> | ||||
|  | ||||
| Last update 2015/7/30 | ||||
| **Data parallel C++ mathematical object library.** | ||||
|  | ||||
| Please send all pull requests to the `develop` branch. | ||||
|  | ||||
| License: GPL v2. | ||||
|  | ||||
| Last update 2016/08/03. | ||||
|  | ||||
| ### Description | ||||
| This library provides data parallel C++ container classes with internal memory layout | ||||
| that is transformed to map efficiently to SIMD architectures. CSHIFT facilities | ||||
| are provided, similar to HPF and cmfortran, and user control is given over the mapping of | ||||
| @@ -22,37 +42,75 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi | ||||
| for most programmers. | ||||
|  | ||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||
| Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way). | ||||
| Presently SSE4 (128 bit), AVX and AVX2 (256 bit), and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way). | ||||
|  | ||||
| These are presented as  | ||||
|  | ||||
|      vRealF, vRealD, vComplexF, vComplexD  | ||||
|  | ||||
| internal vector data types. These may be useful in themselves for other programmers. | ||||
| The corresponding scalar types are named | ||||
|  | ||||
|      RealF, RealD, ComplexF, ComplexD | ||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | ||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||
|  | ||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | ||||
| Please see https://arxiv.org/abs/1512.03487 for more detail. | ||||
|  | ||||
|    You can give `configure' initial values for configuration parameters | ||||
| by setting variables in the command line or in the environment.  Here | ||||
| are examples: | ||||
| ### Installation | ||||
| First, start by cloning the repository: | ||||
|  | ||||
|      ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4 | ||||
| ``` bash | ||||
| git clone https://github.com/paboyle/Grid.git | ||||
| ``` | ||||
|  | ||||
|      ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX | ||||
| Then enter the cloned directory and set up the build system: | ||||
|  | ||||
|      ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 | ||||
| ``` bash | ||||
| cd Grid | ||||
| ./bootstrap.sh | ||||
| ``` | ||||
|  | ||||
|      ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none | ||||
|       | ||||
| Note: Before running configure it could be necessary to execute the script  | ||||
|         | ||||
|        script/filelist | ||||
| Now you can execute the `configure` script to generate makefiles (here from a build directory): | ||||
|  | ||||
| ``` bash | ||||
| mkdir build; cd build | ||||
| ../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path> | ||||
| ``` | ||||
|  | ||||
|       | ||||
| For developers: | ||||
| Use reconfigure_script in the scripts/ directory to create the autotools environment  | ||||
| where `--enable-precision=` sets the default precision (`single` or `double`), | ||||
| `--enable-simd=` sets the SIMD type (see possible values below), | ||||
| `--enable-comms=` sets the protocol used for communications (`none`, `mpi`, | ||||
| `mpi-auto` or `shmem`), and `<path>` should be replaced by the prefix path where | ||||
| you want to install Grid. The `mpi-auto` communication option makes `configure` | ||||
| determine automatically how to link to MPI. Other options are available; use | ||||
| `configure --help` to display them. As with any other program using GNU | ||||
| autotools, the `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be | ||||
| modified to customise the build. | ||||
|  | ||||
| Finally, you can build and install Grid: | ||||
|  | ||||
| ``` bash | ||||
| make; make install | ||||
| ``` | ||||
|  | ||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||
|  | ||||
| ``` bash | ||||
| make -C tests/<subdir> tests | ||||
| ``` | ||||
|  | ||||
| ### Possible SIMD types | ||||
|  | ||||
| The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets: | ||||
|  | ||||
| | String      | Description                            | | ||||
| | ----------- | -------------------------------------- | | ||||
| | `GEN`       | generic portable vector code           | | ||||
| | `SSE4`      | SSE 4.2 (128 bit)                      | | ||||
| | `AVX`       | AVX (256 bit)                          | | ||||
| | `AVXFMA4`   | AVX (256 bit) + FMA                    | | ||||
| | `AVX2`      | AVX 2 (256 bit)                        | | ||||
| | `AVX512`    | AVX 512 bit                            | | ||||
| | `AVX512MIC` | AVX 512 bit for Intel MIC architecture | | ||||
| `IMCI`      | Intel IMCI instructions (512 bit)      | | ||||
|  | ||||
| Alternatively, some CPU codenames can be directly used: | ||||
|  | ||||
| | String      | Description                            | | ||||
| | ----------- | -------------------------------------- | | ||||
| | `KNC`       | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | | ||||
| | `KNL`       | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||
							
								
								
									
										4
									
								
								VERSION
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								VERSION
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| Version : 0.5.0 | ||||
|  | ||||
| - AVX512, AVX2, AVX, SSE good | ||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | ||||
| @@ -25,7 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| @@ -194,7 +194,128 @@ int main (int argc, char ** argv) | ||||
|     } | ||||
|   }   | ||||
|  | ||||
| #if 0 | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|  | ||||
|  | ||||
|   for(int lat=4;lat<=32;lat+=2){ | ||||
|     for(int Ls=1;Ls<=16;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|  | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|  | ||||
|       std::vector<CartesianCommunicator::CommsRequest_t> empty; | ||||
|       std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_fwd(Nd,empty); | ||||
|       std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_bwd(Nd,empty); | ||||
|  | ||||
|       for(int mu=0;mu<4;mu++){ | ||||
| 	ncomm=0; | ||||
| 	if (mpi_layout[mu]>1 ) { | ||||
| 	  ncomm++; | ||||
|  | ||||
| 	  int comm_proc; | ||||
| 	  int xmit_to_rank; | ||||
| 	  int recv_from_rank; | ||||
|  | ||||
| 	  comm_proc=1; | ||||
| 	  Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	  Grid.SendToRecvFromInit(requests_fwd[mu], | ||||
| 				  (void *)&xbuf[mu][0], | ||||
| 				  xmit_to_rank, | ||||
| 				  (void *)&rbuf[mu][0], | ||||
| 				  recv_from_rank, | ||||
| 				  bytes); | ||||
|  | ||||
| 	  comm_proc = mpi_layout[mu]-1; | ||||
| 	  Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	  Grid.SendToRecvFromInit(requests_bwd[mu], | ||||
| 				  (void *)&xbuf[mu+4][0], | ||||
| 				  xmit_to_rank, | ||||
| 				  (void *)&rbuf[mu+4][0], | ||||
| 				  recv_from_rank, | ||||
| 				  bytes); | ||||
|  | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       { | ||||
| 	double start=usecond(); | ||||
| 	for(int i=0;i<Nloop;i++){ | ||||
| 	   | ||||
| 	  for(int mu=0;mu<4;mu++){ | ||||
| 	     | ||||
| 	    if (mpi_layout[mu]>1 ) { | ||||
| 	       | ||||
| 	      Grid.SendToRecvFromBegin(requests_fwd[mu]); | ||||
| 	      Grid.SendToRecvFromComplete(requests_fwd[mu]); | ||||
| 	      Grid.SendToRecvFromBegin(requests_bwd[mu]); | ||||
| 	      Grid.SendToRecvFromComplete(requests_bwd[mu]); | ||||
| 	    } | ||||
| 	  } | ||||
| 	  Grid.Barrier(); | ||||
| 	} | ||||
| 	 | ||||
| 	double stop=usecond(); | ||||
| 	 | ||||
| 	double dbytes    = bytes; | ||||
| 	double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
| 	double rbytes    = xbytes; | ||||
| 	double bidibytes = xbytes+rbytes; | ||||
| 	 | ||||
| 	double time = stop-start; | ||||
| 	 | ||||
| 	std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|  | ||||
|       } | ||||
|  | ||||
|  | ||||
|       { | ||||
| 	double start=usecond(); | ||||
| 	for(int i=0;i<Nloop;i++){ | ||||
| 	   | ||||
| 	  for(int mu=0;mu<4;mu++){ | ||||
| 	     | ||||
| 	    if (mpi_layout[mu]>1 ) { | ||||
| 	       | ||||
| 	      Grid.SendToRecvFromBegin(requests_fwd[mu]); | ||||
| 	      Grid.SendToRecvFromBegin(requests_bwd[mu]); | ||||
| 	      Grid.SendToRecvFromComplete(requests_fwd[mu]); | ||||
| 	      Grid.SendToRecvFromComplete(requests_bwd[mu]); | ||||
| 	    } | ||||
| 	  } | ||||
| 	  Grid.Barrier(); | ||||
| 	} | ||||
| 	 | ||||
| 	double stop=usecond(); | ||||
| 	 | ||||
| 	double dbytes    = bytes; | ||||
| 	double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
| 	double rbytes    = xbytes; | ||||
| 	double bidibytes = xbytes+rbytes; | ||||
| 	 | ||||
| 	double time = stop-start; | ||||
| 	 | ||||
| 	std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|  | ||||
|       } | ||||
|  | ||||
|     } | ||||
|   } | ||||
|  | ||||
| #endif | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
| @@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| @@ -45,6 +45,10 @@ struct scal { | ||||
|   }; | ||||
|  | ||||
| bool overlapComms = false; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; | ||||
|  | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
| @@ -58,12 +62,18 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   std::vector<int> latt4 = GridDefaultLatt(); | ||||
|   const int Ls=8; | ||||
|   const int Ls=16; | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   std::cout << GridLogMessage << "Making s innermost grids"<<std::endl; | ||||
|   GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); | ||||
|   GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|  | ||||
| @@ -76,9 +86,9 @@ int main (int argc, char ** argv) | ||||
|   LatticeFermion    tmp(FGrid); | ||||
|   LatticeFermion    err(FGrid); | ||||
|  | ||||
|   ColourMatrix cm = Complex(1.0,0.0); | ||||
|   LatticeGaugeField Umu(UGrid);  | ||||
|   random(RNG4,Umu); | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); random(RNG4,Umu); | ||||
|   LatticeGaugeField Umu5d(FGrid);  | ||||
|  | ||||
|   // replicate across fifth dimension | ||||
| @@ -114,19 +124,25 @@ int main (int argc, char ** argv) | ||||
|   RealD mass=0.1; | ||||
|   RealD M5  =1.8; | ||||
|  | ||||
|   typename DomainWallFermionR::ImplParams params;  | ||||
|   params.overlapCommsCompute = overlapComms; | ||||
|    | ||||
|   RealD NP = UGrid->_Nprocessors; | ||||
|  | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); | ||||
|   for(int doasm=1;doasm<2;doasm++){ | ||||
|  | ||||
|     QCD::WilsonKernelsStatic::AsmOpt=doasm; | ||||
|  | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|    | ||||
|   std::cout<<GridLogMessage << "Calling Dw"<<std::endl; | ||||
|   int ncall=100; | ||||
|   { | ||||
|   std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl; | ||||
|   std::cout << GridLogMessage<< "Calling Dw"<<std::endl; | ||||
|   int ncall =100; | ||||
|   if (1) { | ||||
|  | ||||
|     Dw.ZeroCounters(); | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
|       Dw.Dhop(src,result,0); | ||||
|       __SSC_STOP; | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|      | ||||
| @@ -143,7 +159,127 @@ int main (int argc, char ** argv) | ||||
|     Dw.Report(); | ||||
|   } | ||||
|  | ||||
|   exit(0); | ||||
|   if (1) | ||||
|   { | ||||
|     typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; | ||||
|     LatticeFermion ssrc(sFGrid); | ||||
|     LatticeFermion sref(sFGrid); | ||||
|     LatticeFermion sresult(sFGrid); | ||||
|  | ||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); | ||||
|    | ||||
|     for(int x=0;x<latt4[0];x++){ | ||||
|     for(int y=0;y<latt4[1];y++){ | ||||
|     for(int z=0;z<latt4[2];z++){ | ||||
|     for(int t=0;t<latt4[3];t++){ | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       std::vector<int> site({s,x,y,z,t}); | ||||
|       SpinColourVector tmp; | ||||
|       peekSite(tmp,src,site); | ||||
|       pokeSite(tmp,ssrc,site); | ||||
|     }}}}} | ||||
|     std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl; | ||||
|     double t0=usecond(); | ||||
|     sDw.ZeroCounters(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
|       sDw.Dhop(ssrc,sresult,0); | ||||
|       __SSC_STOP; | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=1344*volume*ncall; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     sDw.Report(); | ||||
|    | ||||
|     if(0){ | ||||
|       for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ | ||||
|   sDw.Dhop(ssrc,sresult,0); | ||||
|   PerformanceCounter Counter(i); | ||||
|   Counter.Start(); | ||||
|   sDw.Dhop(ssrc,sresult,0); | ||||
|   Counter.Stop(); | ||||
|   Counter.Report(); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||
|  | ||||
|  | ||||
|     RealF sum=0; | ||||
|     for(int x=0;x<latt4[0];x++){ | ||||
|     for(int y=0;y<latt4[1];y++){ | ||||
|     for(int z=0;z<latt4[2];z++){ | ||||
|     for(int t=0;t<latt4[3];t++){ | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       std::vector<int> site({s,x,y,z,t}); | ||||
|       SpinColourVector normal, simd; | ||||
|       peekSite(normal,result,site); | ||||
|       peekSite(simd,sresult,site); | ||||
|       sum=sum+norm2(normal-simd); | ||||
|       if (norm2(normal-simd) > 1.0e-6 ) { | ||||
|   std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl; | ||||
|   std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl; | ||||
|   std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd   "<<simd<<std::endl; | ||||
|       } | ||||
|     }}}}} | ||||
|     std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl; | ||||
|  | ||||
|  | ||||
|     if (1) { | ||||
|  | ||||
|       LatticeFermion sr_eo(sFGrid); | ||||
|       LatticeFermion serr(sFGrid); | ||||
|  | ||||
|       LatticeFermion ssrc_e (sFrbGrid); | ||||
|       LatticeFermion ssrc_o (sFrbGrid); | ||||
|       LatticeFermion sr_e   (sFrbGrid); | ||||
|       LatticeFermion sr_o   (sFrbGrid); | ||||
|  | ||||
|       pickCheckerboard(Even,ssrc_e,ssrc); | ||||
|       pickCheckerboard(Odd,ssrc_o,ssrc); | ||||
|  | ||||
|       setCheckerboard(sr_eo,ssrc_o); | ||||
|       setCheckerboard(sr_eo,ssrc_e); | ||||
|       serr = sr_eo-ssrc;  | ||||
|       std::cout<<GridLogMessage << "EO src norm diff   "<< norm2(serr)<<std::endl; | ||||
|  | ||||
|       sr_e = zero; | ||||
|       sr_o = zero; | ||||
|  | ||||
|       sDw.ZeroCounters(); | ||||
|       sDw.stat.init("DhopEO"); | ||||
|       double t0=usecond(); | ||||
|       for (int i = 0; i < ncall; i++) { | ||||
|         sDw.DhopEO(ssrc_o, sr_e, DaggerNo); | ||||
|       } | ||||
|       double t1=usecond(); | ||||
|       sDw.stat.print(); | ||||
|  | ||||
|       double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|       double flops=(1344.0*volume*ncall)/2; | ||||
|  | ||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NP<<std::endl; | ||||
|       sDw.Report(); | ||||
|  | ||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||
|       sDw.DhopOE(ssrc_e,sr_o,DaggerNo); | ||||
|       sDw.Dhop  (ssrc  ,sresult,DaggerNo); | ||||
|  | ||||
|       pickCheckerboard(Even,ssrc_e,sresult); | ||||
|       pickCheckerboard(Odd ,ssrc_o,sresult); | ||||
|       ssrc_e = ssrc_e - sr_e; | ||||
|       std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl; | ||||
|       ssrc_o = ssrc_o - sr_o; | ||||
|       std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<< "  vec nrm"<<norm2(sr_o) <<std::endl; | ||||
|     } | ||||
|  | ||||
|  | ||||
|   } | ||||
|  | ||||
|   if (1) | ||||
|   { // Naive wilson dag implementation | ||||
| @@ -153,18 +289,19 @@ int main (int argc, char ** argv) | ||||
|       //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x | ||||
|       tmp = U[mu]*Cshift(src,mu+1,1); | ||||
|       for(int i=0;i<ref._odata.size();i++){ | ||||
| 	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ; | ||||
|   ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ; | ||||
|       } | ||||
|  | ||||
|       tmp =adj(U[mu])*src; | ||||
|       tmp =Cshift(tmp,mu+1,-1); | ||||
|       for(int i=0;i<ref._odata.size();i++){ | ||||
| 	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ; | ||||
|   ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ; | ||||
|       } | ||||
|     } | ||||
|     ref = -0.5*ref; | ||||
|   } | ||||
|   Dw.Dhop(src,result,1); | ||||
|   std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl; | ||||
|   std::cout<<GridLogMessage << "Called DwDag"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; | ||||
|   std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||
| @@ -186,6 +323,7 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl; | ||||
|  | ||||
|   { | ||||
|     Dw.ZeroCounters(); | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| @@ -197,6 +335,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     Dw.Report(); | ||||
|   } | ||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
|   Dw.DhopOE(src_e,r_o,DaggerNo); | ||||
| @@ -217,5 +356,8 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||
|  | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
							
								
								
									
										153
									
								
								benchmarks/Benchmark_dwf_ntpf.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								benchmarks/Benchmark_dwf_ntpf.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,153 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./benchmarks/Benchmark_dwf_ntpf.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| // Single-member wrapper template; nothing else in this file refers to it. | ||||
| template<typename d> | ||||
| struct scal { d internal; }; | ||||
|  | ||||
| // Gamma matrix associated with each lattice direction, indexed by mu = 0..3. | ||||
| Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT }; | ||||
|  | ||||
| // Switched on in main() when --asynch is given on the command line. | ||||
| bool overlapComms = false; | ||||
|  | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { /* Times ncall applications of DomainWallFermionR::Dhop on the default lattice with Ls=16 and prints its norm difference from a Cshift-built reference. */ | ||||
|   Grid_init(&argc,&argv); | ||||
|   /* --asynch on the command line sets overlapComms, which is copied into the operator params further down. */ | ||||
|   if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ | ||||
|     overlapComms = true; | ||||
|   } | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|   /* Four-dimensional grids for the gauge field; five-dimensional grids (extent Ls) for the fermion fields. */ | ||||
|   std::vector<int> latt4 = GridDefaultLatt(); | ||||
|   const int Ls=16; | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|   /* Fixed seeds: the random source and gauge field are seeded identically on every run. */ | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|  | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|  | ||||
|   LatticeFermion src   (FGrid); random(RNG5,src); | ||||
|   LatticeFermion result(FGrid); result=zero; | ||||
|   LatticeFermion    ref(FGrid);    ref=zero; | ||||
|   LatticeFermion    tmp(FGrid); | ||||
|   LatticeFermion    err(FGrid); | ||||
|   /* NOTE(review): cm is not used anywhere below in this function. */ | ||||
|   ColourMatrix cm = Complex(1.0,0.0); | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid);  | ||||
|   random(RNG4,Umu); | ||||
|  | ||||
|   LatticeGaugeField Umu5d(FGrid);  | ||||
|  | ||||
|   // replicate across fifth dimension | ||||
|   for(int ss=0;ss<Umu._grid->oSites();ss++){ | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; /* the Ls entries Ls*ss .. Ls*ss+Ls-1 all receive the 4d entry ss */ | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Naive wilson implementation | ||||
|   //////////////////////////////////// | ||||
|   std::vector<LatticeColourMatrix> U(4,FGrid); | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|     U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); | ||||
|   } | ||||
|   /* Reference result: for each direction add the forward-shifted and backward-shifted terms, then scale the sum by -0.5. */ | ||||
|   if (1) | ||||
|   { | ||||
|     ref = zero; | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       /* Cshift dimension is mu+1 because dimension 0 of the 5d grid is the fifth (s) direction -- TODO confirm against makeFiveDimGrid. */ | ||||
|       tmp = U[mu]*Cshift(src,mu+1,1); | ||||
|       ref=ref + tmp - Gamma(Gmu[mu])*tmp; | ||||
|  | ||||
|       tmp =adj(U[mu])*src; | ||||
|       tmp =Cshift(tmp,mu+1,-1); | ||||
|       ref=ref + tmp + Gamma(Gmu[mu])*tmp; | ||||
|     } | ||||
|     ref = -0.5*ref; | ||||
|   } | ||||
|  | ||||
|   RealD mass=0.1; | ||||
|   RealD M5  =1.8; | ||||
|  | ||||
|   typename DomainWallFermionR::ImplParams params;  | ||||
|   params.overlapCommsCompute = overlapComms; | ||||
|    | ||||
|   RealD NP = UGrid->_Nprocessors; /* divisor for the "per node" rate printed below */ | ||||
|  | ||||
|   /* AsmOpt is forced to 1 here regardless of the environment; NOTE(review): presumably this selects the assembler kernels -- confirm. */ | ||||
|   QCD::WilsonKernelsStatic::AsmOpt=1; | ||||
|  | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); | ||||
|    | ||||
|   std::cout<<GridLogMessage << "Calling Dw"<<std::endl; | ||||
|   int ncall =50; /* fixed number of timed Dhop applications */ | ||||
|   if (1) { | ||||
|     /* There is no untimed warm-up call: the first Dhop is inside the timed loop. */ | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       Dw.Dhop(src,result,0); | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|      | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; /* number of 5d sites: Ls times the 4d extents */ | ||||
|     double flops=1344*volume*ncall; /* 1344 is the flop count charged per 5d site per Dhop call */ | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; /* flops per microsecond, i.e. Mflop/s */ | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     err = ref-result;  | ||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||
|     //    Dw.Report(); | ||||
|   } | ||||
|   Grid_finalize(); | ||||
| } | ||||
							
								
								
									
										364
									
								
								benchmarks/Benchmark_dwf_sweep.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										364
									
								
								benchmarks/Benchmark_dwf_sweep.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,364 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./benchmarks/Benchmark_dwf_sweep.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| // Single-member wrapper template; nothing else in this file refers to it. | ||||
| template<typename d> | ||||
| struct scal { d internal; }; | ||||
|  | ||||
| // Gamma matrix associated with each lattice direction, indexed by mu = 0..3. | ||||
| Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT }; | ||||
|  | ||||
| // Benchmark drivers defined after main(). L is the four-dimensional lattice | ||||
| // extent, Ls the fifth-dimension extent; a non-zero report makes them print | ||||
| // performance-counter reports instead of rate columns. | ||||
| void benchDw (std::vector<int> & L, int Ls, int threads, int report =0 ); | ||||
| void benchsDw(std::vector<int> & L, int Ls, int threads, int report=0 ); | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   // Prints a table of domain-wall Dhop rates over a sweep of lattice volumes, | ||||
|   // then reruns both benchmarks on 16^4 with report=1 so that they print their | ||||
|   // performance-counter reports instead of rate columns. | ||||
|   // Environment: ASMOPT (if set) selects AsmOpt=1, otherwise 0; LMAX caps the | ||||
|   // base extent L (default 32); DMIN ends the per-L doubling sequence early | ||||
|   // (default 0). | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   const int Ls=8; | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   if ( getenv("ASMOPT") )  { | ||||
|     QCD::WilsonKernelsStatic::AsmOpt=1; | ||||
|   } else { | ||||
|     QCD::WilsonKernelsStatic::AsmOpt=0; | ||||
|   } | ||||
|  | ||||
|   std::cout<<GridLogMessage << "=========================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "=========================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s)  "<<std::endl; | ||||
|   std::cout<<GridLogMessage << "=========================================================================="<<std::endl; | ||||
|  | ||||
|   int Lmax=32; | ||||
|   int dmin=0; | ||||
|   if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX")); | ||||
|   if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN")); | ||||
|   for (int L=8;L<=Lmax;L*=2){ | ||||
|     std::vector<int> latt4(4,L); | ||||
|     // d==4 benchmarks L^4 unchanged; each later step (d==3,2,1,...) doubles | ||||
|     // extent d on top of the previous doublings. The d>=0 bound keeps latt4[d] | ||||
|     // inside the vector when DMIN is set below -1. | ||||
|     for(int d=4;d>dmin && d>=0;d--){ | ||||
|       if ( d<=3 ) latt4[d]*=2; | ||||
|       std::cout << GridLogMessage <<"\t"; | ||||
|       // Volume column: the four extents followed by Ls, separated by 'x'. | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
|         std::cout<<latt4[mu]<<"x"; | ||||
|       } | ||||
|       std::cout <<Ls<<"\t" ; | ||||
|       benchDw (latt4,Ls,threads,0); | ||||
|       benchsDw(latt4,Ls,threads,0); | ||||
|       std::cout<<std::endl; | ||||
|     } | ||||
|   } | ||||
|   std::cout<<GridLogMessage << "=========================================================================="<<std::endl; | ||||
|   { | ||||
|     std::vector<int> latt4(4,16); | ||||
|     std::cout<<GridLogMessage << "16^4 Dw miss rate"<<std::endl; | ||||
|     benchDw (latt4,Ls,threads,1); | ||||
|     std::cout<<GridLogMessage << "16^4 sDw miss rate"<<std::endl; | ||||
|     benchsDw(latt4,Ls,threads,1); | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|  | ||||
| #undef CHECK | ||||
|  | ||||
| void benchDw(std::vector<int> & latt4, int Ls, int threads,int report ) | ||||
| { /* Times DomainWallFermionR Dhop and DhopEO on a latt4 x Ls lattice. report==0: prints the process count and two rate columns; report!=0: prints the PerformanceCounter report for the Dhop loop only. threads is not used in the body. */ | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|   /* NOTE(review): the four grid pointers above are never released in this function, and it is called once per table row -- confirm whether the factories hand over ownership. */ | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   /* CHECK is #undef'd immediately before this function, so the zero-field #else branch below is the one compiled. */ | ||||
| #ifdef CHECK  | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|   LatticeFermion src   (FGrid); random(RNG5,src); | ||||
|   LatticeGaugeField Umu(UGrid);  | ||||
|   random(RNG4,Umu); | ||||
| #else  | ||||
|   LatticeFermion src   (FGrid); src=zero; | ||||
|   LatticeGaugeField Umu(UGrid); Umu=zero; | ||||
| #endif | ||||
|  | ||||
|   LatticeFermion result(FGrid); result=zero; | ||||
|   LatticeFermion    ref(FGrid);    ref=zero; | ||||
|   LatticeFermion    tmp(FGrid); | ||||
|   LatticeFermion    err(FGrid); | ||||
|   /* NOTE(review): cm is not used anywhere below in this function. */ | ||||
|   ColourMatrix cm = Complex(1.0,0.0); | ||||
|  | ||||
|   LatticeGaugeField Umu5d(FGrid);  | ||||
|  | ||||
|   // replicate across fifth dimension | ||||
|   for(int ss=0;ss<Umu._grid->oSites();ss++){ | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////// | ||||
|   // Naive wilson implementation | ||||
|   //////////////////////////////////// | ||||
|   std::vector<LatticeColourMatrix> U(4,FGrid); | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|     U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); | ||||
|   } | ||||
|   /* The reference field ref is only filled in when CHECK is defined; otherwise it stays zero and is never compared. */ | ||||
| #ifdef CHECK | ||||
|   if (1) { | ||||
|  | ||||
|     ref = zero; | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       tmp = U[mu]*Cshift(src,mu+1,1); | ||||
|       ref=ref + tmp - Gamma(Gmu[mu])*tmp; | ||||
|  | ||||
|       tmp =adj(U[mu])*src; | ||||
|       tmp =Cshift(tmp,mu+1,-1); | ||||
|       ref=ref + tmp + Gamma(Gmu[mu])*tmp; | ||||
|     } | ||||
|     ref = -0.5*ref; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   RealD mass=0.1; | ||||
|   RealD M5  =1.8; | ||||
|   RealD NP = UGrid->_Nprocessors; /* printed as the "Procs" column when report==0 */ | ||||
|  | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   /* Calibration: time one Dhop call and size the measured loop from it. */ | ||||
|   double t0=usecond(); | ||||
|   Dw.Dhop(src,result,0); | ||||
|   double t1=usecond(); | ||||
|  | ||||
| #ifdef TIMERS_OFF | ||||
|     int ncall =10; | ||||
| #else | ||||
|   int ncall =1+(int) ((5.0*1000*1000)/(t1-t0)); /* 5e6 timer units divided by the single-call time, plus one. NOTE(review): t1==t0 would divide by zero before the int cast. */ | ||||
| #endif | ||||
|  | ||||
|   if (ncall < 5 ) exit(0); /* NOTE(review): ends the whole program with status 0 and no message when the calibration call took more than 1.25e6 timer units (1.25 s if usecond() counts microseconds) -- confirm intended. */ | ||||
|  | ||||
|   Dw.Dhop(src,result,0); /* one more untimed call before the measured loop */ | ||||
|  | ||||
|   PerformanceCounter Counter(8); /* NOTE(review): the meaning of counter index 8 is not visible here */ | ||||
|   Counter.Start(); | ||||
|   t0=usecond(); | ||||
|   for(int i=0;i<ncall;i++){ | ||||
|     Dw.Dhop(src,result,0); | ||||
|   } | ||||
|   t1=usecond(); | ||||
|   Counter.Stop(); | ||||
|   if ( report ) { | ||||
|     Counter.Report(); | ||||
|   } | ||||
|    | ||||
|   if ( ! report ) { | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=1344*volume*ncall; /* 1344 flops charged per 5d site per Dhop call */ | ||||
|     std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t"; /* process count, then flops per timer unit, then a trailing tab for the next column */ | ||||
|   } | ||||
|    | ||||
| #ifdef CHECK | ||||
|   err = ref-result;  | ||||
|   RealD errd = norm2(err); | ||||
|   if ( errd> 1.0e-4 ) { | ||||
|     std::cout<<GridLogMessage << "oops !!! norm diff   "<< norm2(err)<<std::endl; | ||||
|     exit(-1); | ||||
|   } | ||||
| #endif | ||||
|      | ||||
|   LatticeFermion src_e (FrbGrid); | ||||
|   LatticeFermion src_o (FrbGrid); | ||||
|   LatticeFermion r_e   (FrbGrid); | ||||
|   LatticeFermion r_o   (FrbGrid); | ||||
|   LatticeFermion r_eo  (FGrid); | ||||
|    | ||||
|   pickCheckerboard(Even,src_e,src); | ||||
|   pickCheckerboard(Odd,src_o,src); | ||||
|   /* Checkerboarded benchmark: same loop count as above, applied to DhopEO from the odd source into r_e. */ | ||||
|   { | ||||
|     Dw.DhopEO(src_o,r_e,DaggerNo); /* untimed call before the measured loop */ | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|      | ||||
|     if(!report){ | ||||
|       double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|       double flops=(1344.0*volume*ncall)/2; /* half the full-lattice flop count; presumably because DhopEO writes one checkerboard -- confirm */ | ||||
|       std::cout<< flops/(t1-t0); /* no leading tab: the previous column already ended with one */ | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| #define CHECK_SDW | ||||
| void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report ) | ||||
| { /* Times WilsonFermion5D<DomainWallVec5dImplR> Dhop and DhopEO on the DWF ("s"-prefixed) grids for a latt4 x Ls lattice. report==0: prints two tab-prefixed rate columns; report!=0: prints the PerformanceCounter report for each loop. threads is not used in the body. */ | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|   /* Second set of grids from the *DWF* factories; these are the ones handed to sDw and the s-prefixed fields. NOTE(review): none of the eight grid pointers is released in this function. */ | ||||
|   GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); | ||||
|   GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   /* CHECK_SDW is #define'd immediately before this function, so the random-field branch below is the one compiled. */ | ||||
| #ifdef CHECK_SDW | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|   LatticeFermion src   (FGrid); random(RNG5,src); | ||||
|   LatticeGaugeField Umu(UGrid);  | ||||
|   random(RNG4,Umu); | ||||
| #else  | ||||
|   LatticeFermion src   (FGrid); src=zero; | ||||
|   LatticeGaugeField Umu(UGrid); Umu=zero; | ||||
| #endif | ||||
|  | ||||
|   LatticeFermion result(FGrid); result=zero; | ||||
|   LatticeFermion    ref(FGrid);    ref=zero; | ||||
|   LatticeFermion    tmp(FGrid); | ||||
|   LatticeFermion    err(FGrid); | ||||
|   /* NOTE(review): cm is not used anywhere below in this function. */ | ||||
|   ColourMatrix cm = Complex(1.0,0.0); | ||||
|  | ||||
|   LatticeGaugeField Umu5d(FGrid);  | ||||
|  | ||||
|   // replicate across fifth dimension | ||||
|   for(int ss=0;ss<Umu._grid->oSites();ss++){ | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   RealD mass=0.1; | ||||
|   RealD M5  =1.8; | ||||
|  | ||||
|   typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; | ||||
|   LatticeFermion ssrc(sFGrid); | ||||
|   LatticeFermion sref(sFGrid); | ||||
|   LatticeFermion sresult(sFGrid); | ||||
|   WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); /* constructed from M5 only; mass is not passed */ | ||||
|   /* Copy src into ssrc one site at a time with peekSite/pokeSite; the coordinate order is {s,x,y,z,t}. */ | ||||
|   for(int x=0;x<latt4[0];x++){ | ||||
|   for(int y=0;y<latt4[1];y++){ | ||||
|   for(int z=0;z<latt4[2];z++){ | ||||
|   for(int t=0;t<latt4[3];t++){ | ||||
|   for(int s=0;s<Ls;s++){ | ||||
|     std::vector<int> site({s,x,y,z,t}); | ||||
|     SpinColourVector tmp; /* shadows the LatticeFermion tmp declared above */ | ||||
|     peekSite(tmp,src,site); | ||||
|     pokeSite(tmp,ssrc,site); | ||||
|   }}}}} | ||||
|   /* Calibration: time one Dhop call and size the measured loops from it. */ | ||||
|   double t0=usecond(); | ||||
|   sDw.Dhop(ssrc,sresult,0); | ||||
|   double t1=usecond(); | ||||
|  | ||||
| #ifdef TIMERS_OFF | ||||
|   int ncall =10; | ||||
| #else  | ||||
|   int ncall =1+(int) ((5.0*1000*1000)/(t1-t0)); /* 5e6 timer units divided by the single-call time, plus one. NOTE(review): t1==t0 would divide by zero before the int cast. */ | ||||
| #endif | ||||
|  | ||||
|   PerformanceCounter Counter(8); /* NOTE(review): the meaning of counter index 8 is not visible here */ | ||||
|   Counter.Start(); | ||||
|   t0=usecond(); | ||||
|   for(int i=0;i<ncall;i++){ | ||||
|     sDw.Dhop(ssrc,sresult,0); | ||||
|   } | ||||
|   t1=usecond(); | ||||
|   Counter.Stop(); | ||||
|    | ||||
|   if ( report ) { | ||||
|     Counter.Report(); | ||||
|   } else {  | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=1344*volume*ncall; /* 1344 flops charged per 5d site per Dhop call */ | ||||
|     std::cout<<"\t"<< flops/(t1-t0); | ||||
|   } | ||||
|  | ||||
|   LatticeFermion sr_eo(sFGrid); | ||||
|   LatticeFermion serr(sFGrid); | ||||
|    | ||||
|   LatticeFermion ssrc_e (sFrbGrid); | ||||
|   LatticeFermion ssrc_o (sFrbGrid); | ||||
|   LatticeFermion sr_e   (sFrbGrid); | ||||
|   LatticeFermion sr_o   (sFrbGrid); | ||||
|        | ||||
|   pickCheckerboard(Even,ssrc_e,ssrc); | ||||
|   pickCheckerboard(Odd,ssrc_o,ssrc); | ||||
|   /* sr_eo is reassembled from the two halves here but is not read again in this function. */ | ||||
|   setCheckerboard(sr_eo,ssrc_o); | ||||
|   setCheckerboard(sr_eo,ssrc_e); | ||||
|      | ||||
|   sr_e = zero; | ||||
|   sr_o = zero; | ||||
|    | ||||
|   sDw.DhopEO(ssrc_o,sr_e,DaggerNo); /* untimed call before the measured loop */ | ||||
|   PerformanceCounter CounterSdw(8); | ||||
|   CounterSdw.Start(); | ||||
|   t0=usecond(); | ||||
|   for(int i=0;i<ncall;i++){ | ||||
|     __SSC_START; /* NOTE(review): __SSC_START/__SSC_STOP are defined outside this file; presumably tracing markers -- confirm */ | ||||
|     sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||
|     __SSC_STOP; | ||||
|   } | ||||
|   t1=usecond(); | ||||
|   CounterSdw.Stop(); | ||||
|  | ||||
|   if ( report ) {  | ||||
|     CounterSdw.Report(); | ||||
|   } else { | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=(1344.0*volume*ncall)/2; /* half the full-lattice flop count; presumably because DhopEO writes one checkerboard -- confirm */ | ||||
|     std::cout<<"\t"<< flops/(t1-t0); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|   | ||||
| @@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|   | ||||
| @@ -26,7 +26,7 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|   | ||||
| @@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|   | ||||
							
								
								
									
										117
									
								
								benchmarks/Benchmark_wilson_sweep.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								benchmarks/Benchmark_wilson_sweep.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,117 @@ | ||||
| /************************************************************************************* | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|     Source file: ./benchmarks/Benchmark_wilson_sweep.cc | ||||
|     Copyright (C) 2015 | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Richard Rollins <rprollins@users.noreply.github.com> | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| // Single-member wrapper template; nothing else in this file refers to it. | ||||
| template<typename d> | ||||
| struct scal { d internal; }; | ||||
|  | ||||
| // Gamma matrix associated with each lattice direction, indexed by mu = 0..3. | ||||
| Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT }; | ||||
|  | ||||
| // Switched on in main() when --asynch is given on the command line. | ||||
| bool overlapComms = false; | ||||
|  | ||||
| // Timing helper defined after main(): applies Dw.Dhop(src,result,dag) | ||||
| // repeatedly and prints the resulting rate for a lattice of `volume` sites. | ||||
| void bench_wilson ( LatticeFermion &    src, | ||||
|                     LatticeFermion & result, | ||||
|                     WilsonFermionR &     Dw, | ||||
|                     double const     volume, | ||||
|                     int const           dag ); | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   // Sweeps the Wilson Dhop benchmark over lattice volumes and prints one table | ||||
|   // row per volume: the extents, the Dhop rate and the daggered Dhop rate. | ||||
|   // Command line: --asynch turns on overlapCommsCompute in the operator params. | ||||
|   // Environment: LMAX caps the base extent L (default 32); DMIN ends the per-L | ||||
|   // doubling sequence early (default 0). | ||||
|   Grid_init(&argc,&argv); | ||||
|   if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; } | ||||
|   typename WilsonFermionR::ImplParams params; | ||||
|   params.overlapCommsCompute = overlapComms; | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
|   RealD mass = 0.1; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "============================================================================="<< std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl; | ||||
|   std::cout<<GridLogMessage << "============================================================================="<< std::endl; | ||||
|   std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl; | ||||
|   std::cout<<GridLogMessage << "============================================================================="<< std::endl; | ||||
|  | ||||
|   int Lmax = 32; | ||||
|   int dmin = 0; | ||||
|   if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX")); | ||||
|   if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN")); | ||||
|   for (int L=8; L<=Lmax; L*=2) | ||||
|     { | ||||
|       std::vector<int> latt_size = std::vector<int>(4,L); | ||||
|       // d==4 benchmarks L^4 unchanged; each later step (d==3,2,1,...) doubles | ||||
|       // extent d on top of the previous doublings. The d>=0 bound keeps | ||||
|       // latt_size[d] inside the vector when DMIN is set below -1. | ||||
|       for(int d=4; d>dmin && d>=0; d--) | ||||
|         { | ||||
|           if ( d<=3 ) { latt_size[d] *= 2; } | ||||
|  | ||||
|           // Volume column: extents joined by 'x'. | ||||
|           std::cout << GridLogMessage; | ||||
|           std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) ); | ||||
|           std::cout << latt_size.back() << "\t\t"; | ||||
|  | ||||
|           GridCartesian           Grid(latt_size,simd_layout,mpi_layout); | ||||
|           GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|           GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); | ||||
|           LatticeGaugeField Umu(&Grid); random(pRNG,Umu); | ||||
|           LatticeFermion    src(&Grid); random(pRNG,src); | ||||
|           LatticeFermion result(&Grid); result=zero; | ||||
|  | ||||
|           // Multiply the extents in double: an int product wraps once the | ||||
|           // site count reaches 2^31, which LMAX=128 already does (128*256^3). | ||||
|           double volume = std::accumulate(latt_size.begin(),latt_size.end(),1.0,std::multiplies<double>()); | ||||
|  | ||||
|           WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); | ||||
|  | ||||
|           bench_wilson(src,result,Dw,volume,DaggerNo); | ||||
|           bench_wilson(src,result,Dw,volume,DaggerYes); | ||||
|           std::cout << std::endl; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   std::cout<<GridLogMessage << "============================================================================="<< std::endl; | ||||
|   Grid_finalize(); | ||||
| } | ||||
|  | ||||
| void bench_wilson ( LatticeFermion &    src, | ||||
|                     LatticeFermion & result, | ||||
|                     WilsonFermionR &     Dw, | ||||
|                     double const     volume, | ||||
|                     int const           dag ) | ||||
| { | ||||
|   // Apply Dw.Dhop(src,result,dag) a fixed number of times between two | ||||
|   // usecond() readings and print flops per elapsed timer unit, followed by | ||||
|   // two tabs so the caller can append the next column. | ||||
|   const int ncall = 1000; | ||||
|  | ||||
|   const double started = usecond(); | ||||
|   int applied = 0; | ||||
|   while ( applied < ncall ) { | ||||
|     Dw.Dhop(src,result,dag); | ||||
|     ++applied; | ||||
|   } | ||||
|   const double stopped = usecond(); | ||||
|  | ||||
|   // 1344 flops are charged per lattice site per Dhop call. | ||||
|   const double flops   = 1344 * volume * ncall; | ||||
|   const double elapsed = stopped - started; | ||||
|   std::cout << flops/elapsed << "\t\t"; | ||||
| } | ||||
| @@ -25,8 +25,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| #include <PerfCount.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
|  | ||||
| using namespace Grid; | ||||
| @@ -41,14 +40,20 @@ int main(int argc,char **argv) | ||||
|   std::ofstream os("zmm.dat"); | ||||
|  | ||||
|   os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl; | ||||
|   std::cout<<GridLogMessage << "====================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "====================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "====================================================================="<<std::endl; | ||||
|   for(int L=4;L<=32;L+=4){ | ||||
|     for(int m=1;m<=2;m++){ | ||||
|       for(int Ls=8;Ls<=16;Ls+=8){ | ||||
| 	std::vector<int> grid({L,L,m*L,m*L}); | ||||
|   std::cout << GridLogMessage <<"\t"; | ||||
| 	for(int i=0;i<4;i++) {  | ||||
| 	  std::cout << grid[i]<<"x"; | ||||
| 	} | ||||
| 	std::cout << Ls<<std::endl; | ||||
| 	std::cout << Ls<<"\t\t"; | ||||
| 	bench(os,grid,Ls); | ||||
|       } | ||||
|     } | ||||
| @@ -105,7 +110,6 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls) | ||||
|   RealD M5  =1.8; | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|  | ||||
|   std::cout<<GridLogMessage << "Calling Dw"<<std::endl; | ||||
|   int ncall=50; | ||||
|   double t0=usecond(); | ||||
|   for(int i=0;i<ncall;i++){ | ||||
| @@ -117,16 +121,16 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls) | ||||
|   double flops=1344*volume/2; | ||||
|  | ||||
|   mfc = flops*ncall/(t1-t0); | ||||
|   std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s =   "<< mfc<<std::endl; | ||||
|   std::cout<<mfc<<"\t\t"; | ||||
|  | ||||
|   QCD::WilsonFermion5DStatic::AsmOptDslash=1; | ||||
|   QCD::WilsonKernelsStatic::AsmOpt=1; | ||||
|   t0=usecond(); | ||||
|   for(int i=0;i<ncall;i++){ | ||||
|     Dw.DhopOE(srce,resulta,0); | ||||
|   } | ||||
|   t1=usecond(); | ||||
|   mfa = flops*ncall/(t1-t0); | ||||
|   std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s =   "<< mfa<<std::endl; | ||||
|   std::cout<<mfa<<"\t\t"; | ||||
|   /* | ||||
|   int dag=DaggerNo; | ||||
|   t0=usecond(); | ||||
| @@ -164,8 +168,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls) | ||||
|   //resulta = (-0.5) * resulta; | ||||
|  | ||||
|   diff = resulto-resulta; | ||||
|   std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl; | ||||
|   std::cout<<std::endl; | ||||
|   std::cout<<norm2(diff)<<std::endl; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -1,31 +0,0 @@ | ||||
|  | ||||
| bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm | ||||
|  | ||||
|  | ||||
| Benchmark_comms_SOURCES=Benchmark_comms.cc | ||||
| Benchmark_comms_LDADD=-lGrid | ||||
|  | ||||
|  | ||||
| Benchmark_dwf_SOURCES=Benchmark_dwf.cc | ||||
| Benchmark_dwf_LDADD=-lGrid | ||||
|  | ||||
|  | ||||
| Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc | ||||
| Benchmark_memory_asynch_LDADD=-lGrid | ||||
|  | ||||
|  | ||||
| Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc | ||||
| Benchmark_memory_bandwidth_LDADD=-lGrid | ||||
|  | ||||
|  | ||||
| Benchmark_su3_SOURCES=Benchmark_su3.cc | ||||
| Benchmark_su3_LDADD=-lGrid | ||||
|  | ||||
|  | ||||
| Benchmark_wilson_SOURCES=Benchmark_wilson.cc | ||||
| Benchmark_wilson_LDADD=-lGrid | ||||
|  | ||||
|  | ||||
| Benchmark_zmm_SOURCES=Benchmark_zmm.cc | ||||
| Benchmark_zmm_LDADD=-lGrid | ||||
|  | ||||
| @@ -1,8 +1 @@ | ||||
| # additional include paths necessary to compile the C++ library | ||||
| AM_CXXFLAGS = -I$(top_srcdir)/lib | ||||
| AM_LDFLAGS = -L$(top_builddir)/lib | ||||
|  | ||||
| # | ||||
| # Test code | ||||
| # | ||||
| include Make.inc | ||||
|   | ||||
							
								
								
									
										19
									
								
								bootstrap.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										19
									
								
								bootstrap.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| #!/usr/bin/env bash | ||||
| # | ||||
| # Prepares a fresh checkout for building: downloads the Eigen and FFTW | ||||
| # archives, passes each one to its update script, then generates the | ||||
| # Make.inc files and the configure script. | ||||
| # Run from the top of the source tree; the script paths below are relative. | ||||
|  | ||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | ||||
| FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz | ||||
|  | ||||
| # wget is expected to save each download under the basename of its URL. | ||||
| EIGEN_ARCHIVE=$(basename "${EIGEN_URL}") | ||||
| FFTW_ARCHIVE=$(basename "${FFTW_URL}") | ||||
|  | ||||
| echo "-- deploying Eigen source..." | ||||
| # Stop if the download failed, so the update script is never handed a | ||||
| # missing archive. | ||||
| if ! wget "${EIGEN_URL}" --no-check-certificate; then | ||||
|   echo "-- error: could not download ${EIGEN_URL}" >&2 | ||||
|   exit 1 | ||||
| fi | ||||
| ./scripts/update_eigen.sh "${EIGEN_ARCHIVE}" | ||||
| rm "${EIGEN_ARCHIVE}" | ||||
|  | ||||
| echo "-- copying fftw prototypes..." | ||||
| if ! wget "${FFTW_URL}"; then | ||||
|   echo "-- error: could not download ${FFTW_URL}" >&2 | ||||
|   exit 1 | ||||
| fi | ||||
| ./scripts/update_fftw.sh "${FFTW_ARCHIVE}" | ||||
| rm "${FFTW_ARCHIVE}" | ||||
|  | ||||
| echo '-- generating Make.inc files...' | ||||
| ./scripts/filelist | ||||
| echo '-- generating configure script...' | ||||
| autoreconf -fvi | ||||
							
								
								
									
										435
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										435
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,287 +1,362 @@ | ||||
| #                         -*- Autoconf -*- | ||||
| # Process this file with autoconf to produce a configure script. | ||||
| # | ||||
| # Project Grid package   | ||||
| #  | ||||
| # Time-stamp: <2015-07-10 17:46:21 neo> | ||||
|  | ||||
| AC_PREREQ([2.63]) | ||||
| AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) | ||||
| AC_CANONICAL_SYSTEM | ||||
| AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid]) | ||||
| AC_CANONICAL_BUILD | ||||
| AC_CANONICAL_HOST | ||||
| AC_CANONICAL_TARGET | ||||
| AM_INIT_AUTOMAKE(subdir-objects) | ||||
| AC_CONFIG_MACRO_DIR([m4]) | ||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||
| AC_CONFIG_HEADERS([lib/Config.h]) | ||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||
|  | ||||
| AC_MSG_NOTICE([ | ||||
|  | ||||
| ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: | ||||
| Configuring $PACKAGE v$VERSION  for $host | ||||
| ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: | ||||
| ]) | ||||
|  | ||||
| # Checks for programs. | ||||
| ############### Checks for programs | ||||
| AC_LANG(C++) | ||||
| CXXFLAGS="-O3 $CXXFLAGS" | ||||
| AC_PROG_CXX | ||||
| AC_OPENMP | ||||
| AC_PROG_RANLIB | ||||
| #AX_CXX_COMPILE_STDCXX_11(noext, mandatory) | ||||
| AX_EXT | ||||
|  | ||||
| # Checks for libraries. | ||||
| #AX_GCC_VAR_ATTRIBUTE(aligned) | ||||
| ############ openmp  ############### | ||||
| AC_OPENMP | ||||
|  | ||||
| # Checks for header files. | ||||
| ac_openmp=no | ||||
|  | ||||
| if test "${OPENMP_CXXFLAGS}X" != "X"; then | ||||
| ac_openmp=yes | ||||
| AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" | ||||
| AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS" | ||||
| fi | ||||
|  | ||||
| ############### Checks for header files | ||||
| AC_CHECK_HEADERS(stdint.h) | ||||
| AC_CHECK_HEADERS(mm_malloc.h) | ||||
| AC_CHECK_HEADERS(malloc/malloc.h) | ||||
| AC_CHECK_HEADERS(malloc.h) | ||||
| AC_CHECK_HEADERS(endian.h) | ||||
| AC_CHECK_HEADERS(execinfo.h) | ||||
| AC_CHECK_HEADERS(gmp.h) | ||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||
|  | ||||
| # Checks for typedefs, structures, and compiler characteristics. | ||||
| ############### Checks for typedefs, structures, and compiler characteristics | ||||
| AC_TYPE_SIZE_T | ||||
| AC_TYPE_UINT32_T | ||||
| AC_TYPE_UINT64_T | ||||
|  | ||||
| # Checks for library functions. | ||||
| echo | ||||
| echo Checking libraries  | ||||
| echo ::::::::::::::::::::::::::::::::::::::::::: | ||||
| ############### GMP and MPFR ################# | ||||
| AC_ARG_WITH([gmp], | ||||
|     [AS_HELP_STRING([--with-gmp=prefix], | ||||
|     [try this for a non-standard install prefix of the GMP library])], | ||||
|     [AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"] | ||||
|     [AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"]) | ||||
| AC_ARG_WITH([mpfr], | ||||
|     [AS_HELP_STRING([--with-mpfr=prefix], | ||||
|     [try this for a non-standard install prefix of the MPFR library])], | ||||
|     [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] | ||||
|     [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) | ||||
|  | ||||
| ################## lapack #################### | ||||
| AC_ARG_ENABLE([lapack], | ||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  | ||||
|     [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) | ||||
|  | ||||
| case ${ac_LAPACK} in | ||||
|     no) | ||||
|         ;; | ||||
|     yes) | ||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; | ||||
|     *) | ||||
|         AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS" | ||||
|         AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS" | ||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]) | ||||
| esac | ||||
|  | ||||
| ################## first-touch #################### | ||||
| # Optional first-touch NUMA initialisation, off unless requested. | ||||
| # The option value arrives in the lower-case shell variable enable_numa; | ||||
| # reading any other spelling leaves ac_NUMA empty, which would fall into | ||||
| # the catch-all arm below and switch the feature on even for --disable-numa. | ||||
| AC_ARG_ENABLE([numa], | ||||
|     [AS_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],  | ||||
|     [ac_NUMA=${enable_numa}],[ac_NUMA=no]) | ||||
|  | ||||
| # Every value other than no defines GRID_NUMA. | ||||
| case ${ac_NUMA} in | ||||
|     no) | ||||
|         ;; | ||||
|     yes) | ||||
|         AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);; | ||||
|     *) | ||||
|         AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);; | ||||
| esac | ||||
|  | ||||
| ################## FFTW3 #################### | ||||
| AC_ARG_WITH([fftw],     | ||||
|             [AS_HELP_STRING([--with-fftw=prefix], | ||||
|             [try this for a non-standard install prefix of the FFTW3 library])], | ||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||
|  | ||||
| ################ Get compiler information | ||||
| AC_LANG([C++]) | ||||
| AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) | ||||
| AX_COMPILER_VENDOR | ||||
| AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], | ||||
|       [vendor of C++ compiler that will compile the code]) | ||||
| AX_GXX_VERSION | ||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||
|       [version of g++ that will compile the code]) | ||||
|  | ||||
| ############### Checks for library functions | ||||
| CXXFLAGS_CPY=$CXXFLAGS | ||||
| LDFLAGS_CPY=$LDFLAGS | ||||
| CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" | ||||
| LDFLAGS="$AM_LDFLAGS $LDFLAGS" | ||||
| AC_CHECK_FUNCS([gettimeofday]) | ||||
| AC_CHECK_LIB([gmp],[__gmpf_init], | ||||
|              [AC_CHECK_LIB([mpfr],[mpfr_init], | ||||
|                  [AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])] | ||||
|                  [have_mpfr=true] | ||||
|                  [LIBS="$LIBS -lmpfr"], | ||||
|                  [AC_MSG_ERROR([MPFR library not found])])] | ||||
|    	     [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])] | ||||
|              [have_gmp=true] | ||||
|              [LIBS="$LIBS -lgmp"], | ||||
|              [AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])]) | ||||
|  | ||||
| if test "${ac_LAPACK}x" != "nox"; then | ||||
|     AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[], | ||||
|                  [AC_MSG_ERROR("LAPACK enabled but library not found")]) | ||||
| fi | ||||
| AC_CHECK_LIB([fftw3],[fftw_execute], | ||||
|   [AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] | ||||
|   [have_fftw=true] | ||||
|   [LIBS="$LIBS -lfftw3 -lfftw3f"], | ||||
|   [AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])]) | ||||
| CXXFLAGS=$CXXFLAGS_CPY | ||||
| LDFLAGS=$LDFLAGS_CPY | ||||
|  | ||||
| # | ||||
| # SIMD instructions selection | ||||
| # | ||||
|  | ||||
| AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\ | ||||
| ############### SIMD instruction selection | ||||
| AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ | ||||
| 	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ | ||||
| 	[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG]) | ||||
| 	[ac_SIMD=${enable_simd}],[ac_SIMD=GEN]) | ||||
|  | ||||
| supported=no | ||||
|  | ||||
| ac_ZMM=no; | ||||
| case ${ax_cv_cxx_compiler_vendor} in | ||||
|   clang|gnu) | ||||
|     case ${ac_SIMD} in | ||||
|       SSE4) | ||||
|         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) | ||||
|         SIMD_FLAGS='-msse4.2';; | ||||
|       AVX) | ||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||
|         SIMD_FLAGS='-mavx';; | ||||
|       AVXFMA4) | ||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||
|         SIMD_FLAGS='-mavx -mfma4';; | ||||
|       AVXFMA) | ||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||
|         SIMD_FLAGS='-mavx -mfma';; | ||||
|       AVX2) | ||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||
|         SIMD_FLAGS='-mavx2 -mfma';; | ||||
|       AVX512|AVX512MIC|KNL) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||
|         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; | ||||
|       IMCI|KNC) | ||||
|         AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner]) | ||||
|         SIMD_FLAGS='';; | ||||
|       GEN) | ||||
|         AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) | ||||
|         SIMD_FLAGS='';; | ||||
|       QPX|BGQ) | ||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||
|         SIMD_FLAGS='';; | ||||
|       *) | ||||
|         AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);; | ||||
|     esac;; | ||||
|   intel) | ||||
|     case ${ac_SIMD} in | ||||
|       SSE4) | ||||
|         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) | ||||
|         SIMD_FLAGS='-msse4.2 -xsse4.2';; | ||||
|       AVX) | ||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||
|         SIMD_FLAGS='-mavx -xavx';; | ||||
|       AVXFMA4) | ||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||
|         SIMD_FLAGS='-mavx -mfma';; | ||||
|       AVXFMA) | ||||
|         # AVXFMA is the FMA3 variant; the FMA4 variant is the separate AVXFMA4 arm. | ||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||
|         SIMD_FLAGS='-mavx -mfma';; | ||||
|       AVX2) | ||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||
|         SIMD_FLAGS='-march=core-avx2 -xcore-avx2';; | ||||
|       AVX512) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||
|         SIMD_FLAGS='-xcore-avx512';; | ||||
|       AVX512MIC|KNL) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||
|         SIMD_FLAGS='-xmic-avx512';; | ||||
|       IMCI|KNC) | ||||
|         AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner]) | ||||
|         SIMD_FLAGS='';; | ||||
|       GEN) | ||||
|         AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) | ||||
|         SIMD_FLAGS='';; | ||||
|       *) | ||||
|         AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);; | ||||
|     esac;; | ||||
|   *) | ||||
|     AC_MSG_WARN([Compiler unknown, using generic vector code]) | ||||
|     AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);; | ||||
| esac | ||||
| AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS" | ||||
| AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS" | ||||
|  | ||||
| case ${ac_SIMD} in | ||||
|      SSE4) | ||||
|        echo Configuring for SSE4 | ||||
|        AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] ) | ||||
|        if test x"$ax_cv_support_ssse3_ext" = x"yes"; then  dnl minimal support for SSE4 | ||||
|          supported=yes | ||||
|        else | ||||
|   	AC_MSG_WARN([Your processor does not support SSE4 instructions]) | ||||
|        fi | ||||
|      ;; | ||||
|      AVX) | ||||
|        echo Configuring for AVX | ||||
|        AC_DEFINE([AVX1],[1],[AVX Intrinsics] ) | ||||
|        if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX | ||||
|        supported=yes			   | ||||
|        else | ||||
|        	AC_MSG_WARN([Your processor does not support AVX instructions]) | ||||
|        fi | ||||
|      ;; | ||||
|      AVXFMA4) | ||||
|        echo Configuring for AVX | ||||
|        AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] ) | ||||
|        if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX | ||||
|        supported=yes			   | ||||
|        else | ||||
|        	AC_MSG_WARN([Your processor does not support AVX instructions]) | ||||
|        fi | ||||
|      ;; | ||||
|      AVX2) | ||||
|        echo Configuring for AVX2 | ||||
|        AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] ) | ||||
|        if test x"$ax_cv_support_avx2_ext" = x"yes"; then  dnl minimal support for AVX2 | ||||
|        supported=yes | ||||
|        else | ||||
|        AC_MSG_WARN([Your processor does not support AVX2 instructions]) | ||||
|        fi | ||||
|      ;; | ||||
|      AVX512) | ||||
|        echo Configuring for AVX512  | ||||
|        AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] ) | ||||
|        supported="cross compilation" | ||||
|        ac_ZMM=yes; | ||||
|      ;; | ||||
|      IMCI) | ||||
|        echo Configuring for IMCI | ||||
|        AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] ) | ||||
|        supported="cross compilation" | ||||
|        ac_ZMM=no; | ||||
|      ;; | ||||
|      NEONv8) | ||||
|        echo Configuring for experimental ARMv8a support  | ||||
|        AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] ) | ||||
|        supported="cross compilation" | ||||
|      ;; | ||||
|      DEBUG) | ||||
|        echo Configuring without SIMD support - only for compiler DEBUGGING! | ||||
|        AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] ) | ||||
|       ;;      | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);  | ||||
|      ;; | ||||
|   AVX512|AVX512MIC|KNL) | ||||
|     AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);; | ||||
|   *) | ||||
| 	;; | ||||
| esac | ||||
|  | ||||
| case ${ac_ZMM} in | ||||
| yes) | ||||
| 	echo Enabling ZMM source code | ||||
| ;; | ||||
| no) | ||||
| 	echo Disabling ZMM source code | ||||
| ;; | ||||
| esac | ||||
|  | ||||
| AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ]) | ||||
|  | ||||
| ############### precision selection | ||||
| # Chooses which default-precision macro is defined; double when the option | ||||
| # is not given. Any value other than single or double is rejected so that | ||||
| # configure cannot finish with neither macro defined. | ||||
| AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double]) | ||||
| case ${ac_PRECISION} in | ||||
|      single) | ||||
|        echo default precision is single | ||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] ) | ||||
|      ;; | ||||
|      double) | ||||
|        echo default precision is double | ||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) | ||||
|      ;; | ||||
|      *) | ||||
|        AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);  | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| # | ||||
| # Comms selection | ||||
| # | ||||
|  | ||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||
| ############### communication type selection | ||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||
|  | ||||
| case ${ac_COMMS} in | ||||
|      none) | ||||
|        echo Configuring for NO communications | ||||
|        AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||
|      ;; | ||||
|      mpi-auto) | ||||
|        AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||
|        LX_FIND_MPI | ||||
|        if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi | ||||
|        AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS" | ||||
|        AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS" | ||||
|        AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS" | ||||
|        LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS" | ||||
|      ;; | ||||
|      mpi) | ||||
|        echo Configuring for MPI communications | ||||
|        AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||
|      ;; | ||||
|      shmem) | ||||
|        echo Configuring for SHMEM communications | ||||
|        AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) | ||||
|      ;; | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);  | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ]) | ||||
|  | ||||
| # | ||||
| # RNG selection | ||||
| # | ||||
| ############### RNG selection | ||||
| AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\ | ||||
| 	[Select Random Number Generator to be used])],\ | ||||
| 	[ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) | ||||
|  | ||||
| case ${ac_RNG} in | ||||
|      ranlux48) | ||||
|      AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] ) | ||||
|       AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] ) | ||||
|      ;; | ||||
|      mt19937) | ||||
|      AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] ) | ||||
|       AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] ) | ||||
|      ;; | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);  | ||||
|       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);  | ||||
|      ;; | ||||
| esac | ||||
| # | ||||
| # Chroma regression tests | ||||
| # | ||||
| AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) | ||||
|  | ||||
| case ${ac_CHROMA} in | ||||
| ############### timer option | ||||
| AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\ | ||||
| 	[Enable system dependent high res timers])],\ | ||||
| 	[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) | ||||
| case ${ac_TIMERS} in | ||||
|      yes) | ||||
|        echo Enabling tests regressing to Chroma | ||||
|       AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] ) | ||||
|      ;; | ||||
|      no) | ||||
|        echo Disabling tests regressing to Chroma | ||||
|       AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) | ||||
|      ;; | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);  | ||||
|       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);  | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| ############### Chroma regression test | ||||
| AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) | ||||
| case ${ac_CHROMA} in | ||||
|      yes|no) | ||||
|      ;; | ||||
|      *) | ||||
|        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);  | ||||
|      ;; | ||||
| esac | ||||
| AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ]) | ||||
|  | ||||
| # | ||||
| # Lapack | ||||
| # | ||||
| AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) | ||||
| ############### Doxygen | ||||
| AC_PROG_DOXYGEN | ||||
|  | ||||
| case ${ac_LAPACK} in | ||||
|      yes) | ||||
|        echo Enabling lapack | ||||
|      ;; | ||||
|      no) | ||||
|        echo Disabling lapack | ||||
|      ;; | ||||
|      *) | ||||
|        echo Enabling lapack at ${ac_LAPACK} | ||||
|      ;; | ||||
| esac | ||||
| if test -n "$DOXYGEN" | ||||
| then | ||||
| AC_CONFIG_FILES([docs/doxy.cfg]) | ||||
| fi | ||||
|  | ||||
| AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ]) | ||||
| AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ]) | ||||
|  | ||||
| ################################################################### | ||||
| # Checks for doxygen support | ||||
| # if present enables the "make doxyfile" command | ||||
| #echo | ||||
| #echo Checking doxygen support  | ||||
| #echo ::::::::::::::::::::::::::::::::::::::::::: | ||||
| #AC_PROG_DOXYGEN | ||||
|  | ||||
| #if test -n "$DOXYGEN" | ||||
| #then | ||||
| #AC_CONFIG_FILES([docs/doxy.cfg]) | ||||
| #fi | ||||
|  | ||||
| echo | ||||
| echo Creating configuration files | ||||
| echo ::::::::::::::::::::::::::::::::::::::::::: | ||||
| ############### Output | ||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||
| AC_SUBST([AM_CFLAGS]) | ||||
| AC_SUBST([AM_CXXFLAGS]) | ||||
| AC_SUBST([AM_LDFLAGS]) | ||||
| AC_CONFIG_FILES(Makefile) | ||||
| AC_CONFIG_FILES(lib/Makefile) | ||||
| AC_CONFIG_FILES(tests/Makefile) | ||||
| AC_CONFIG_FILES(tests/IO/Makefile) | ||||
| AC_CONFIG_FILES(tests/core/Makefile) | ||||
| AC_CONFIG_FILES(tests/debug/Makefile) | ||||
| AC_CONFIG_FILES(tests/forces/Makefile) | ||||
| AC_CONFIG_FILES(tests/hmc/Makefile) | ||||
| AC_CONFIG_FILES(tests/solver/Makefile) | ||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||
| AC_CONFIG_FILES(benchmarks/Makefile) | ||||
| AC_OUTPUT | ||||
|  | ||||
|  | ||||
| echo " | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| Summary of configuration for $PACKAGE v$VERSION | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| The following features are enabled: | ||||
|  | ||||
| ----- PLATFORM ---------------------------------------- | ||||
| - architecture (build)          : $build_cpu | ||||
| - os (build)                    : $build_os | ||||
| - architecture (target)         : $target_cpu | ||||
| - os (target)                   : $target_os | ||||
| - compiler vendor               : ${ax_cv_cxx_compiler_vendor} | ||||
| - compiler version              : ${ax_cv_gxx_version} | ||||
| ----- BUILD OPTIONS ----------------------------------- | ||||
| - SIMD                          : ${ac_SIMD} | ||||
| - Threading                     : ${ac_openmp}  | ||||
| - Communications type           : ${ac_COMMS} | ||||
| - Default precision             : ${ac_PRECISION} | ||||
| - RNG choice                    : ${ac_RNG}  | ||||
| - GMP                           : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||
| - LAPACK                        : ${ac_LAPACK} | ||||
| - FFTW                          : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||
| - build DOXYGEN documentation   : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` | ||||
| - graphs and diagrams           : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` | ||||
| - Supported SIMD flags          : $SIMD_FLAGS | ||||
| ---------------------------------------------------------- | ||||
| - enabled simd support          : ${ac_SIMD}   (config macro says supported: $supported ) | ||||
| - communications type           : ${ac_COMMS} | ||||
| - default precision             : ${ac_PRECISION} | ||||
| - RNG choice                    : ${ac_RNG}  | ||||
| - LAPACK	                : ${ac_LAPACK}  | ||||
|  | ||||
|  | ||||
| ----- BUILD FLAGS ------------------------------------- | ||||
| - CXXFLAGS: | ||||
| `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| - LDFLAGS: | ||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| - LIBS: | ||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| ------------------------------------------------------- | ||||
| " | ||||
|   | ||||
							
								
								
									
										1
									
								
								include/Grid
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								include/Grid
									
									
									
									
									
										Symbolic link
									
								
							| @@ -0,0 +1 @@ | ||||
| ../lib | ||||
| @@ -29,27 +29,28 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_ALGORITHMS_H | ||||
| #define GRID_ALGORITHMS_H | ||||
|  | ||||
| #include <algorithms/SparseMatrix.h> | ||||
| #include <algorithms/LinearOperator.h> | ||||
| #include <algorithms/Preconditioner.h> | ||||
| #include <Grid/algorithms/SparseMatrix.h> | ||||
| #include <Grid/algorithms/LinearOperator.h> | ||||
| #include <Grid/algorithms/Preconditioner.h> | ||||
|  | ||||
| #include <algorithms/approx/Zolotarev.h> | ||||
| #include <algorithms/approx/Chebyshev.h> | ||||
| #include <algorithms/approx/Remez.h> | ||||
| #include <algorithms/approx/MultiShiftFunction.h> | ||||
| #include <Grid/algorithms/approx/Zolotarev.h> | ||||
| #include <Grid/algorithms/approx/Chebyshev.h> | ||||
| #include <Grid/algorithms/approx/Remez.h> | ||||
| #include <Grid/algorithms/approx/MultiShiftFunction.h> | ||||
|  | ||||
| #include <algorithms/iterative/ConjugateGradient.h> | ||||
| #include <algorithms/iterative/ConjugateResidual.h> | ||||
| #include <algorithms/iterative/NormalEquations.h> | ||||
| #include <algorithms/iterative/SchurRedBlack.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradient.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateResidual.h> | ||||
| #include <Grid/algorithms/iterative/NormalEquations.h> | ||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||
|  | ||||
| #include <algorithms/iterative/ConjugateGradientMultiShift.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||
|  | ||||
| // Lanczos support | ||||
| #include <algorithms/iterative/MatrixUtils.h> | ||||
| #include <algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||
| #include <Grid/algorithms/iterative/MatrixUtils.h> | ||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||
|  | ||||
| #include <algorithms/CoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | ||||
|  | ||||
| // Eigen/lanczos | ||||
| // EigCg | ||||
|   | ||||
| @@ -113,9 +113,8 @@ public: | ||||
|  | ||||
| #endif | ||||
|     _Tp tmp; | ||||
| #undef FIRST_TOUCH_OPTIMISE | ||||
| #ifdef FIRST_TOUCH_OPTIMISE | ||||
| #pragma omp parallel for  | ||||
| #ifdef GRID_NUMA | ||||
| #pragma omp parallel for schedule(static) | ||||
|   for(int i=0;i<__n;i++){ | ||||
|     ptr[i]=tmp; | ||||
|   } | ||||
|   | ||||
| @@ -28,8 +28,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_CARTESIAN_H | ||||
| #define GRID_CARTESIAN_H | ||||
|  | ||||
| #include <cartesian/Cartesian_base.h> | ||||
| #include <cartesian/Cartesian_full.h> | ||||
| #include <cartesian/Cartesian_red_black.h>  | ||||
| #include <Grid/cartesian/Cartesian_base.h> | ||||
| #include <Grid/cartesian/Cartesian_full.h> | ||||
| #include <Grid/cartesian/Cartesian_red_black.h>  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_COMMUNICATOR_H | ||||
| #define GRID_COMMUNICATOR_H | ||||
|  | ||||
| #include <communicator/Communicator_base.h> | ||||
| #include <Grid/communicator/Communicator_base.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -28,17 +28,17 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef _GRID_CSHIFT_H_ | ||||
| #define _GRID_CSHIFT_H_ | ||||
|  | ||||
| #include <cshift/Cshift_common.h> | ||||
| #include <Grid/cshift/Cshift_common.h> | ||||
|  | ||||
| #ifdef GRID_COMMS_NONE | ||||
| #include <cshift/Cshift_none.h> | ||||
| #include <Grid/cshift/Cshift_none.h> | ||||
| #endif | ||||
|  | ||||
| #ifdef GRID_COMMS_MPI | ||||
| #include <cshift/Cshift_mpi.h> | ||||
| #include <Grid/cshift/Cshift_mpi.h> | ||||
| #endif  | ||||
|  | ||||
| #ifdef GRID_COMMS_SHMEM | ||||
| #include <cshift/Cshift_mpi.h> // uses same implementation of communicator | ||||
| #include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator | ||||
| #endif  | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										276
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										276
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,276 @@ | ||||
|  | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/FFT.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef _GRID_FFT_H_ | ||||
| #define _GRID_FFT_H_ | ||||
|  | ||||
| #ifdef HAVE_FFTW	 | ||||
| #include <fftw3.h> | ||||
| #endif | ||||
| namespace Grid { | ||||
|  | ||||
|   template<class scalar> struct FFTW { }; /* empty primary template; the ComplexD / ComplexF specialisations (compiled only when HAVE_FFTW is defined) supply the actual FFTW bindings */ | ||||
|  | ||||
| #ifdef HAVE_FFTW	 | ||||
|   // Double precision binding: every member forwards, argument for argument, | ||||
|   // to the global fftw_* routine of the same name, so that templated code can | ||||
|   // select the precision through FFTW<scalar>. | ||||
|   template<> struct FFTW<ComplexD> { | ||||
|  | ||||
|     typedef fftw_complex FFTW_scalar; | ||||
|     typedef fftw_plan    FFTW_plan; | ||||
|  | ||||
|     // Build a plan for `howmany` strided transforms; nothing is altered on the way through. | ||||
|     static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, | ||||
|                                         FFTW_scalar *in, const int *inembed, | ||||
|                                         int istride, int idist, | ||||
|                                         FFTW_scalar *out, const int *onembed, | ||||
|                                         int ostride, int odist, | ||||
|                                         int sign, unsigned flags) | ||||
|     { | ||||
|       FFTW_plan plan = ::fftw_plan_many_dft(rank,n,howmany, | ||||
|                                             in,inembed,istride,idist, | ||||
|                                             out,onembed,ostride,odist, | ||||
|                                             sign,flags); | ||||
|       return plan; | ||||
|     } | ||||
|  | ||||
|     // Query the operation counts FFTW reports for plan p. | ||||
|     static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas) | ||||
|     { | ||||
|       ::fftw_flops(p,add,mul,fmas); | ||||
|     } | ||||
|  | ||||
|     // Run plan p on the given in/out arrays (FFTW's new-array execute). | ||||
|     static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) | ||||
|     { | ||||
|       ::fftw_execute_dft(p,in,out); | ||||
|     } | ||||
|  | ||||
|     // Release plan p. | ||||
|     static void fftw_destroy_plan(const FFTW_plan p) | ||||
|     { | ||||
|       ::fftw_destroy_plan(p); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   // Single precision binding: every member forwards, argument for argument, | ||||
|   // to the corresponding global fftwf_* routine, so that templated code can | ||||
|   // select the precision through FFTW<scalar>. | ||||
|   template<> struct FFTW<ComplexF> { | ||||
|  | ||||
|     typedef fftwf_complex FFTW_scalar; | ||||
|     typedef fftwf_plan    FFTW_plan; | ||||
|  | ||||
|     // Build a plan for `howmany` strided transforms; nothing is altered on the way through. | ||||
|     static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, | ||||
|                                         FFTW_scalar *in, const int *inembed, | ||||
|                                         int istride, int idist, | ||||
|                                         FFTW_scalar *out, const int *onembed, | ||||
|                                         int ostride, int odist, | ||||
|                                         int sign, unsigned flags) | ||||
|     { | ||||
|       FFTW_plan plan = ::fftwf_plan_many_dft(rank,n,howmany, | ||||
|                                              in,inembed,istride,idist, | ||||
|                                              out,onembed,ostride,odist, | ||||
|                                              sign,flags); | ||||
|       return plan; | ||||
|     } | ||||
|  | ||||
|     // Query the operation counts FFTW reports for plan p. | ||||
|     static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas) | ||||
|     { | ||||
|       ::fftwf_flops(p,add,mul,fmas); | ||||
|     } | ||||
|  | ||||
|     // Run plan p on the given in/out arrays (FFTW's new-array execute). | ||||
|     static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) | ||||
|     { | ||||
|       ::fftwf_execute_dft(p,in,out); | ||||
|     } | ||||
|  | ||||
|     // Release plan p. | ||||
|     static void fftw_destroy_plan(const FFTW_plan p) | ||||
|     { | ||||
|       ::fftwf_destroy_plan(p); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #ifndef FFTW_FORWARD | ||||
| #define FFTW_FORWARD (-1) | ||||
| #define FFTW_BACKWARD (+1) | ||||
| #endif | ||||
|  | ||||
|   // Fourier transform of a Lattice along one dimension at a time. | ||||
|   // | ||||
|   // For the chosen dimension every rank gathers the complete line ("pencil") | ||||
|   // of sites, runs 1d FFTW transforms on it and keeps the part of the result | ||||
|   // that belongs to its own local volume. Without HAVE_FFTW, FFT_dim asserts. | ||||
|   // | ||||
|   // The object owns a second GridCartesian (sgrid), hence it is non-copyable. | ||||
|   class FFT { | ||||
|   private: | ||||
|  | ||||
|     GridCartesian *vgrid; // grid supplied by the caller; not owned | ||||
|     GridCartesian *sgrid; // same dimensions and processors, layout of all ones; owned | ||||
|  | ||||
|     int Nd; | ||||
|     double flops;      // accumulated over all FFT_dim calls | ||||
|     double flops_call; // FFTW's estimate for one execution of the most recent plan | ||||
|     uint64_t usec;     // accumulated time spent inside the FFTW execute loop | ||||
|  | ||||
|     std::vector<int> dimensions; | ||||
|     std::vector<int> processors; | ||||
|     std::vector<int> processor_coor; | ||||
|  | ||||
|     // sgrid is a raw owning pointer: a compiler generated copy would delete it twice. | ||||
|     FFT(const FFT &) = delete; | ||||
|     FFT &operator=(const FFT &) = delete; | ||||
|  | ||||
|   public: | ||||
|  | ||||
|     static const int forward=FFTW_FORWARD; | ||||
|     static const int backward=FFTW_BACKWARD; | ||||
|  | ||||
|     // Accumulated flop count as reported by FFTW. | ||||
|     double Flops(void) {return flops;} | ||||
|     // Flops per microsecond; 0 until a transform has been timed (avoids 0/0). | ||||
|     double MFlops(void) {return usec ? flops/usec : 0.0;} | ||||
|  | ||||
|     // grid: the grid of the lattices that will be transformed. It must outlive this object. | ||||
|     FFT ( GridCartesian * grid ) : | ||||
|       vgrid(grid), | ||||
|       Nd(grid->_ndimension), | ||||
|       dimensions(grid->_fdimensions), | ||||
|       processors(grid->_processors), | ||||
|       processor_coor(grid->_processor_coor) | ||||
|     { | ||||
|       flops=0; | ||||
|       flops_call=0; | ||||
|       usec =0; | ||||
|       std::vector<int> layout(Nd,1); | ||||
|       sgrid = new GridCartesian(dimensions,layout,processors); | ||||
|     }; | ||||
|  | ||||
|     ~FFT ( void)  { | ||||
|       delete sgrid; | ||||
|     } | ||||
|  | ||||
|     // Transform `source` along dimension `dim` and store the outcome in `result`. | ||||
|     //   result, source : lattices on the grid given to the constructor | ||||
|     //   dim            : dimension to transform, 0 <= dim < Nd | ||||
|     //   inverse        : zero selects FFTW_FORWARD, non-zero FFTW_BACKWARD | ||||
|     // No normalisation is applied here. | ||||
|     template<class vobj> | ||||
|     void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){ | ||||
|  | ||||
|       conformable(result._grid,vgrid); | ||||
|       conformable(source._grid,vgrid); | ||||
|       assert( (dim>=0) && (dim<Nd) ); | ||||
|  | ||||
|       int L = vgrid->_ldimensions[dim]; | ||||
|       int G = vgrid->_fdimensions[dim]; | ||||
|  | ||||
|       std::vector<int> layout(Nd,1); | ||||
|       std::vector<int> pencil_gd(vgrid->_fdimensions); | ||||
|  | ||||
|       // Inflate the global extent so that, once divided over the processors, | ||||
|       // every rank holds the full length G along dim. | ||||
|       pencil_gd[dim] = G*processors[dim]; | ||||
|  | ||||
|       // Pencil global vol LxLxGxLxL per node | ||||
|       GridCartesian pencil_g(pencil_gd,layout,processors); | ||||
|  | ||||
|       // Construct pencils | ||||
|       typedef typename vobj::scalar_object sobj; | ||||
|       typedef typename sobj::scalar_type   scalar; | ||||
|  | ||||
|       Lattice<vobj> ssource(vgrid); ssource =source; | ||||
|       Lattice<sobj> pgsource(&pencil_g); | ||||
|       Lattice<sobj> pgresult(&pencil_g); pgresult=zero; | ||||
|  | ||||
| #ifndef HAVE_FFTW | ||||
|       assert(0); | ||||
| #else | ||||
|       typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar; | ||||
|       typedef typename FFTW<scalar>::FFTW_plan   FFTW_plan; | ||||
|  | ||||
|       { | ||||
|         int Ncomp = sizeof(sobj)/sizeof(scalar); | ||||
|         int Nlow  = 1; | ||||
|         for(int d=0;d<dim;d++){ | ||||
|           Nlow*=vgrid->_ldimensions[d]; | ||||
|         } | ||||
|  | ||||
|         int rank = 1;  /* 1d transforms */ | ||||
|         int n[] = {G}; /* 1d transforms of length G */ | ||||
|         int howmany = Ncomp; | ||||
|         int odist,idist,istride,ostride; | ||||
|         idist   = odist   = 1;          /* Distance between consecutive FT's */ | ||||
|         istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */ | ||||
|         int *inembed = n, *onembed = n; | ||||
|  | ||||
|         int sign = FFTW_FORWARD; | ||||
|         if (inverse) sign = FFTW_BACKWARD; | ||||
|  | ||||
|         // NOTE(review): the plan is built on element 0 and later executed on | ||||
|         // other elements through the new-array interface. FFTW requires the | ||||
|         // same alignment there unless FFTW_UNALIGNED is passed - confirm that | ||||
|         // sizeof(sobj) guarantees this for every sobj in use. | ||||
|         FFTW_plan plan; | ||||
|         { | ||||
|           FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0]; | ||||
|           FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0]; | ||||
|           plan = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany, | ||||
|                                                   in,inembed, | ||||
|                                                   istride,idist, | ||||
|                                                   out,onembed, | ||||
|                                                   ostride, odist, | ||||
|                                                   sign,FFTW_ESTIMATE); | ||||
|         } | ||||
|         assert(plan != 0); // the planner hands back a null plan on failure | ||||
|  | ||||
|         double add,mul,fma; | ||||
|         FFTW<scalar>::fftw_flops(plan,&add,&mul,&fma); | ||||
|         flops_call = add+mul+2.0*fma; | ||||
|  | ||||
|         int pc = processor_coor[dim]; | ||||
|  | ||||
|         // Barrel shift and collect global pencil. | ||||
|         // After `hop` shifts by L this rank holds the slab that started out on | ||||
|         // processor coordinate (pc+hop) mod P, so that is the slot it must go | ||||
|         // to; using `hop` alone leaves the pencil rotated on ranks with pc!=0. | ||||
|         // NOTE(review): relies on Cshift(f,dim,L)(x) == f(x+L) - confirm. | ||||
|         for(int hop=0;hop<processors[dim];hop++) { | ||||
|  | ||||
|           int slot = (pc+hop) % processors[dim]; | ||||
|  | ||||
|           for(int idx=0;idx<sgrid->lSites();idx++) { | ||||
|  | ||||
|             std::vector<int> lcoor(Nd); | ||||
|             sgrid->LocalIndexToLocalCoor(idx,lcoor); | ||||
|  | ||||
|             sobj s; | ||||
|  | ||||
|             peekLocalSite(s,ssource,lcoor); | ||||
|  | ||||
|             lcoor[dim]+=slot*L; | ||||
|  | ||||
|             pokeLocalSite(s,pgsource,lcoor); | ||||
|           } | ||||
|  | ||||
|           // The slab produced by a shift after the last pass would never be read. | ||||
|           if ( hop != processors[dim]-1 ) { | ||||
|             ssource = Cshift(ssource,dim,L); | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         // Loop over orthog coords | ||||
|         int NN=pencil_g.lSites(); | ||||
|  | ||||
|         GridStopWatch Timer; | ||||
|         Timer.Start(); | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|         for(int idx=0;idx<NN;idx++) { | ||||
|  | ||||
|           std::vector<int> lcoor(Nd); | ||||
|           pencil_g.LocalIndexToLocalCoor(idx,lcoor); | ||||
|  | ||||
|           if ( lcoor[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0 | ||||
|             FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx]; | ||||
|             FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx]; | ||||
|             FFTW<scalar>::fftw_execute_dft(plan,in,out); | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         Timer.Stop(); | ||||
|         usec += Timer.useconds(); | ||||
|         // NOTE(review): the plan is executed only on the lcoor[dim]==0 plane, | ||||
|         // yet flops_call is scaled by all NN sites - looks like an overcount by | ||||
|         // a factor G. Left unchanged; confirm before trusting Flops()/MFlops(). | ||||
|         flops+= flops_call*NN; | ||||
|  | ||||
|         // Keep the slab of the transformed pencil that matches this rank's position. | ||||
|         for(int idx=0;idx<sgrid->lSites();idx++) { | ||||
|           std::vector<int> lcoor(Nd); | ||||
|           sgrid->LocalIndexToLocalCoor(idx,lcoor); | ||||
|           std::vector<int> gcoor = lcoor; | ||||
|           // extract the result | ||||
|           sobj s; | ||||
|           gcoor[dim] = lcoor[dim]+L*pc; | ||||
|           peekLocalSite(s,pgresult,gcoor); | ||||
|           pokeLocalSite(s,result,lcoor); | ||||
|         } | ||||
|  | ||||
|         FFTW<scalar>::fftw_destroy_plan(plan); | ||||
|       } | ||||
| #endif | ||||
|  | ||||
|  | ||||
|     } | ||||
|  | ||||
|   }; | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										46
									
								
								lib/Grid.h
									
									
									
									
									
								
							
							
						
						
									
										46
									
								
								lib/Grid.h
									
									
									
									
									
								
							| @@ -59,29 +59,31 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| /////////////////// | ||||
| // Grid headers | ||||
| /////////////////// | ||||
| #include <serialisation/Serialisation.h> | ||||
| #include <Config.h> | ||||
| #include <Timer.h> | ||||
| #include <PerfCount.h> | ||||
| #include <Log.h> | ||||
| #include <AlignedAllocator.h> | ||||
| #include <Simd.h> | ||||
| #include <Threads.h> | ||||
| #include <Lexicographic.h> | ||||
| #include <Communicator.h>  | ||||
| #include <Cartesian.h>     | ||||
| #include <Tensors.h>       | ||||
| #include <Lattice.h>       | ||||
| #include <Cshift.h>        | ||||
| #include <Stencil.h>       | ||||
| #include <Algorithms.h>    | ||||
| #include <parallelIO/BinaryIO.h> | ||||
| #include <qcd/QCD.h> | ||||
| #include <parallelIO/NerscIO.h> | ||||
| #include <Init.h> | ||||
| #include <Grid/serialisation/Serialisation.h> | ||||
| #include "Config.h" | ||||
| #include <Grid/Timer.h> | ||||
| #include <Grid/PerfCount.h> | ||||
| #include <Grid/Log.h> | ||||
| #include <Grid/AlignedAllocator.h> | ||||
| #include <Grid/Simd.h> | ||||
| #include <Grid/Threads.h> | ||||
| #include <Grid/Lexicographic.h> | ||||
| #include <Grid/Init.h> | ||||
| #include <Grid/Communicator.h>  | ||||
| #include <Grid/Cartesian.h>     | ||||
| #include <Grid/Tensors.h>       | ||||
| #include <Grid/Lattice.h>       | ||||
| #include <Grid/Cshift.h>        | ||||
| #include <Grid/Stencil.h>       | ||||
| #include <Grid/Algorithms.h>    | ||||
| #include <Grid/parallelIO/BinaryIO.h> | ||||
| #include <Grid/qcd/QCD.h> | ||||
| #include <Grid/parallelIO/NerscIO.h> | ||||
|  | ||||
| #include <qcd/hmc/NerscCheckpointer.h> | ||||
| #include <qcd/hmc/HmcRunner.h> | ||||
| #include <Grid/FFT.h> | ||||
|  | ||||
| #include <Grid/qcd/hmc/NerscCheckpointer.h> | ||||
| #include <Grid/qcd/hmc/HmcRunner.h> | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										55
									
								
								lib/Init.cc
									
									
									
									
									
								
							
							
						
						
									
										55
									
								
								lib/Init.cc
									
									
									
									
									
								
							| @@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc, | ||||
|     assert(ompthreads.size()==1); | ||||
|     GridThread::SetThreads(ompthreads[0]); | ||||
|   } | ||||
|  | ||||
|   if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ | ||||
|     std::vector<int> cores(0); | ||||
|     arg= GridCmdOptionPayload(argv,argv+argc,"--cores"); | ||||
| @@ -193,7 +194,7 @@ void Grid_init(int *argc,char ***argv) | ||||
|     std::cout<<GridLogMessage<<"--mpi n.n.n.n   : default MPI decomposition"<<std::endl;     | ||||
|     std::cout<<GridLogMessage<<"--threads n     : default number of OMP threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"--grid n.n.n.n  : default Grid size"<<std::endl;     | ||||
|     std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl; | ||||
|     exit(EXIT_SUCCESS); | ||||
|   } | ||||
|  | ||||
| @@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv) | ||||
|     GridLogConfigure(logstreams); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ | ||||
|     Grid_debug_handler_init(); | ||||
|   } | ||||
| @@ -211,8 +211,7 @@ void Grid_init(int *argc,char ***argv) | ||||
|     Grid_quiesce_nodes(); | ||||
|   } | ||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){ | ||||
|     QCD::WilsonFermionStatic::HandOptDslash=1; | ||||
|     QCD::WilsonFermion5DStatic::HandOptDslash=1; | ||||
|     QCD::WilsonKernelsStatic::HandOpt=1; | ||||
|   } | ||||
|   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){ | ||||
|     LebesgueOrder::UseLebesgueOrder=1; | ||||
| @@ -235,26 +234,34 @@ void Grid_init(int *argc,char ***argv) | ||||
|     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; | ||||
|   } | ||||
|  | ||||
|   std::string COL_RED    = GridLogColours.colour["RED"]; | ||||
|   std::string COL_PURPLE = GridLogColours.colour["PURPLE"]; | ||||
|   std::string COL_BLACK  = GridLogColours.colour["BLACK"]; | ||||
|   std::string COL_GREEN  = GridLogColours.colour["GREEN"]; | ||||
|   std::string COL_BLUE   = GridLogColours.colour["BLUE"]; | ||||
|   std::string COL_YELLOW = GridLogColours.colour["YELLOW"]; | ||||
|   std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"]; | ||||
|  | ||||
|    | ||||
|   std::cout <<std::endl; | ||||
|   std::cout <<Logger::RED  << "__|__|__|__|__"<<             "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<Logger::RED  << "__|__|__|__|__"<<             "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<Logger::RED  << "__|__|  |  |  "<<             "|  |  | "<<Logger::PURPLE<<" |  |  |"<<                "  |  |  | _|__"<<std::endl;  | ||||
|   std::cout <<Logger::RED  << "__|__         "<<             "        "<<Logger::PURPLE<<"        "<<                "          _|__"<<std::endl;  | ||||
|   std::cout <<Logger::RED  << "__|_  "<<Logger::GREEN<<" GGGG   "<<Logger::RED<<" RRRR   "<<Logger::BLUE  <<" III    "<<Logger::PURPLE<<"DDDD  "<<Logger::PURPLE<<"    _|__"<<std::endl; | ||||
|   std::cout <<Logger::RED  << "__|_  "<<Logger::GREEN<<"G       "<<Logger::RED<<" R   R  "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D   D "<<Logger::PURPLE<<"    _|__"<<std::endl; | ||||
|   std::cout <<Logger::RED  << "__|_  "<<Logger::GREEN<<"G       "<<Logger::RED<<" R   R  "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D    D"<<Logger::PURPLE<<"    _|__"<<std::endl; | ||||
|   std::cout <<Logger::BLUE << "__|_  "<<Logger::GREEN<<"G  GG   "<<Logger::RED<<" RRRR   "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D    D"<<Logger::GREEN <<"    _|__"<<std::endl; | ||||
|   std::cout <<Logger::BLUE << "__|_  "<<Logger::GREEN<<"G   G   "<<Logger::RED<<" R  R   "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D   D "<<Logger::GREEN <<"    _|__"<<std::endl; | ||||
|   std::cout <<Logger::BLUE << "__|_  "<<Logger::GREEN<<" GGGG   "<<Logger::RED<<" R   R  "<<Logger::BLUE  <<" III    "<<Logger::PURPLE<<"DDDD  "<<Logger::GREEN <<"    _|__"<<std::endl; | ||||
|   std::cout <<Logger::BLUE << "__|__         "<<             "        "<<Logger::GREEN <<"        "<<                "          _|__"<<std::endl;  | ||||
|   std::cout <<Logger::BLUE << "__|__|__|__|__"<<             "|__|__|_"<<Logger::GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<Logger::BLUE << "__|__|__|__|__"<<             "|__|__|_"<<Logger::GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<Logger::BLUE << "  |  |  |  |  "<<             "|  |  | "<<Logger::GREEN <<" |  |  |"<<                "  |  |  |  |  "<<std::endl;  | ||||
|   std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<COL_RED  << "__|_ |  |  |  "<<             "|  |  | "<<COL_PURPLE<<" |  |  |"<<                "  |  |  | _|__"<<std::endl;  | ||||
|   std::cout <<COL_RED  << "__|_          "<<             "        "<<COL_PURPLE<<"        "<<                "          _|__"<<std::endl;  | ||||
|   std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_PURPLE<<"    _|__"<<std::endl; | ||||
|   std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_PURPLE<<"    _|__"<<std::endl; | ||||
|   std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_PURPLE<<"    _|__"<<std::endl; | ||||
|   std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G  GG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_GREEN <<"    _|__"<<std::endl; | ||||
|   std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G   G   "<<COL_RED<<" R  R   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_GREEN <<"    _|__"<<std::endl; | ||||
|   std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" R   R  "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_GREEN <<"    _|__"<<std::endl; | ||||
|   std::cout <<COL_BLUE << "__|_          "<<             "        "<<COL_GREEN <<"        "<<                "          _|__"<<std::endl;  | ||||
|   std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl;  | ||||
|   std::cout <<COL_BLUE << "  |  |  |  |  "<<             "|  |  | "<<COL_GREEN <<" |  |  |"<<                "  |  |  |  |  "<<std::endl;  | ||||
|   std::cout << std::endl; | ||||
|   std::cout << std::endl; | ||||
|   std::cout <<Logger::YELLOW<< std::endl; | ||||
|   std::cout <<COL_YELLOW<< std::endl; | ||||
|   std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl; | ||||
|   std::cout << "Colours by Tadahito Boyle "<<std::endl; | ||||
|   std::cout << std::endl; | ||||
|   std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl; | ||||
|   std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl; | ||||
| @@ -265,7 +272,8 @@ void Grid_init(int *argc,char ***argv) | ||||
|   std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl; | ||||
|   std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl; | ||||
|   std::cout << "GNU General Public License for more details."<<std::endl; | ||||
|   std::cout << Logger::BLACK <<std::endl; | ||||
|   std::cout << COL_BACKGROUND <<std::endl; | ||||
|   std::cout << std::endl; | ||||
| } | ||||
|  | ||||
|    | ||||
| @@ -276,11 +284,6 @@ void Grid_finalize(void) | ||||
|   Grid_unquiesce_nodes(); | ||||
| #endif | ||||
| } | ||||
| double usecond(void) { | ||||
|   struct timeval tv; | ||||
|   gettimeofday(&tv,NULL); | ||||
|   return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec; | ||||
| } | ||||
|  | ||||
| void * Grid_backtrace_buffer[_NBACKTRACE]; | ||||
|  | ||||
|   | ||||
| @@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_LATTICE_H | ||||
| #define GRID_LATTICE_H | ||||
|  | ||||
| #include <lattice/Lattice_base.h> | ||||
| #include <Grid/lattice/Lattice_base.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										128
									
								
								lib/Log.cc
									
									
									
									
									
								
							
							
						
						
									
										128
									
								
								lib/Log.cc
									
									
									
									
									
								
							| @@ -1,126 +1,92 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/Log.cc | ||||
| Source file: ./lib/Log.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| GridStopWatch Logger::StopWatch; | ||||
| std::ostream  Logger::devnull(0); | ||||
| std::string Logger::BLACK("\033[30m"); | ||||
| std::string Logger::RED("\033[31m"); | ||||
| std::string Logger::GREEN("\033[32m"); | ||||
| std::string Logger::YELLOW("\033[33m"); | ||||
| std::string Logger::BLUE("\033[34m"); | ||||
| std::string Logger::PURPLE("\033[35m"); | ||||
| std::string Logger::CYAN("\033[36m"); | ||||
| std::string Logger::WHITE("\033[37m"); | ||||
| std::string Logger::NORMAL("\033[0;39m"); | ||||
| std::string EMPTY(""); | ||||
| std::ostream Logger::devnull(0); | ||||
|  | ||||
| #if 0   | ||||
|   GridLogger GridLogError      (1,"Error",Logger::RED); | ||||
|   GridLogger GridLogWarning    (1,"Warning",Logger::YELLOW); | ||||
|   GridLogger GridLogMessage    (1,"Message",Logger::BLACK); | ||||
|   GridLogger GridLogDebug      (1,"Debug",Logger::PURPLE); | ||||
|   GridLogger GridLogPerformance(1,"Performance",Logger::GREEN); | ||||
|   GridLogger GridLogIterative  (1,"Iterative",Logger::BLUE); | ||||
|   GridLogger GridLogIntegrator (1,"Integrator",Logger::BLUE); | ||||
| #else | ||||
|   GridLogger GridLogError      (1,"Error",EMPTY); | ||||
|   GridLogger GridLogWarning    (1,"Warning",EMPTY); | ||||
|   GridLogger GridLogMessage    (1,"Message",EMPTY); | ||||
|   GridLogger GridLogDebug      (1,"Debug",EMPTY); | ||||
|   GridLogger GridLogPerformance(1,"Performance",EMPTY); | ||||
|   GridLogger GridLogIterative  (1,"Iterative",EMPTY); | ||||
|   GridLogger GridLogIntegrator (1,"Integrator",EMPTY); | ||||
| #endif | ||||
| Colours GridLogColours(0); | ||||
| GridLogger GridLogError(1, "Error", GridLogColours, "RED"); | ||||
| GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); | ||||
| GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); | ||||
| GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE"); | ||||
| GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | ||||
| GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE"); | ||||
| GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE"); | ||||
|  | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams) | ||||
| { | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|   GridLogError.Active(0); | ||||
|   GridLogWarning.Active(0); | ||||
|   GridLogMessage.Active(0); | ||||
|   GridLogMessage.Active(1); // at least the messages should be always on | ||||
|   GridLogIterative.Active(0); | ||||
|   GridLogDebug.Active(0); | ||||
|   GridLogPerformance.Active(0); | ||||
|   GridLogIntegrator.Active(0); | ||||
|   GridLogColours.Active(0); | ||||
|  | ||||
|   int blackAndWhite = 1; | ||||
|   if(blackAndWhite){ | ||||
|     Logger::BLACK = std::string(""); | ||||
|     Logger::RED    =Logger::BLACK; | ||||
|     Logger::GREEN  =Logger::BLACK; | ||||
|     Logger::YELLOW =Logger::BLACK; | ||||
|     Logger::BLUE   =Logger::BLACK; | ||||
|     Logger::PURPLE =Logger::BLACK; | ||||
|     Logger::CYAN   =Logger::BLACK; | ||||
|     Logger::WHITE  =Logger::BLACK; | ||||
|     Logger::NORMAL =Logger::BLACK; | ||||
|   } | ||||
|  | ||||
|   for(int i=0;i<logstreams.size();i++){ | ||||
|     if ( logstreams[i]== std::string("Error")       ) GridLogError.Active(1); | ||||
|     if ( logstreams[i]== std::string("Warning")     ) GridLogWarning.Active(1); | ||||
|     if ( logstreams[i]== std::string("Message")     ) GridLogMessage.Active(1); | ||||
|     if ( logstreams[i]== std::string("Iterative")   ) GridLogIterative.Active(1); | ||||
|     if ( logstreams[i]== std::string("Debug")       ) GridLogDebug.Active(1); | ||||
|     if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1); | ||||
|     if ( logstreams[i]== std::string("Integrator" ) ) GridLogIntegrator.Active(1); | ||||
|   for (int i = 0; i < logstreams.size(); i++) { | ||||
|     if (logstreams[i] == std::string("Error")) GridLogError.Active(1); | ||||
|     if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1); | ||||
|     if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); | ||||
|     if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); | ||||
|     if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1); | ||||
|     if (logstreams[i] == std::string("Performance")) | ||||
|       GridLogPerformance.Active(1); | ||||
|     if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1); | ||||
|     if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////// | ||||
| // Verbose limiter on MPI tasks | ||||
| //////////////////////////////////////////////////////////// | ||||
| void Grid_quiesce_nodes(void) | ||||
| { | ||||
|   int me=0; | ||||
| void Grid_quiesce_nodes(void) { | ||||
|   int me = 0; | ||||
| #ifdef GRID_COMMS_MPI | ||||
|   MPI_Comm_rank(MPI_COMM_WORLD,&me); | ||||
|   MPI_Comm_rank(MPI_COMM_WORLD, &me); | ||||
| #endif | ||||
| #ifdef GRID_COMMS_SHMEM | ||||
|   me = shmem_my_pe(); | ||||
| #endif | ||||
|   if ( me ) {  | ||||
|   if (me) { | ||||
|     std::cout.setstate(std::ios::badbit); | ||||
|   } | ||||
| } | ||||
|  | ||||
| void Grid_unquiesce_nodes(void) | ||||
| { | ||||
| void Grid_unquiesce_nodes(void) { | ||||
| #ifdef GRID_COMMS_MPI | ||||
|     std::cout.clear(); | ||||
|   std::cout.clear(); | ||||
| #endif | ||||
| } | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										158
									
								
								lib/Log.h
									
									
									
									
									
								
							
							
						
						
									
										158
									
								
								lib/Log.h
									
									
									
									
									
								
							| @@ -6,9 +6,9 @@ | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|     Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
| @@ -27,6 +27,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
|  | ||||
| #include <map> | ||||
|  | ||||
| #ifndef GRID_LOG_H | ||||
| #define GRID_LOG_H | ||||
|  | ||||
| @@ -34,56 +37,99 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <execinfo.h> | ||||
| #endif | ||||
|  | ||||
| namespace Grid { | ||||
|     namespace Grid { | ||||
|  | ||||
| // Dress the output; use std::chrono for time stamping via the StopWatch class | ||||
| int Rank(void); // used for early stage debug before library init | ||||
|  | ||||
|  | ||||
// Palette of terminal colour escape sequences, looked up by colour name.
// While colouring is inactive every name maps to the empty string, so the
// entries can be streamed unconditionally.
class Colours {
protected:
  bool is_active;

public:
  // Colour name -> escape sequence ("" for every name when inactive).
  std::map<std::string, std::string> colour;

  Colours(bool activate = false) { Active(activate); }

  // Switch colouring on or off and rewrite all nine palette entries to match.
  void Active(bool activate) {
    struct Entry {
      const char *key;
      const char *code;
    };
    static const Entry palette[] = {
      {"BLACK",  "\033[30m"},
      {"RED",    "\033[31m"},
      {"GREEN",  "\033[32m"},
      {"YELLOW", "\033[33m"},
      {"BLUE",   "\033[34m"},
      {"PURPLE", "\033[35m"},
      {"CYAN",   "\033[36m"},
      {"WHITE",  "\033[37m"},
      {"NORMAL", "\033[0;39m"},
    };

    is_active = activate;
    for (const Entry &entry : palette) {
      colour[entry.key] = is_active ? entry.code : "";
    }
  }
};
|  | ||||
|  | ||||
| class Logger { | ||||
| protected: | ||||
|     int active; | ||||
|     std::string name, topName, COLOUR; | ||||
| public: | ||||
|     static GridStopWatch StopWatch; | ||||
|     static std::ostream devnull; | ||||
|   Colours &Painter; | ||||
|   int active; | ||||
|   std::string name, topName; | ||||
|   std::string COLOUR; | ||||
|  | ||||
|     static std::string BLACK; | ||||
|     static std::string RED  ; | ||||
|     static std::string GREEN; | ||||
|     static std::string YELLOW; | ||||
|     static std::string BLUE  ; | ||||
|     static std::string PURPLE; | ||||
|     static std::string CYAN  ; | ||||
|     static std::string WHITE ; | ||||
|     static std::string NORMAL; | ||||
|      | ||||
|  Logger(std::string topNm, int on, std::string nm,std::string col) | ||||
|    : active(on), name(nm), topName(topNm), COLOUR(col) {}; | ||||
|      | ||||
|     void Active(int on) {active = on;}; | ||||
|     int  isActive(void) {return active;}; | ||||
|      | ||||
|     friend std::ostream& operator<< (std::ostream& stream, const Logger& log){ | ||||
|         if ( log.active ) { | ||||
|             StopWatch.Stop(); | ||||
|             GridTime now = StopWatch.Elapsed(); | ||||
|             StopWatch.Start(); | ||||
|             stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : "; | ||||
|             stream << log.COLOUR <<std::setw(11)  << log.name << BLACK << " : "; | ||||
|             stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ; | ||||
|             stream << log.COLOUR; | ||||
|             return stream; | ||||
|         } else {  | ||||
|             return devnull; | ||||
|         } | ||||
| public: | ||||
|   static GridStopWatch StopWatch; | ||||
|   static std::ostream devnull; | ||||
|  | ||||
|   std::string background() {return Painter.colour["NORMAL"];} | ||||
|   std::string evidence() {return Painter.colour["YELLOW"];} | ||||
|   std::string colour() {return Painter.colour[COLOUR];} | ||||
|  | ||||
|   Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) | ||||
|   : active(on), | ||||
|   name(nm), | ||||
|   topName(topNm), | ||||
|   Painter(col_class), | ||||
|   COLOUR(col){} ; | ||||
|    | ||||
|   void Active(int on) {active = on;}; | ||||
|   int  isActive(void) {return active;}; | ||||
|    | ||||
|   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ | ||||
|  | ||||
|     if ( log.active ) { | ||||
|       StopWatch.Stop(); | ||||
|       GridTime now = StopWatch.Elapsed(); | ||||
|       StopWatch.Start(); | ||||
|       stream << log.background()<< log.topName << log.background()<< " : "; | ||||
|       stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : "; | ||||
|       stream << log.evidence()<< now << log.background() << " : " << log.colour(); | ||||
|       return stream; | ||||
|     } else {  | ||||
|       return devnull; | ||||
|     } | ||||
|      | ||||
|   } | ||||
|  | ||||
| }; | ||||
|      | ||||
|  | ||||
| class GridLogger: public Logger { | ||||
| public: | ||||
|  GridLogger(int on, std::string nm, std::string col = Logger::BLACK): Logger("Grid", on, nm, col){}; | ||||
|   GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"): | ||||
|   Logger("Grid", on, nm, col_class, col_key){}; | ||||
| }; | ||||
|  | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams); | ||||
| @@ -95,38 +141,40 @@ extern GridLogger GridLogDebug  ; | ||||
| extern GridLogger GridLogPerformance; | ||||
| extern GridLogger GridLogIterative  ; | ||||
| extern GridLogger GridLogIntegrator  ; | ||||
| extern Colours    GridLogColours; | ||||
|  | ||||
|  | ||||
| #define _NBACKTRACE (256) | ||||
| extern void * Grid_backtrace_buffer[_NBACKTRACE]; | ||||
|  | ||||
| #define BACKTRACEFILE() {\ | ||||
|     char string[20];					\ | ||||
|     std::sprintf(string,"backtrace.%d",Rank());				\ | ||||
|     std::FILE * fp = std::fopen(string,"w");				\ | ||||
|     BACKTRACEFP(fp)\ | ||||
|     std::fclose(fp);	    \ | ||||
| char string[20];					\ | ||||
| std::sprintf(string,"backtrace.%d",Rank());				\ | ||||
| std::FILE * fp = std::fopen(string,"w");				\ | ||||
| BACKTRACEFP(fp)\ | ||||
| std::fclose(fp);	    \ | ||||
| } | ||||
|  | ||||
|  | ||||
| #ifdef HAVE_EXECINFO_H | ||||
| #define BACKTRACEFP(fp) { \ | ||||
|   int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);\ | ||||
|   char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\ | ||||
|   for (int i = 0; i < symbols; i++){\ | ||||
|     std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \ | ||||
|   }\ | ||||
| int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);\ | ||||
| char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\ | ||||
| for (int i = 0; i < symbols; i++){\ | ||||
|   std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \ | ||||
| }\ | ||||
| } | ||||
| #else  | ||||
| #define BACKTRACEFP(fp) { \ | ||||
|     std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ | ||||
|     std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \ | ||||
|     std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ | ||||
|     std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ | ||||
| std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ | ||||
| std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \ | ||||
| std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ | ||||
| std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #define BACKTRACE() BACKTRACEFP(stdout)  | ||||
|  | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -1,6 +1,3 @@ | ||||
| # additional include paths necessary to compile the C++ library | ||||
| AM_CXXFLAGS = -I$(top_srcdir)/ | ||||
|  | ||||
| extra_sources= | ||||
| if BUILD_COMMS_MPI | ||||
|   extra_sources+=communicator/Communicator_mpi.cc | ||||
| @@ -17,16 +14,11 @@ endif | ||||
| # | ||||
| # Libraries | ||||
| # | ||||
|  | ||||
| include Make.inc | ||||
| include Eigen.inc | ||||
|  | ||||
| lib_LIBRARIES = libGrid.a | ||||
| libGrid_a_SOURCES = $(CCFILES) $(extra_sources) | ||||
|  | ||||
|  | ||||
| #	qcd/action/fermion/PartialFractionFermion5D.cc\	\ | ||||
| # | ||||
| # Include files | ||||
| # | ||||
| nobase_include_HEADERS=$(HFILES) | ||||
|  | ||||
| libGrid_a_SOURCES              = $(CCFILES) $(extra_sources) | ||||
| libGrid_adir                   = $(pkgincludedir) | ||||
| nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h | ||||
|   | ||||
| @@ -32,28 +32,44 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| namespace Grid { | ||||
|  | ||||
| #define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16)) | ||||
|  | ||||
| #define RawConfig(A,B) (A<<8|B) | ||||
| const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = { | ||||
| #ifdef __linux__ | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." }, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." }, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." }, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES        ,  "CACHE_MISSES......." }, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS)       ,  "L1D_READ_MISS......"}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS)     ,  "L1D_READ_ACCESS...."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS)      ,  "L1D_WRITE_MISS....."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS)    ,  "L1D_WRITE_ACCESS..."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS)   ,  "L1D_PREFETCH_MISS.."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS"}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS)        ,  "LL_READ_MISS......."}, | ||||
|   //  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS)      ,  "LL_READ_ACCESS....."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS)       ,  "LL_WRITE_MISS......"}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS)     ,  "LL_WRITE_ACCESS...."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS)    ,  "LL_PREFETCH_MISS..."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS."}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......"}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS...."} | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." , INSTRUCTIONS}, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES        ,  "CACHE_MISSES......." , CACHE_REFERENCES}, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." , INSTRUCTIONS}, | ||||
|   { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." , CPUCYCLES   }, | ||||
|     // 4 | ||||
| #ifdef AVX512 | ||||
|     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES    }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS  }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS    }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS  }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS  }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS }, | ||||
|     { PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS }, | ||||
|     // 11 | ||||
| #else | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS)     ,  "L1D_READ_ACCESS....",INSTRUCTIONS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS)       ,  "L1D_READ_MISS......",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS)      ,  "L1D_WRITE_MISS.....",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS)    ,  "L1D_WRITE_ACCESS...",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS)   ,  "L1D_PREFETCH_MISS..",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS}, | ||||
|     // 11 | ||||
| #endif | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS)        ,  "LL_READ_MISS.......",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS)      ,  "LL_READ_ACCESS.....",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS)       ,  "LL_WRITE_MISS......",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS)     ,  "LL_WRITE_ACCESS....",L1D_READ_ACCESS}, | ||||
|     //15 | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS)    ,  "LL_PREFETCH_MISS...",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......",INSTRUCTIONS}, | ||||
|   { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS....",INSTRUCTIONS} | ||||
|     //19 | ||||
|   //  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" }, | ||||
| #endif | ||||
| }; | ||||
| } | ||||
|   | ||||
| @@ -58,6 +58,27 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #ifdef TIMERS_OFF | ||||
|  | ||||
|  | ||||
| inline uint64_t cyclecount(void){  | ||||
|   return 0; | ||||
| } | ||||
| #define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx") | ||||
| #define __SSC_STOP  __SSC_MARK(0x110) | ||||
| #define __SSC_START __SSC_MARK(0x111) | ||||
|  | ||||
|  | ||||
| #else | ||||
|  | ||||
| #define __SSC_MARK(mark)  | ||||
| #define __SSC_STOP   | ||||
| #define __SSC_START  | ||||
|  | ||||
| /* | ||||
|  * cycle counters arch dependent | ||||
|  */ | ||||
|  | ||||
| #ifdef __bgq__ | ||||
| inline uint64_t cyclecount(void){  | ||||
|    uint64_t tmp; | ||||
| @@ -65,18 +86,20 @@ inline uint64_t cyclecount(void){ | ||||
|    return tmp; | ||||
| } | ||||
| #elif defined __x86_64__ | ||||
| #include <immintrin.h> | ||||
| #ifndef __INTEL_COMPILER | ||||
| #include <x86intrin.h> | ||||
| #endif | ||||
| inline uint64_t cyclecount(void){ | ||||
|    return __rdtsc(); | ||||
| inline uint64_t cyclecount(void){  | ||||
|   return __rdtsc(); | ||||
|   //  unsigned int dummy; | ||||
|   // return __rdtscp(&dummy); | ||||
| } | ||||
| #else | ||||
| #warning No cycle counter implemented for this architecture | ||||
|  | ||||
| inline uint64_t cyclecount(void){  | ||||
|    return 0; | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
|  | ||||
| class PerformanceCounter { | ||||
| @@ -87,6 +110,7 @@ private: | ||||
|     uint32_t type; | ||||
|     uint64_t config; | ||||
|     const char *name; | ||||
|     int normalisation; | ||||
|   } PerformanceCounterConfig;  | ||||
|    | ||||
|   static const PerformanceCounterConfig PerformanceCounterConfigs []; | ||||
| @@ -94,26 +118,12 @@ private: | ||||
| public: | ||||
|  | ||||
|   enum PerformanceCounterType { | ||||
|     CPUCYCLES=0, | ||||
|     INSTRUCTIONS, | ||||
|     //    STALL_CYCLES, | ||||
|     CACHE_REFERENCES, | ||||
|     CACHE_MISSES, | ||||
|     L1D_READ_MISS, | ||||
|     L1D_READ_ACCESS, | ||||
|     L1D_WRITE_MISS, | ||||
|     L1D_WRITE_ACCESS, | ||||
|     L1D_PREFETCH_MISS, | ||||
|     L1D_PREFETCH_ACCESS, | ||||
|     LL_READ_MISS, | ||||
|     //    LL_READ_ACCESS, | ||||
|     LL_WRITE_MISS, | ||||
|     LL_WRITE_ACCESS, | ||||
|     LL_PREFETCH_MISS, | ||||
|     LL_PREFETCH_ACCESS, | ||||
|     L1I_READ_MISS, | ||||
|     L1I_READ_ACCESS, | ||||
|     PERFORMANCE_COUNTER_NUM_TYPES | ||||
|     CACHE_REFERENCES=0, | ||||
|     CACHE_MISSES=1, | ||||
|     CPUCYCLES=2, | ||||
|     INSTRUCTIONS=3, | ||||
|     L1D_READ_ACCESS=4, | ||||
|     PERFORMANCE_COUNTER_NUM_TYPES=19 | ||||
|   }; | ||||
|  | ||||
| public: | ||||
| @@ -121,7 +131,9 @@ public: | ||||
|   int PCT; | ||||
|  | ||||
|   long long count; | ||||
|   long long cycles; | ||||
|   int fd; | ||||
|   int cyclefd; | ||||
|   unsigned long long elapsed; | ||||
|   uint64_t begin; | ||||
|  | ||||
| @@ -134,7 +146,9 @@ public: | ||||
|     assert(_pct>=0); | ||||
|     assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES); | ||||
|     fd=-1; | ||||
|     cyclefd=-1; | ||||
|     count=0; | ||||
|     cycles=0; | ||||
|     PCT =_pct; | ||||
|     Open(); | ||||
| #endif | ||||
| @@ -159,6 +173,15 @@ public: | ||||
|       fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); | ||||
|       perror("Error is"); | ||||
|     } | ||||
|     int norm = PerformanceCounterConfigs[PCT].normalisation; | ||||
|     pe.type  = PerformanceCounterConfigs[norm].type; | ||||
|     pe.config= PerformanceCounterConfigs[norm].config; | ||||
|     name = PerformanceCounterConfigs[norm].name; | ||||
|     cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1 | ||||
|     if (cyclefd == -1) { | ||||
|       fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); | ||||
|       perror("Error is"); | ||||
|     } | ||||
| #endif | ||||
|   } | ||||
|  | ||||
| @@ -168,6 +191,8 @@ public: | ||||
|     if ( fd!= -1) { | ||||
|       ::ioctl(fd, PERF_EVENT_IOC_RESET, 0); | ||||
|       ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); | ||||
|       ::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0); | ||||
|       ::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0); | ||||
|     } | ||||
|     begin  =cyclecount(); | ||||
| #else | ||||
| @@ -177,10 +202,13 @@ public: | ||||
|  | ||||
|   void Stop(void) { | ||||
|     count=0; | ||||
|     cycles=0; | ||||
| #ifdef __linux__ | ||||
|     if ( fd!= -1) { | ||||
|       ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); | ||||
|       ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); | ||||
|       ::read(fd, &count, sizeof(long long)); | ||||
|       ::read(cyclefd, &cycles, sizeof(long long)); | ||||
|     } | ||||
|     elapsed = cyclecount() - begin; | ||||
| #else | ||||
| @@ -190,7 +218,11 @@ public: | ||||
|   } | ||||
|   void Report(void) { | ||||
| #ifdef __linux__ | ||||
|     std::printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count); | ||||
|     int N = PerformanceCounterConfigs[PCT].normalisation; | ||||
|     const char * sn = PerformanceCounterConfigs[N].name ; | ||||
|     const char * sc = PerformanceCounterConfigs[PCT].name; | ||||
|       std::printf("tsc = %llu %s = %llu  %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles,  | ||||
| 		  sc, count, sc,sn, (double)count/(double)cycles); | ||||
| #else | ||||
|     std::printf("%llu cycles \n", elapsed ); | ||||
| #endif | ||||
| @@ -199,7 +231,7 @@ public: | ||||
|   ~PerformanceCounter() | ||||
|   { | ||||
| #ifdef __linux__ | ||||
|     ::close(fd); | ||||
|     ::close(fd);    ::close(cyclefd); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   | ||||
							
								
								
									
										49
									
								
								lib/Simd.h
									
									
									
									
									
								
							
							
						
						
									
										49
									
								
								lib/Simd.h
									
									
									
									
									
								
							| @@ -1,32 +1,33 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/Simd.h | ||||
| Source file: ./lib/Simd.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_SIMD_H | ||||
| #define GRID_SIMD_H | ||||
|  | ||||
| @@ -118,6 +119,14 @@ namespace Grid { | ||||
|   inline ComplexD timesI(const ComplexD &r)     { return(r*ComplexD(0.0,1.0));} | ||||
|   inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));} | ||||
|   inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));} | ||||
|  | ||||
|   // define projections to real and imaginary parts | ||||
|   inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));} | ||||
|   inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));} | ||||
|   inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));} | ||||
|   inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));} | ||||
|  | ||||
|   // define auxiliary functions for complex computations | ||||
|   inline void timesI(ComplexF &ret,const ComplexF &r)     { ret = timesI(r);} | ||||
|   inline void timesI(ComplexD &ret,const ComplexD &r)     { ret = timesI(r);} | ||||
|   inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);} | ||||
| @@ -163,8 +172,8 @@ namespace Grid { | ||||
|  | ||||
| }; | ||||
|  | ||||
| #include <simd/Grid_vector_types.h> | ||||
| #include <simd/Grid_vector_unops.h> | ||||
| #include "simd/Grid_vector_types.h" | ||||
| #include "simd/Grid_vector_unops.h" | ||||
|  | ||||
| namespace Grid { | ||||
|   // Default precision | ||||
|   | ||||
							
								
								
									
										247
									
								
								lib/Stat.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										247
									
								
								lib/Stat.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,247 @@ | ||||
| #include <Grid.h> | ||||
| #include <PerfCount.h> | ||||
| #include <Stat.h> | ||||
|  | ||||
|  | ||||
| namespace Grid {  | ||||
|  | ||||
|  | ||||
| bool PmuStat::pmu_initialized=false; | ||||
|  | ||||
|  | ||||
| void PmuStat::init(const char *regname) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   name = regname; | ||||
|   if (!pmu_initialized) | ||||
|     { | ||||
|       std::cout<<"initialising pmu"<<std::endl; | ||||
|       pmu_initialized = true; | ||||
|       pmu_init(); | ||||
|     } | ||||
|   clear(); | ||||
| #endif | ||||
| } | ||||
| void PmuStat::clear(void) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   count = 0; | ||||
|   tregion = 0; | ||||
|   pmc0 = 0; | ||||
|   pmc1 = 0; | ||||
|   inst = 0; | ||||
|   cyc = 0; | ||||
|   ref = 0; | ||||
|   tcycles = 0; | ||||
|   reads = 0; | ||||
|   writes = 0; | ||||
| #endif | ||||
| } | ||||
| void PmuStat::print(void) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   std::cout <<"Reg "<<std::string(name)<<":\n"; | ||||
|   std::cout <<"  region "<<tregion<<std::endl; | ||||
|   std::cout <<"  cycles "<<tcycles<<std::endl; | ||||
|   std::cout <<"  inst   "<<inst   <<std::endl; | ||||
|   std::cout <<"  cyc    "<<cyc    <<std::endl; | ||||
|   std::cout <<"  ref    "<<ref    <<std::endl; | ||||
|   std::cout <<"  pmc0   "<<pmc0   <<std::endl; | ||||
|   std::cout <<"  pmc1   "<<pmc1   <<std::endl; | ||||
|   std::cout <<"  count  "<<count  <<std::endl; | ||||
|   std::cout <<"  reads  "<<reads  <<std::endl; | ||||
|   std::cout <<"  writes "<<writes <<std::endl; | ||||
| #endif | ||||
| } | ||||
| void PmuStat::start(void) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   pmu_start(); | ||||
|   ++count; | ||||
|   xmemctrs(&mrstart, &mwstart); | ||||
|   tstart = __rdtsc(); | ||||
| #endif | ||||
| } | ||||
| void PmuStat::enter(int t) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   counters[0][t] = __rdpmc(0); | ||||
|   counters[1][t] = __rdpmc(1); | ||||
|   counters[2][t] = __rdpmc((1<<30)|0); | ||||
|   counters[3][t] = __rdpmc((1<<30)|1); | ||||
|   counters[4][t] = __rdpmc((1<<30)|2); | ||||
|   counters[5][t] = __rdtsc(); | ||||
| #endif | ||||
| } | ||||
| void PmuStat::exit(int t) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   counters[0][t] = __rdpmc(0) - counters[0][t]; | ||||
|   counters[1][t] = __rdpmc(1) - counters[1][t]; | ||||
|   counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t]; | ||||
|   counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t]; | ||||
|   counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t]; | ||||
|   counters[5][t] = __rdtsc() - counters[5][t]; | ||||
| #endif | ||||
| } | ||||
| void PmuStat::accum(int nthreads) | ||||
| { | ||||
| #ifdef __x86_64__ | ||||
|   tend = __rdtsc(); | ||||
|   xmemctrs(&mrend, &mwend); | ||||
|   pmu_stop(); | ||||
|   for (int t = 0; t < nthreads; ++t) { | ||||
|     pmc0 += counters[0][t]; | ||||
|     pmc1 += counters[1][t]; | ||||
|     inst += counters[2][t]; | ||||
|     cyc += counters[3][t]; | ||||
|     ref += counters[4][t]; | ||||
|     tcycles += counters[5][t]; | ||||
|   } | ||||
|   uint64_t region = tend - tstart; | ||||
|   tregion += region; | ||||
|   uint64_t mreads = mrend - mrstart; | ||||
|   reads += mreads; | ||||
|   uint64_t mwrites = mwend - mwstart; | ||||
|   writes += mwrites; | ||||
| #endif | ||||
| } | ||||
|  | ||||
|  | ||||
| void PmuStat::pmu_fini(void) {} | ||||
| void PmuStat::pmu_start(void) {}; | ||||
| void PmuStat::pmu_stop(void) {}; | ||||
| void PmuStat::pmu_init(void) | ||||
| { | ||||
| #ifdef _KNIGHTS_LANDING_ | ||||
|   KNLsetup(); | ||||
| #endif | ||||
| } | ||||
| void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw) | ||||
| { | ||||
| #ifdef _KNIGHTS_LANDING_ | ||||
|   ctrs c; | ||||
|   KNLreadctrs(c); | ||||
|   uint64_t emr = 0, emw = 0; | ||||
|   for (int i = 0; i < NEDC; ++i) | ||||
|     { | ||||
|       emr += c.edcrd[i]; | ||||
|       emw += c.edcwr[i]; | ||||
|     } | ||||
|   *mr = emr; | ||||
|   *mw = emw; | ||||
| #else | ||||
|   *mr = *mw = 0; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| #ifdef _KNIGHTS_LANDING_ | ||||
|  | ||||
| struct knl_gbl_ PmuStat::gbl; | ||||
|  | ||||
| #define PMU_MEM | ||||
|  | ||||
| void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask) | ||||
| { | ||||
|   char fname[1024]; | ||||
|   snprintf(fname, sizeof(fname), "%s/type", ename); | ||||
|   FILE *fp = fopen(fname, "r"); | ||||
|   if (fp == 0) { | ||||
|     ::printf("open %s", fname); | ||||
|     ::exit(0); | ||||
|   } | ||||
|   int type; | ||||
|   int ret = fscanf(fp, "%d", &type); | ||||
|   assert(ret == 1); | ||||
|   fclose(fp); | ||||
|   //  std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl; | ||||
|  | ||||
|   struct perf_event_attr hw = {}; | ||||
|   hw.size = sizeof(hw); | ||||
|   hw.type = type; | ||||
|   // see /sys/devices/uncore_*/format/* | ||||
|   // All of the events we are interested in are configured the same way, but | ||||
|   // that isn't always true. Proper code would parse the format files | ||||
|   hw.config = event | (umask << 8); | ||||
|   //hw.read_format = PERF_FORMAT_GROUP; | ||||
|   // unfortunately the above only works within a single PMU; might | ||||
|   // as well just read them one at a time | ||||
|   int cpu = 0; | ||||
|   fd = perf_event_open(&hw, -1, cpu, -1, 0); | ||||
|   if (fd == -1) { | ||||
|     ::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config); | ||||
|     ::exit(0); | ||||
|   } else {  | ||||
|     //    std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
|  void PmuStat::KNLsetup(void){ | ||||
|  | ||||
|    int ret; | ||||
|    char fname[1024]; | ||||
|  | ||||
|    // MC RPQ inserts and WPQ inserts (reads & writes) | ||||
|    for (int mc = 0; mc < NMC; ++mc) | ||||
|      { | ||||
|        ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc); | ||||
|        // RPQ Inserts | ||||
|        KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1); | ||||
|        // WPQ Inserts | ||||
|        KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1); | ||||
|      } | ||||
|    // EDC RPQ inserts and WPQ inserts | ||||
|    for (int edc=0; edc < NEDC; ++edc) | ||||
|      { | ||||
|        ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc); | ||||
|        // RPQ inserts | ||||
|        KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1); | ||||
|        // WPQ inserts | ||||
|        KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1); | ||||
|      } | ||||
|    // EDC HitE, HitM, MissE, MissM | ||||
|    for (int edc=0; edc < NEDC; ++edc) | ||||
|      { | ||||
|        ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc); | ||||
|        KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1); | ||||
|        KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2); | ||||
|        KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4); | ||||
|        KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8); | ||||
|      } | ||||
|  } | ||||
|  | ||||
| uint64_t PmuStat::KNLreadctr(int fd) | ||||
| { | ||||
|   uint64_t data; | ||||
|   size_t s = ::read(fd, &data, sizeof(data)); | ||||
|   if (s != sizeof(uint64_t)){ | ||||
|     ::printf("read counter %lu", s); | ||||
|     ::exit(0); | ||||
|   } | ||||
|   return data; | ||||
| } | ||||
|  | ||||
| void PmuStat::KNLreadctrs(ctrs &c) | ||||
| { | ||||
|   for (int i = 0; i < NMC; ++i) | ||||
|     { | ||||
|       c.mcrd[i] = KNLreadctr(gbl.mc_rd[i]); | ||||
|       c.mcwr[i] = KNLreadctr(gbl.mc_wr[i]); | ||||
|     } | ||||
|   for (int i = 0; i < NEDC; ++i) | ||||
|     { | ||||
|       c.edcrd[i] = KNLreadctr(gbl.edc_rd[i]); | ||||
|       c.edcwr[i] = KNLreadctr(gbl.edc_wr[i]); | ||||
|     } | ||||
|   for (int i = 0; i < NEDC; ++i) | ||||
|     { | ||||
|       c.edchite[i] = KNLreadctr(gbl.edc_hite[i]); | ||||
|       c.edchitm[i] = KNLreadctr(gbl.edc_hitm[i]); | ||||
|       c.edcmisse[i] = KNLreadctr(gbl.edc_misse[i]); | ||||
|       c.edcmissm[i] = KNLreadctr(gbl.edc_missm[i]); | ||||
|     } | ||||
| } | ||||
|  | ||||
| #endif | ||||
| } | ||||
							
								
								
									
										104
									
								
								lib/Stat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								lib/Stat.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | ||||
| #ifndef _GRID_STAT_H | ||||
| #define _GRID_STAT_H | ||||
|  | ||||
| #ifdef AVX512 | ||||
| #define _KNIGHTS_LANDING_ROOTONLY | ||||
| #endif | ||||
|  | ||||
| namespace Grid {  | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
| // Extra KNL counters from MCDRAM | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef _KNIGHTS_LANDING_ | ||||
| #define NMC 6 | ||||
| #define NEDC 8 | ||||
// One snapshot of the KNL uncore counter values.
// Arrays are indexed by memory-controller number (NMC entries) or by EDC
// number (NEDC entries); the field names mirror those of knl_gbl_.
struct ctrs
{
    uint64_t mcrd[NMC];       // MC read counter values
    uint64_t mcwr[NMC];       // MC write counter values
    uint64_t edcrd[NEDC];     // EDC read counter values
    uint64_t edcwr[NEDC];     // EDC write counter values
    uint64_t edchite[NEDC];   // EDC HitE counter values
    uint64_t edchitm[NEDC];   // EDC HitM counter values
    uint64_t edcmisse[NEDC];  // EDC MissE counter values
    uint64_t edcmissm[NEDC];  // EDC MissM counter values
};
// Peter/Azusa:
// Our modification of a code provided by Larry Meadows from Intel.
// Verified by (non-NDA) email exchange to be OK for github: it only uses the
// /sys/devices/ filesystem interface, which is already public and in the
// Linux kernel for KNL.
//
// Table of integer handles, one per uncore counter, with the same layout as
// struct ctrs (NMC memory-controller slots, NEDC EDC slots).
// NOTE(review): presumably these are the perf-event file descriptors that are
// later passed to PmuStat::KNLreadctr(int fd) — confirm against Stat.cc.
struct knl_gbl_
{
  int mc_rd[NMC];       // MC read counters
  int mc_wr[NMC];       // MC write counters
  int edc_rd[NEDC];     // EDC read counters
  int edc_wr[NEDC];     // EDC write counters
  int edc_hite[NEDC];   // EDC HitE counters
  int edc_hitm[NEDC];   // EDC HitM counters
  int edc_misse[NEDC];  // EDC MissE counters
  int edc_missm[NEDC];  // EDC MissM counters
};
| #endif | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
// Performance-monitoring statistics object.
//
// Only declarations live in this header; the member functions are defined
// elsewhere. The static members (pmu_initialized and, on KNL builds, gbl) are
// shared by all instances, while the counters below are per instance.
class PmuStat
{
    // NOTE(review): presumably indexed [slot][thread], with the slot numbering
    // given by the "map for ctrs values" list further down — confirm against
    // the implementation.
    uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
    static struct knl_gbl_ gbl;   // handle table for the KNL uncore counters
#endif
    const char *name;   // NOTE(review): presumably the region name given to init() — confirm

    uint64_t reads;     // memory reads
    uint64_t writes;    // memory writes
    uint64_t mrstart;   // memory read counter at start of parallel region
    uint64_t mrend;     // memory read counter at end of parallel region
    uint64_t mwstart;   // memory write counter at start of parallel region
    uint64_t mwend;     // memory write counter at end of parallel region

    // cumulative counters
    uint64_t count;     // number of invocations
    uint64_t tregion;   // total time in parallel region (from thread 0)
    uint64_t tcycles;   // total cycles inside parallel region
    uint64_t inst, ref, cyc;   // fixed counters
    uint64_t pmc0, pmc1;// pmu
    // add memory counters here
    // temp variables
    uint64_t tstart;    // tsc at start of parallel region
    uint64_t tend;      // tsc at end of parallel region
    // map for ctrs values
    // 0 pmc0 start
    // 1 pmc0 end
    // 2 pmc1 start
    // 3 pmc1 end
    // 4 tsc start
    // 5 tsc end
    static bool pmu_initialized;
public:
    // True once the PMU has been initialised (returns the static flag above).
    static bool is_init(void){ return pmu_initialized;}
    // Process-wide PMU control (static: not tied to an instance).
    static void pmu_init(void);
    static void pmu_fini(void);
    static void pmu_start(void);
    static void pmu_stop(void);
    void accum(int nthreads);
    // Memory read / write counter values are returned through *mr and *mw.
    static void xmemctrs(uint64_t *mr, uint64_t *mw);
    // Per-instance interface; `t` is presumably a thread index — TODO confirm.
    void start(void);
    void enter(int t);
    void exit(int t);
    void print(void);
    void init(const char *regname);
    void clear(void);
#ifdef _KNIGHTS_LANDING_
    // KNL uncore (MC / EDC) counter access.
    static void     KNLsetup(void);
    static uint64_t KNLreadctr(int fd);
    static void     KNLreadctrs(ctrs &c);
    static void     KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif
    
  };
|  | ||||
| } | ||||
| #endif | ||||
|  | ||||
|  | ||||
							
								
								
									
										1814
									
								
								lib/Stencil.h
									
									
									
									
									
								
							
							
						
						
									
										1814
									
								
								lib/Stencil.h
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -30,22 +30,22 @@ Author: neo <cossu@post.kek.jp> | ||||
| #ifndef GRID_MATH_H | ||||
| #define GRID_MATH_H | ||||
|  | ||||
| #include <tensors/Tensor_traits.h> | ||||
| #include <tensors/Tensor_class.h> | ||||
| #include <tensors/Tensor_arith.h> | ||||
| #include <tensors/Tensor_inner.h> | ||||
| #include <tensors/Tensor_outer.h> | ||||
| #include <tensors/Tensor_transpose.h> | ||||
| #include <tensors/Tensor_trace.h> | ||||
| #include <tensors/Tensor_index.h> | ||||
| #include <tensors/Tensor_Ta.h> | ||||
| #include <tensors/Tensor_determinant.h> | ||||
| #include <tensors/Tensor_exp.h> | ||||
| //#include <tensors/Tensor_peek.h> | ||||
| //#include <tensors/Tensor_poke.h> | ||||
| #include <tensors/Tensor_reality.h> | ||||
| #include <tensors/Tensor_unary.h> | ||||
| #include <tensors/Tensor_extract_merge.h> | ||||
| #include <tensors/Tensor_logical.h> | ||||
| #include <Grid/tensors/Tensor_traits.h> | ||||
| #include <Grid/tensors/Tensor_class.h> | ||||
| #include <Grid/tensors/Tensor_arith.h> | ||||
| #include <Grid/tensors/Tensor_inner.h> | ||||
| #include <Grid/tensors/Tensor_outer.h> | ||||
| #include <Grid/tensors/Tensor_transpose.h> | ||||
| #include <Grid/tensors/Tensor_trace.h> | ||||
| #include <Grid/tensors/Tensor_index.h> | ||||
| #include <Grid/tensors/Tensor_Ta.h> | ||||
| #include <Grid/tensors/Tensor_determinant.h> | ||||
| #include <Grid/tensors/Tensor_exp.h> | ||||
| //#include <Grid/tensors/Tensor_peek.h> | ||||
| //#include <Grid/tensors/Tensor_poke.h> | ||||
| #include <Grid/tensors/Tensor_reality.h> | ||||
| #include <Grid/tensors/Tensor_unary.h> | ||||
| #include <Grid/tensors/Tensor_extract_merge.h> | ||||
| #include <Grid/tensors/Tensor_logical.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -37,7 +37,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| #ifdef GRID_OMP | ||||
| #include <omp.h> | ||||
| #define PARALLEL_FOR_LOOP _Pragma("omp parallel for ") | ||||
| #ifdef GRID_NUMA | ||||
| #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") | ||||
| #else | ||||
| #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") | ||||
| #endif | ||||
| #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") | ||||
| #else | ||||
| #define PARALLEL_FOR_LOOP  | ||||
|   | ||||
							
								
								
									
										14
									
								
								lib/Timer.h
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								lib/Timer.h
									
									
									
									
									
								
							| @@ -39,7 +39,13 @@ namespace Grid { | ||||
|   // Dress the output; use std::chrono | ||||
|  | ||||
| // C++11 time facilities better? | ||||
| double usecond(void); | ||||
// Wall-clock time in microseconds, as reported by gettimeofday().
//
// When TIMERS_ON is not defined no system call is made and the function
// returns exactly 0.0. `tv` is zero-initialised for that case: without it the
// return expression would read an uninitialised struct (undefined behaviour).
inline double usecond(void) {
  struct timeval tv = {};
#ifdef TIMERS_ON
  gettimeofday(&tv,NULL);
#endif
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
|  | ||||
| typedef  std::chrono::system_clock          GridClock; | ||||
| typedef  std::chrono::time_point<GridClock> GridTimePoint; | ||||
| @@ -63,17 +69,23 @@ public: | ||||
|   } | ||||
|   void     Start(void) {  | ||||
|     assert(running == false); | ||||
| #ifdef TIMERS_ON | ||||
|     start = GridClock::now();  | ||||
| #endif | ||||
|     running = true; | ||||
|   } | ||||
|   void     Stop(void)  {  | ||||
|     assert(running == true); | ||||
| #ifdef TIMERS_ON | ||||
|     accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start);  | ||||
| #endif | ||||
|     running = false;  | ||||
|   }; | ||||
|   void     Reset(void){ | ||||
|     running = false; | ||||
| #ifdef TIMERS_ON | ||||
|     start = GridClock::now(); | ||||
| #endif | ||||
|     accumulator = std::chrono::duration_cast<GridUsecs>(start-start);  | ||||
|   } | ||||
|   GridTime Elapsed(void) { | ||||
|   | ||||
| @@ -31,7 +31,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_ALGORITHM_COARSENED_MATRIX_H | ||||
| #define  GRID_ALGORITHM_COARSENED_MATRIX_H | ||||
|  | ||||
| #include <Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -28,7 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_ALGORITHM_SPARSE_MATRIX_H | ||||
| #define  GRID_ALGORITHM_SPARSE_MATRIX_H | ||||
|  | ||||
| #include <Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,8 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_CHEBYSHEV_H | ||||
| #define GRID_CHEBYSHEV_H | ||||
|  | ||||
| #include<Grid.h> | ||||
| #include<algorithms/LinearOperator.h> | ||||
| #include <Grid/algorithms/LinearOperator.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -18,10 +18,10 @@ | ||||
| #include <stddef.h> | ||||
| #include <Config.h> | ||||
|  | ||||
| #ifdef HAVE_GMP_H | ||||
| #include <algorithms/approx/bigfloat.h> | ||||
| #ifdef HAVE_LIBGMP | ||||
| #include "bigfloat.h" | ||||
| #else | ||||
| #include <algorithms/approx/bigfloat_double.h> | ||||
| #include "bigfloat_double.h" | ||||
| #endif | ||||
|  | ||||
| #define JMAX 10000 //Maximum number of iterations of Newton's approximation | ||||
|   | ||||
| @@ -1,150 +1,168 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradient.h | ||||
| Source file: ./lib/algorithms/iterative/ConjugateGradient.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_CONJUGATE_GRADIENT_H | ||||
| #define GRID_CONJUGATE_GRADIENT_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Base classes for iterative processes based on operators | ||||
|     // single input vec, single output vec. | ||||
|     ///////////////////////////////////////////////////////////// | ||||
| ///////////////////////////////////////////////////////////// | ||||
| // Base classes for iterative processes based on operators | ||||
| // single input vec, single output vec. | ||||
| ///////////////////////////////////////////////////////////// | ||||
|  | ||||
|   template<class Field>  | ||||
|     class ConjugateGradient : public OperatorFunction<Field> { | ||||
| public:                                                 | ||||
|     RealD   Tolerance; | ||||
|     Integer MaxIterations; | ||||
|     ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {  | ||||
|     }; | ||||
| template <class Field> | ||||
| class ConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|       : Tolerance(tol), | ||||
|         MaxIterations(maxit), | ||||
|         ErrorOnNoConverge(err_on_no_conv){}; | ||||
|  | ||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, | ||||
|                   Field &psi) { | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
|     void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){ | ||||
|     RealD cp, c, a, d, b, ssq, qq, b_pred; | ||||
|  | ||||
|       psi.checkerboard = src.checkerboard; | ||||
|       conformable(psi,src); | ||||
|     Field p(src); | ||||
|     Field mmp(src); | ||||
|     Field r(src); | ||||
|  | ||||
|       RealD cp,c,a,d,b,ssq,qq,b_pred; | ||||
|        | ||||
|       Field   p(src); | ||||
|       Field mmp(src); | ||||
|       Field   r(src); | ||||
|        | ||||
|       //Initial residual computation & set up | ||||
|       RealD guess = norm2(psi); | ||||
|       assert(std::isnan(guess)==0); | ||||
|     // Initial residual computation & set up | ||||
|     RealD guess = norm2(psi); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|       Linop.HermOpAndNorm(psi,mmp,d,b); | ||||
|        | ||||
|       r= src-mmp; | ||||
|       p= r; | ||||
|        | ||||
|       a  =norm2(p); | ||||
|       cp =a; | ||||
|       ssq=norm2(src); | ||||
|      | ||||
|     Linop.HermOpAndNorm(psi, mmp, d, b); | ||||
|      | ||||
|  | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:  cp,r "<<cp   <<std::endl; | ||||
|       std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl; | ||||
|     r = src - mmp; | ||||
|     p = r; | ||||
|  | ||||
|       RealD rsq =  Tolerance* Tolerance*ssq; | ||||
|        | ||||
|       //Check if guess is really REALLY good :) | ||||
|       if ( cp <= rsq ) { | ||||
| 	return; | ||||
|       } | ||||
|        | ||||
|       std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl; | ||||
|     a = norm2(p); | ||||
|     cp = a; | ||||
|     ssq = norm2(src); | ||||
|  | ||||
|       GridStopWatch LinalgTimer; | ||||
|       GridStopWatch MatrixTimer; | ||||
|       GridStopWatch SolverTimer; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:     p " << a << std::endl; | ||||
|  | ||||
|       SolverTimer.Start(); | ||||
|       int k; | ||||
|       for (k=1;k<=MaxIterations;k++){ | ||||
| 	 | ||||
| 	c=cp; | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
| 	MatrixTimer.Start(); | ||||
| 	Linop.HermOpAndNorm(p,mmp,d,qq); | ||||
| 	MatrixTimer.Stop(); | ||||
|  | ||||
| 	LinalgTimer.Start(); | ||||
| 	//	RealD    qqck = norm2(mmp); | ||||
| 	//	ComplexD dck  = innerProduct(p,mmp); | ||||
|        | ||||
| 	a      = c/d; | ||||
| 	b_pred = a*(a*qq-d)/c; | ||||
|  | ||||
| 	cp = axpy_norm(r,-a,mmp,r); | ||||
| 	b = cp/c; | ||||
| 	 | ||||
| 	// Fuse these loops ; should be really easy | ||||
| 	psi= a*p+psi; | ||||
| 	p  = p*b+r; | ||||
| 	   | ||||
| 	LinalgTimer.Stop(); | ||||
| 	std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl; | ||||
| 	 | ||||
| 	// Stopping condition | ||||
| 	if ( cp <= rsq ) {  | ||||
| 	   | ||||
| 	  SolverTimer.Stop(); | ||||
| 	  Linop.HermOpAndNorm(psi,mmp,d,qq); | ||||
| 	  p=mmp-src; | ||||
| 	   | ||||
| 	  RealD mmpnorm = sqrt(norm2(mmp)); | ||||
| 	  RealD psinorm = sqrt(norm2(psi)); | ||||
| 	  RealD srcnorm = sqrt(norm2(src)); | ||||
| 	  RealD resnorm = sqrt(norm2(p)); | ||||
| 	  RealD true_residual = resnorm/srcnorm; | ||||
|  | ||||
| 	  std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k | ||||
| 		   <<" computed residual "<<sqrt(cp/ssq) | ||||
| 		   <<" true residual "    <<true_residual | ||||
| 		   <<" target "<<Tolerance<<std::endl; | ||||
| 	  std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix  "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed(); | ||||
| 	  std::cout<<std::endl; | ||||
| 	   | ||||
| 	  assert(true_residual/Tolerance < 1000.0); | ||||
|  | ||||
| 	  return; | ||||
| 	} | ||||
|       } | ||||
|       std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl; | ||||
|       assert(0); | ||||
|     // Check if guess is really REALLY good :) | ||||
|     if (cp <= rsq) { | ||||
|       return; | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq | ||||
|               << std::endl; | ||||
|  | ||||
|     GridStopWatch LinalgTimer; | ||||
|     GridStopWatch MatrixTimer; | ||||
|     GridStopWatch SolverTimer; | ||||
|  | ||||
|     SolverTimer.Start(); | ||||
|     int k; | ||||
|     for (k = 1; k <= MaxIterations; k++) { | ||||
|       c = cp; | ||||
|  | ||||
|       MatrixTimer.Start(); | ||||
|       Linop.HermOpAndNorm(p, mmp, d, qq); | ||||
|       MatrixTimer.Stop(); | ||||
|  | ||||
|       LinalgTimer.Start(); | ||||
|       //  RealD    qqck = norm2(mmp); | ||||
|       //  ComplexD dck  = innerProduct(p,mmp); | ||||
|  | ||||
|       a = c / d; | ||||
|       b_pred = a * (a * qq - d) / c; | ||||
|  | ||||
|       cp = axpy_norm(r, -a, mmp, r); | ||||
|       b = cp / c; | ||||
|  | ||||
|       // Fuse these loops ; should be really easy | ||||
|       psi = a * p + psi; | ||||
|       p = p * b + r; | ||||
|  | ||||
|       LinalgTimer.Stop(); | ||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
|         SolverTimer.Stop(); | ||||
|         Linop.HermOpAndNorm(psi, mmp, d, qq); | ||||
|         p = mmp - src; | ||||
|  | ||||
|         RealD mmpnorm = sqrt(norm2(mmp)); | ||||
|         RealD psinorm = sqrt(norm2(psi)); | ||||
|         RealD srcnorm = sqrt(norm2(src)); | ||||
|         RealD resnorm = sqrt(norm2(p)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage | ||||
|                   << "ConjugateGradient: Converged on iteration " << k << std::endl; | ||||
|         std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual " << true_residual << " target " | ||||
|                   << Tolerance << std::endl; | ||||
|         std::cout << GridLogMessage << "Time elapsed: Iterations " | ||||
|                   << SolverTimer.Elapsed() << " Matrix  " | ||||
|                   << MatrixTimer.Elapsed() << " Linalg " | ||||
|                   << LinalgTimer.Elapsed(); | ||||
|         std::cout << std::endl; | ||||
|  | ||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0); | ||||
|  | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||
|               << std::endl; | ||||
|     if (ErrorOnNoConverge) assert(0); | ||||
|   } | ||||
| }; | ||||
| } | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										142
									
								
								lib/algorithms/iterative/ConjugateGradientMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								lib/algorithms/iterative/ConjugateGradientMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,142 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Christopher Kelly <ckelly@phys.columbia.edu> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H | ||||
| #define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   //Mixed precision restarted defect correction CG | ||||
|   template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  | ||||
|   class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> { | ||||
|   public:                                                 | ||||
|     RealD   Tolerance; | ||||
|     Integer MaxInnerIterations; | ||||
|     Integer MaxOuterIterations; | ||||
|     GridBase* SinglePrecGrid; //Grid for single-precision fields | ||||
|     RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance | ||||
|     LinearOperatorBase<FieldF> &Linop_f; | ||||
|     LinearOperatorBase<FieldD> &Linop_d; | ||||
|  | ||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||
|     LinearFunction<FieldF> *guesser; | ||||
|      | ||||
|     MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) : | ||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||
|       Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||
|       OuterLoopNormMult(100.), guesser(NULL){ }; | ||||
|  | ||||
|     void useGuesser(LinearFunction<FieldF> &g){ | ||||
|       guesser = &g; | ||||
|     } | ||||
|    | ||||
|     void operator() (const FieldD &src_d_in, FieldD &sol_d){ | ||||
|       GridStopWatch TotalTimer; | ||||
|       TotalTimer.Start(); | ||||
|      | ||||
|       int cb = src_d_in.checkerboard; | ||||
|       sol_d.checkerboard = cb; | ||||
|      | ||||
|       RealD src_norm = norm2(src_d_in); | ||||
|       RealD stop = src_norm * Tolerance*Tolerance; | ||||
|  | ||||
|       GridBase* DoublePrecGrid = src_d_in._grid; | ||||
|       FieldD tmp_d(DoublePrecGrid); | ||||
|       tmp_d.checkerboard = cb; | ||||
|      | ||||
|       FieldD tmp2_d(DoublePrecGrid); | ||||
|       tmp2_d.checkerboard = cb; | ||||
|      | ||||
|       FieldD src_d(DoublePrecGrid); | ||||
|       src_d = src_d_in; //source for next inner iteration, computed from residual during operation | ||||
|      | ||||
|       RealD inner_tol = Tolerance; | ||||
|      | ||||
|       FieldF src_f(SinglePrecGrid); | ||||
|       src_f.checkerboard = cb; | ||||
|      | ||||
|       FieldF sol_f(SinglePrecGrid); | ||||
|       sol_f.checkerboard = cb; | ||||
|      | ||||
|       ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations); | ||||
|       CG_f.ErrorOnNoConverge = false; | ||||
|  | ||||
|       GridStopWatch InnerCGtimer; | ||||
|  | ||||
|       GridStopWatch PrecChangeTimer; | ||||
|      | ||||
|       for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ | ||||
| 	//Compute double precision rsd and also new RHS vector. | ||||
| 	Linop_d.HermOp(sol_d, tmp_d); | ||||
| 	RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector | ||||
|        | ||||
| 	std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl; | ||||
|  | ||||
| 	if(norm < OuterLoopNormMult * stop){ | ||||
| 	  std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl; | ||||
| 	  break; | ||||
| 	} | ||||
| 	while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? | ||||
|  | ||||
| 	PrecChangeTimer.Start(); | ||||
| 	precisionChange(src_f, src_d); | ||||
| 	PrecChangeTimer.Stop(); | ||||
|        | ||||
| 	zeroit(sol_f); | ||||
|  | ||||
| 	//Optionally improve inner solver guess (eg using known eigenvectors) | ||||
| 	if(guesser != NULL) | ||||
| 	  (*guesser)(src_f, sol_f); | ||||
|  | ||||
| 	//Inner CG | ||||
| 	CG_f.Tolerance = inner_tol; | ||||
| 	InnerCGtimer.Start(); | ||||
| 	CG_f(Linop_f, src_f, sol_f); | ||||
| 	InnerCGtimer.Stop(); | ||||
|        | ||||
| 	//Convert sol back to double and add to double prec solution | ||||
| 	PrecChangeTimer.Start(); | ||||
| 	precisionChange(tmp_d, sol_f); | ||||
| 	PrecChangeTimer.Stop(); | ||||
|        | ||||
| 	axpy(sol_d, 1.0, tmp_d, sol_d); | ||||
|       } | ||||
|      | ||||
|       //Final trial CG | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl; | ||||
|      | ||||
|       ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); | ||||
|       CG_d(Linop_d, src_d_in, sol_d); | ||||
|  | ||||
|       TotalTimer.Stop(); | ||||
|       std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl; | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
| @@ -243,8 +243,6 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector | ||||
|       if ( (!converged[s]) ){ | ||||
| 	 | ||||
| 	RealD css  = c * z[s][iz]* z[s][iz]; | ||||
|     if((k%100)==0 && (s==0) ) | ||||
| 	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" css " <<css <<std::endl; | ||||
| 	 | ||||
| 	if(css<rsq[s]){ | ||||
| 	  if ( ! converged[s] ) | ||||
|   | ||||
| @@ -130,8 +130,8 @@ DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, | ||||
|  | ||||
| } | ||||
|  | ||||
| #include <algorithms/iterative/Householder.h> | ||||
| #include <algorithms/iterative/Francis.h> | ||||
| #include "Householder.h" | ||||
| #include "Francis.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
|   | ||||
| @@ -33,8 +33,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifdef USE_LAPACK | ||||
| #include <lapacke.h> | ||||
| #endif | ||||
| #include <algorithms/iterative/DenseMatrix.h> | ||||
| #include <algorithms/iterative/EigenSort.h> | ||||
| #include "DenseMatrix.h" | ||||
| #include "EigenSort.h" | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_CARTESIAN_BASE_H | ||||
| #define GRID_CARTESIAN_BASE_H | ||||
|  | ||||
| #include <Grid.h> | ||||
|  | ||||
| namespace Grid{ | ||||
|  | ||||
| @@ -82,11 +81,8 @@ public: | ||||
|     virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; | ||||
|     virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; | ||||
|     virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; | ||||
|     // Checkerboard of an outer site: the outer index is first expanded to a | ||||
|     // coordinate by oCoorFromOindex, and CheckerBoard is then asked for the | ||||
|     // parity of that coordinate. | ||||
|     int CheckerBoardFromOindex(int Oindex) | ||||
|     { | ||||
|       std::vector<int> site_coor; | ||||
|       oCoorFromOindex(site_coor,Oindex); | ||||
|       return CheckerBoard(site_coor); | ||||
|     } | ||||
|     virtual int CheckerBoardFromOindex (int Oindex)=0; | ||||
|     virtual int CheckerBoardFromOindexTable (int Oindex)=0; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Local layout calculations | ||||
| @@ -107,6 +103,12 @@ public: | ||||
|         for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||
|         return idx; | ||||
|     } | ||||
|     // Inner index of a local coordinate. For every dimension the coordinate is | ||||
|     // divided (integer division) by the reduced extent _rdimensions[d] and the | ||||
|     // quotient is weighted by the inner stride _istride[d]; the sum is returned. | ||||
|     // Virtual so that derived grids can supply their own mapping. | ||||
|     virtual int iIndex(std::vector<int> &lcoor) | ||||
|     { | ||||
|         int lane=0; | ||||
|         int mu=0; | ||||
|         while(mu<_ndimension){ | ||||
|             lane+=_istride[mu]*(lcoor[mu]/_rdimensions[mu]); | ||||
|             ++mu; | ||||
|         } | ||||
|         return lane; | ||||
|     } | ||||
|     inline int oIndexReduced(std::vector<int> &ocoor) | ||||
|     { | ||||
|       int idx=0;  | ||||
| @@ -123,12 +125,6 @@ public: | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     // SIMD lane addressing | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     // Inner index of a local coordinate: sum over dimensions of the inner | ||||
|     // stride times the integer quotient coordinate / reduced extent. | ||||
|     inline int iIndex(std::vector<int> &lcoor) | ||||
|     { | ||||
|         int lane=0; | ||||
|         int mu=0; | ||||
|         while(mu<_ndimension){ | ||||
|             lane+=_istride[mu]*(lcoor[mu]/_rdimensions[mu]); | ||||
|             ++mu; | ||||
|         } | ||||
|         return lane; | ||||
|     } | ||||
|     inline void iCoorFromIindex(std::vector<int> &coor,int lane) | ||||
|     { | ||||
|       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); | ||||
| @@ -220,7 +216,7 @@ public: | ||||
|       } | ||||
|  | ||||
|       i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim | ||||
|       o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim | ||||
|       o_idx= oIndex(lcoor);  // this implies divide by 2 on checkerdim | ||||
|     } | ||||
|  | ||||
|     void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor) | ||||
|   | ||||
| @@ -39,6 +39,13 @@ class GridCartesian: public GridBase { | ||||
|  | ||||
| public: | ||||
|  | ||||
|     // Table-lookup form of the checkerboard query. GridCartesian keeps no | ||||
|     // table and reports checkerboard 0 for every outer index. | ||||
|     virtual int CheckerBoardFromOindexTable(int Oindex) | ||||
|     { | ||||
|       return 0; | ||||
|     } | ||||
|     // Computed form of the checkerboard query. GridCartesian reports | ||||
|     // checkerboard 0 for every outer index; Oindex is not inspected. | ||||
|     virtual int CheckerBoardFromOindex(int Oindex) { | ||||
|       return 0; | ||||
|     } | ||||
|     // Reports whether dimension dim is checkerboarded; GridCartesian answers | ||||
|     // 0 for every dimension. | ||||
|     virtual int CheckerBoarded(int dim) | ||||
|     { | ||||
|       return 0; | ||||
|     } | ||||
|   | ||||
| @@ -32,23 +32,18 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|     static const int CbRed  =0; | ||||
|     static const int CbBlack=1; | ||||
|     static const int Even   =CbRed; | ||||
|     static const int Odd    =CbBlack; | ||||
|  | ||||
|     // Perhaps these are misplaced and  | ||||
|     // should be in sparse matrix. | ||||
|     // Also should make these a named enum type | ||||
|     static const int DaggerNo=0; | ||||
|     static const int DaggerYes=1; | ||||
|  | ||||
|   static const int CbRed  =0; | ||||
|   static const int CbBlack=1; | ||||
|   static const int Even   =CbRed; | ||||
|   static const int Odd    =CbBlack; | ||||
|      | ||||
| // Specialise this for red black grids storing half the data like a chess board. | ||||
| class GridRedBlackCartesian : public GridBase | ||||
| { | ||||
| public: | ||||
|     std::vector<int> _checker_dim_mask; | ||||
|     int              _checker_dim; | ||||
|     std::vector<int> _checker_board; | ||||
|  | ||||
|     virtual int CheckerBoarded(int dim){ | ||||
|       if( dim==_checker_dim) return 1; | ||||
| @@ -78,12 +73,20 @@ public: | ||||
|       // or by looping over x,y,z and multiply rather than computing checkerboard. | ||||
| 	   | ||||
|       if ( (source_cb+ocb)&1 ) { | ||||
|  | ||||
| 	return (shift)/2; | ||||
|       } else { | ||||
| 	return (shift+1)/2; | ||||
|       } | ||||
|     } | ||||
|     // Checkerboard of outer site Oindex, read from the _checker_board lookup | ||||
|     // table. The index is used as-is; no range check is performed here. | ||||
|     virtual int CheckerBoardFromOindexTable(int Oindex) | ||||
|     { | ||||
|       const int cb = _checker_board[Oindex]; | ||||
|       return cb; | ||||
|     } | ||||
|     // Checkerboard of outer site Oindex, computed on demand: the index is | ||||
|     // expanded to a coordinate by oCoorFromOindex and CheckerBoard is | ||||
|     // evaluated on that coordinate. | ||||
|     virtual int CheckerBoardFromOindex(int Oindex) { | ||||
|       std::vector<int> site_coor; | ||||
|       oCoorFromOindex(site_coor,Oindex); | ||||
|       return CheckerBoard(site_coor); | ||||
|     } | ||||
|     virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ | ||||
|  | ||||
|       if(dim != _checker_dim) return shift; | ||||
| @@ -170,9 +173,15 @@ public: | ||||
| 	// Use a reduced simd grid | ||||
| 	_simd_layout[d] = simd_layout[d]; | ||||
| 	_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; | ||||
| 	assert(_rdimensions[d]>0); | ||||
|  | ||||
| 	// all elements of a simd vector must have same checkerboard. | ||||
| 	if ( simd_layout[d]>1 ) assert((_rdimensions[d]&0x1)==0);  | ||||
| 	// If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||
| 	if ( _simd_layout[d]>1 ) { | ||||
| 	  if ( checker_dim_mask[d] ) {  | ||||
| 	    assert( (_rdimensions[d]&0x1) == 0 ); | ||||
| 	  } | ||||
| 	} | ||||
|  | ||||
| 	_osites *= _rdimensions[d]; | ||||
| 	_isites *= _simd_layout[d]; | ||||
| @@ -185,6 +194,8 @@ public: | ||||
| 	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||
| 	  _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||
| 	} | ||||
|  | ||||
|  | ||||
|       } | ||||
|              | ||||
|       //////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -205,6 +216,18 @@ public: | ||||
| 	_slice_nblock[d]=nblock; | ||||
| 	block = block*_rdimensions[d]; | ||||
|       } | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // Create a checkerboard lookup table | ||||
|       //////////////////////////////////////////////// | ||||
|       int rvol = 1; | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	rvol=rvol * _rdimensions[d]; | ||||
|       } | ||||
|       _checker_board.resize(rvol); | ||||
|       for(int osite=0;osite<_osites;osite++){ | ||||
| 	_checker_board[osite] = CheckerBoardFromOindex (osite); | ||||
|       } | ||||
|        | ||||
|     }; | ||||
| protected: | ||||
| @@ -218,9 +241,21 @@ protected: | ||||
| 	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||
| 	} | ||||
|       } | ||||
|         return idx; | ||||
|       return idx; | ||||
|     }; | ||||
|          | ||||
|     // Inner index of a local coordinate on the red-black grid. Each dimension | ||||
|     // contributes _istride[d] times an integer quotient of the coordinate: | ||||
|     //   - ordinary dimensions divide by _rdimensions[d]; | ||||
|     //   - the checkerboarded dimension divides by 2*_rdimensions[d]. | ||||
|     // NOTE(review): the factor 2 presumably compensates for the halved extent | ||||
|     // stored for the checkerboarded dimension - confirm against the constructor. | ||||
|     virtual int iIndex(std::vector<int> &lcoor) | ||||
|     { | ||||
|         int lane=0; | ||||
|         for(int mu=0;mu<_ndimension;++mu) { | ||||
|           if( mu!=_checker_dim ) { | ||||
|             lane+=_istride[mu]*(lcoor[mu]/_rdimensions[mu]); | ||||
|             continue; | ||||
|           } | ||||
|           lane+=_istride[mu]*(lcoor[mu]/(2*_rdimensions[mu])); | ||||
|         } | ||||
|         return lane; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } | ||||
|   | ||||
							
								
								
									
										0
									
								
								lib/communicator/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/communicator/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -53,7 +53,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
|   _Nprocessors=1; | ||||
|   _processors = processors; | ||||
|   _processor_coor.resize(_ndimension); | ||||
|   std::cout << processors << std::endl; | ||||
|    | ||||
|   MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator); | ||||
|   MPI_Comm_rank(communicator,&_processor); | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
|  | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| @@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator< | ||||
|    | ||||
|   int e1=rhs._grid->_slice_nblock[dimension]; | ||||
|   int e2=rhs._grid->_slice_block[dimension]; | ||||
|  | ||||
|   int stride=rhs._grid->_slice_stride[dimension]; | ||||
|   if ( cbmask == 0x3 ) {  | ||||
| PARALLEL_NESTED_LOOP2 | ||||
| @@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2 | ||||
|     } | ||||
|   } else {  | ||||
|      int bo=0; | ||||
|      std::vector<std::pair<int,int> > table; | ||||
|      for(int n=0;n<e1;n++){ | ||||
|        for(int b=0;b<e2;b++){ | ||||
| 	 int o  = n*stride; | ||||
| 	 int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup | ||||
| 	 int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b); | ||||
| 	 if ( ocb &cbmask ) { | ||||
| 	   buffer[off+bo++]=compress(rhs._odata[so+o+b]); | ||||
| 	   table.push_back(std::pair<int,int> (bo++,o+b)); | ||||
| 	 } | ||||
|        } | ||||
|      } | ||||
| PARALLEL_FOR_LOOP      | ||||
|      for(int i=0;i<table.size();i++){ | ||||
|        buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]); | ||||
|      } | ||||
|   } | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										412
									
								
								lib/fftw/fftw3.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										412
									
								
								lib/fftw/fftw3.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,412 @@ | ||||
| /* | ||||
|  * Copyright (c) 2003, 2007-14 Matteo Frigo | ||||
|  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | ||||
|  * | ||||
|  * The following statement of license applies *only* to this header file, | ||||
|  * and *not* to the other files distributed with FFTW or derived therefrom: | ||||
|  *  | ||||
|  * Redistribution and use in source and binary forms, with or without | ||||
|  * modification, are permitted provided that the following conditions | ||||
|  * are met: | ||||
|  * | ||||
|  * 1. Redistributions of source code must retain the above copyright | ||||
|  *    notice, this list of conditions and the following disclaimer. | ||||
|  * | ||||
|  * 2. Redistributions in binary form must reproduce the above copyright | ||||
|  *    notice, this list of conditions and the following disclaimer in the | ||||
|  *    documentation and/or other materials provided with the distribution. | ||||
|  * | ||||
|  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | ||||
|  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||
|  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
|  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | ||||
|  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
|  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | ||||
|  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||
|  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||||
|  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||||
|  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||
|  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
|  */ | ||||
|  | ||||
| /***************************** NOTE TO USERS ********************************* | ||||
|  * | ||||
|  *                 THIS IS A HEADER FILE, NOT A MANUAL | ||||
|  * | ||||
|  *    If you want to know how to use FFTW, please read the manual, | ||||
|  *    online at http://www.fftw.org/doc/ and also included with FFTW. | ||||
|  *    For a quick start, see the manual's tutorial section. | ||||
|  * | ||||
|  *   (Reading header files to learn how to use a library is a habit | ||||
|  *    stemming from code lacking a proper manual.  Arguably, it's a | ||||
|  *    *bad* habit in most cases, because header files can contain | ||||
|  *    interfaces that are not part of the public, stable API.) | ||||
|  * | ||||
|  ****************************************************************************/ | ||||
|  | ||||
| #ifndef FFTW3_H | ||||
| #define FFTW3_H | ||||
|  | ||||
| #include <stdio.h> | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| extern "C" | ||||
| { | ||||
| #endif /* __cplusplus */ | ||||
|  | ||||
| /* If <complex.h> is included, use the C99 complex type.  Otherwise | ||||
|    define a type bit-compatible with C99 complex */ | ||||
| #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) | ||||
| #  define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C | ||||
| #else | ||||
| #  define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2] | ||||
| #endif | ||||
|  | ||||
| #define FFTW_CONCAT(prefix, name) prefix ## name | ||||
| #define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name) | ||||
| #define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name) | ||||
| #define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name) | ||||
| #define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name) | ||||
|  | ||||
| /* IMPORTANT: for Windows compilers, you should add a line | ||||
|         #define FFTW_DLL | ||||
|    here and in kernel/ifftw.h if you are compiling/using FFTW as a | ||||
|    DLL, in order to do the proper importing/exporting, or | ||||
|    alternatively compile with -DFFTW_DLL or the equivalent | ||||
|    command-line flag.  This is not necessary under MinGW/Cygwin, where | ||||
|    libtool does the imports/exports automatically. */ | ||||
| #if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) | ||||
|    /* annoying Windows syntax for shared-library declarations */ | ||||
| #  if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */ | ||||
| #    define FFTW_EXTERN extern __declspec(dllexport)  | ||||
| #  else /* user is calling FFTW; import symbol */ | ||||
| #    define FFTW_EXTERN extern __declspec(dllimport)  | ||||
| #  endif | ||||
| #else | ||||
| #  define FFTW_EXTERN extern | ||||
| #endif | ||||
|  | ||||
| enum fftw_r2r_kind_do_not_use_me { | ||||
|      FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2, | ||||
|      FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6, | ||||
|      FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10 | ||||
| }; | ||||
|  | ||||
| struct fftw_iodim_do_not_use_me { | ||||
|      int n;                     /* dimension size */ | ||||
|      int is;			/* input stride */ | ||||
|      int os;			/* output stride */ | ||||
| }; | ||||
|  | ||||
| #include <stddef.h> /* for ptrdiff_t */ | ||||
| struct fftw_iodim64_do_not_use_me { | ||||
|      ptrdiff_t n;                     /* dimension size */ | ||||
|      ptrdiff_t is;			/* input stride */ | ||||
|      ptrdiff_t os;			/* output stride */ | ||||
| }; | ||||
|  | ||||
| typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *); | ||||
| typedef int (*fftw_read_char_func_do_not_use_me)(void *); | ||||
|  | ||||
| /* | ||||
|   huge second-order macro that defines prototypes for all API | ||||
|   functions.  We expand this macro for each supported precision | ||||
|   | ||||
|   X: name-mangling macro | ||||
|   R: real data type | ||||
|   C: complex data type | ||||
| */ | ||||
|  | ||||
| #define FFTW_DEFINE_API(X, R, C)					   \ | ||||
| 									   \ | ||||
| FFTW_DEFINE_COMPLEX(R, C);						   \ | ||||
| 									   \ | ||||
| typedef struct X(plan_s) *X(plan);					   \ | ||||
| 									   \ | ||||
| typedef struct fftw_iodim_do_not_use_me X(iodim);			   \ | ||||
| typedef struct fftw_iodim64_do_not_use_me X(iodim64);			   \ | ||||
| 									   \ | ||||
| typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind);			   \ | ||||
| 									   \ | ||||
| typedef fftw_write_char_func_do_not_use_me X(write_char_func);		   \ | ||||
| typedef fftw_read_char_func_do_not_use_me X(read_char_func);		   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(execute)(const X(plan) p);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n,			   \ | ||||
| 		    C *in, C *out, int sign, unsigned flags);		   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign,	   \ | ||||
| 		       unsigned flags);					   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1,			   \ | ||||
| 		       C *in, C *out, int sign, unsigned flags);	   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2,		   \ | ||||
| 		       C *in, C *out, int sign, unsigned flags);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n,		   \ | ||||
|                          int howmany,					   \ | ||||
|                          C *in, const int *inembed,			   \ | ||||
|                          int istride, int idist,			   \ | ||||
|                          C *out, const int *onembed,			   \ | ||||
|                          int ostride, int odist,			   \ | ||||
|                          int sign, unsigned flags);			   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims,	   \ | ||||
| 			 int howmany_rank,				   \ | ||||
| 			 const X(iodim) *howmany_dims,			   \ | ||||
| 			 C *in, C *out,					   \ | ||||
| 			 int sign, unsigned flags);			   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \ | ||||
| 			 int howmany_rank,				   \ | ||||
| 			 const X(iodim) *howmany_dims,			   \ | ||||
| 			 R *ri, R *ii, R *ro, R *io,			   \ | ||||
| 			 unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank,			   \ | ||||
|                          const X(iodim64) *dims,			   \ | ||||
| 			 int howmany_rank,				   \ | ||||
| 			 const X(iodim64) *howmany_dims,		   \ | ||||
| 			 C *in, C *out,					   \ | ||||
| 			 int sign, unsigned flags);			   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank,			   \ | ||||
|                          const X(iodim64) *dims,			   \ | ||||
| 			 int howmany_rank,				   \ | ||||
| 			 const X(iodim64) *howmany_dims,		   \ | ||||
| 			 R *ri, R *ii, R *ro, R *io,			   \ | ||||
| 			 unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out);	   \ | ||||
| FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii,	   \ | ||||
|                                       R *ro, R *io);			   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n,	   \ | ||||
|                              int howmany,				   \ | ||||
|                              R *in, const int *inembed,			   \ | ||||
|                              int istride, int idist,			   \ | ||||
|                              C *out, const int *onembed,		   \ | ||||
|                              int ostride, int odist,			   \ | ||||
|                              unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n,		   \ | ||||
|                         R *in, C *out, unsigned flags);			   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1,			   \ | ||||
| 			   R *in, C *out, unsigned flags);		   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1,			   \ | ||||
| 			   int n2,					   \ | ||||
| 			   R *in, C *out, unsigned flags);		   \ | ||||
| 									   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n,	   \ | ||||
| 			     int howmany,				   \ | ||||
| 			     C *in, const int *inembed,			   \ | ||||
| 			     int istride, int idist,			   \ | ||||
| 			     R *out, const int *onembed,		   \ | ||||
| 			     int ostride, int odist,			   \ | ||||
| 			     unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n,		   \ | ||||
|                         C *in, R *out, unsigned flags);			   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1,			   \ | ||||
| 			   C *in, R *out, unsigned flags);		   \ | ||||
| FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1,			   \ | ||||
| 			   int n2,					   \ | ||||
| 			   C *in, R *out, unsigned flags);		   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims,   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim) *howmany_dims,		   \ | ||||
| 			     R *in, C *out,				   \ | ||||
| 			     unsigned flags);				   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims,   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim) *howmany_dims,		   \ | ||||
| 			     C *in, R *out,				   \ | ||||
| 			     unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)(				   \ | ||||
|                              int rank, const X(iodim) *dims,		   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim) *howmany_dims,		   \ | ||||
| 			     R *in, R *ro, R *io,			   \ | ||||
| 			     unsigned flags);				   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)(				   \ | ||||
|                              int rank, const X(iodim) *dims,		   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim) *howmany_dims,		   \ | ||||
| 			     R *ri, R *ii, R *out,			   \ | ||||
| 			     unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank,			   \ | ||||
|                              const X(iodim64) *dims,			   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim64) *howmany_dims,		   \ | ||||
| 			     R *in, C *out,				   \ | ||||
| 			     unsigned flags);				   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank,			   \ | ||||
|                              const X(iodim64) *dims,			   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim64) *howmany_dims,		   \ | ||||
| 			     C *in, R *out,				   \ | ||||
| 			     unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)(			   \ | ||||
|                              int rank, const X(iodim64) *dims,		   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim64) *howmany_dims,		   \ | ||||
| 			     R *in, R *ro, R *io,			   \ | ||||
| 			     unsigned flags);				   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)(			   \ | ||||
|                              int rank, const X(iodim64) *dims,		   \ | ||||
| 			     int howmany_rank,				   \ | ||||
| 			     const X(iodim64) *howmany_dims,		   \ | ||||
| 			     R *ri, R *ii, R *out,			   \ | ||||
| 			     unsigned flags);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out);	   \ | ||||
| FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p,		   \ | ||||
|                                           R *in, R *ro, R *io);		   \ | ||||
| FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p,		   \ | ||||
|                                           R *ri, R *ii, R *out);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n,		   \ | ||||
|                          int howmany,					   \ | ||||
|                          R *in, const int *inembed,			   \ | ||||
|                          int istride, int idist,			   \ | ||||
|                          R *out, const int *onembed,			   \ | ||||
|                          int ostride, int odist,			   \ | ||||
|                          const X(r2r_kind) *kind, unsigned flags);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out,	   \ | ||||
|                     const X(r2r_kind) *kind, unsigned flags);		   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out,		   \ | ||||
|                        X(r2r_kind) kind, unsigned flags);		   \ | ||||
| FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out,	   \ | ||||
|                        X(r2r_kind) kind0, X(r2r_kind) kind1,		   \ | ||||
|                        unsigned flags);					   \ | ||||
| FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2,		   \ | ||||
|                        R *in, R *out, X(r2r_kind) kind0,		   \ | ||||
|                        X(r2r_kind) kind1, X(r2r_kind) kind2,		   \ | ||||
|                        unsigned flags);					   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims,	   \ | ||||
|                          int howmany_rank,				   \ | ||||
|                          const X(iodim) *howmany_dims,			   \ | ||||
|                          R *in, R *out,					   \ | ||||
|                          const X(r2r_kind) *kind, unsigned flags);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims,   \ | ||||
|                          int howmany_rank,				   \ | ||||
|                          const X(iodim64) *howmany_dims,		   \ | ||||
|                          R *in, R *out,					   \ | ||||
|                          const X(r2r_kind) *kind, unsigned flags);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out);	   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(destroy_plan)(X(plan) p);				   \ | ||||
| FFTW_EXTERN void X(forget_wisdom)(void);				   \ | ||||
| FFTW_EXTERN void X(cleanup)(void);					   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(set_timelimit)(double t);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(plan_with_nthreads)(int nthreads);			   \ | ||||
| FFTW_EXTERN int X(init_threads)(void);					   \ | ||||
| FFTW_EXTERN void X(cleanup_threads)(void);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename);	   \ | ||||
| FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file);		   \ | ||||
| FFTW_EXTERN char *X(export_wisdom_to_string)(void);			   \ | ||||
| FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char,   	   \ | ||||
|                                   void *data);				   \ | ||||
| FFTW_EXTERN int X(import_system_wisdom)(void);				   \ | ||||
| FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename);	   \ | ||||
| FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file);		   \ | ||||
| FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string);	   \ | ||||
| FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file);	   \ | ||||
| FFTW_EXTERN void X(print_plan)(const X(plan) p);			   \ | ||||
| FFTW_EXTERN char *X(sprint_plan)(const X(plan) p);			   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void *X(malloc)(size_t n);					   \ | ||||
| FFTW_EXTERN R *X(alloc_real)(size_t n);					   \ | ||||
| FFTW_EXTERN C *X(alloc_complex)(size_t n);				   \ | ||||
| FFTW_EXTERN void X(free)(void *p);					   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN void X(flops)(const X(plan) p,				   \ | ||||
|                           double *add, double *mul, double *fmas);	   \ | ||||
| FFTW_EXTERN double X(estimate_cost)(const X(plan) p);			   \ | ||||
| FFTW_EXTERN double X(cost)(const X(plan) p);				   \ | ||||
| 									   \ | ||||
| FFTW_EXTERN int X(alignment_of)(R *p);                                     \ | ||||
| FFTW_EXTERN const char X(version)[];                                       \ | ||||
| FFTW_EXTERN const char X(cc)[];						   \ | ||||
| FFTW_EXTERN const char X(codelet_optim)[]; | ||||
|  | ||||
|  | ||||
| /* end of FFTW_DEFINE_API macro */ | ||||
|  | ||||
| FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) | ||||
| FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) | ||||
| FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) | ||||
|  | ||||
| /* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64 | ||||
|    for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */ | ||||
| #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \ | ||||
|  && !(defined(__ICC) || defined(__INTEL_COMPILER)) \ | ||||
|  && (defined(__i386__) || defined(__x86_64__) || defined(__ia64__)) | ||||
| #  if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) | ||||
| /* note: __float128 is a typedef, which is not supported with the _Complex | ||||
|          keyword in gcc, so instead we use this ugly __attribute__ version. | ||||
|          However, we can't simply pass the __attribute__ version to | ||||
|          FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer | ||||
|          types.  Hence redefining FFTW_DEFINE_COMPLEX.  Ugh. */ | ||||
| #    undef FFTW_DEFINE_COMPLEX | ||||
| #    define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C | ||||
| #  endif | ||||
| FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex) | ||||
| #endif | ||||
|  | ||||
| #define FFTW_FORWARD (-1) | ||||
| #define FFTW_BACKWARD (+1) | ||||
|  | ||||
| #define FFTW_NO_TIMELIMIT (-1.0) | ||||
|  | ||||
| /* documented flags */ | ||||
| #define FFTW_MEASURE (0U) | ||||
| #define FFTW_DESTROY_INPUT (1U << 0) | ||||
| #define FFTW_UNALIGNED (1U << 1) | ||||
| #define FFTW_CONSERVE_MEMORY (1U << 2) | ||||
| #define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */ | ||||
| #define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */ | ||||
| #define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */ | ||||
| #define FFTW_ESTIMATE (1U << 6) | ||||
| #define FFTW_WISDOM_ONLY (1U << 21) | ||||
|  | ||||
| /* undocumented beyond-guru flags */ | ||||
| #define FFTW_ESTIMATE_PATIENT (1U << 7) | ||||
| #define FFTW_BELIEVE_PCOST (1U << 8) | ||||
| #define FFTW_NO_DFT_R2HC (1U << 9) | ||||
| #define FFTW_NO_NONTHREADED (1U << 10) | ||||
| #define FFTW_NO_BUFFERING (1U << 11) | ||||
| #define FFTW_NO_INDIRECT_OP (1U << 12) | ||||
| #define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */ | ||||
| #define FFTW_NO_RANK_SPLITS (1U << 14) | ||||
| #define FFTW_NO_VRANK_SPLITS (1U << 15) | ||||
| #define FFTW_NO_VRECURSE (1U << 16) | ||||
| #define FFTW_NO_SIMD (1U << 17) | ||||
| #define FFTW_NO_SLOW (1U << 18) | ||||
| #define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19) | ||||
| #define FFTW_ALLOW_PRUNING (1U << 20) | ||||
|  | ||||
| #ifdef __cplusplus | ||||
| }  /* extern "C" */ | ||||
| #endif /* __cplusplus */ | ||||
|  | ||||
| #endif /* FFTW3_H */ | ||||
| @@ -1,73 +1,74 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/lattice/Lattice_ET.h | ||||
| Source file: ./lib/lattice/Lattice_ET.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_LATTICE_ET_H | ||||
| #define GRID_LATTICE_ET_H | ||||
|  | ||||
| #include <iostream> | ||||
| #include <vector> | ||||
| #include <tuple> | ||||
| #include <typeinfo> | ||||
| #include <vector> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   //////////////////////////////////////////////////// | ||||
|   // Predicated where support | ||||
|   //////////////////////////////////////////////////// | ||||
|   template<class iobj,class vobj,class robj> | ||||
|     inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) { | ||||
| //////////////////////////////////////////////////// | ||||
| // Predicated where support | ||||
| //////////////////////////////////////////////////// | ||||
| template <class iobj, class vobj, class robj> | ||||
| inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, | ||||
|                             const robj &iffalse) { | ||||
|   typename std::remove_const<vobj>::type ret; | ||||
|  | ||||
|     typename std::remove_const<vobj>::type ret; | ||||
|   typedef typename vobj::scalar_object scalar_object; | ||||
|   typedef typename vobj::scalar_type scalar_type; | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|  | ||||
|     typedef typename vobj::scalar_object scalar_object; | ||||
|     typedef typename vobj::scalar_type scalar_type; | ||||
|     typedef typename vobj::vector_type vector_type; | ||||
|   const int Nsimd = vobj::vector_type::Nsimd(); | ||||
|   const int words = sizeof(vobj) / sizeof(vector_type); | ||||
|  | ||||
|     const int Nsimd = vobj::vector_type::Nsimd(); | ||||
|     const int words = sizeof(vobj)/sizeof(vector_type); | ||||
|   std::vector<Integer> mask(Nsimd); | ||||
|   std::vector<scalar_object> truevals(Nsimd); | ||||
|   std::vector<scalar_object> falsevals(Nsimd); | ||||
|  | ||||
|     std::vector<Integer> mask(Nsimd); | ||||
|     std::vector<scalar_object> truevals (Nsimd); | ||||
|     std::vector<scalar_object> falsevals(Nsimd); | ||||
|   extract(iftrue, truevals); | ||||
|   extract(iffalse, falsevals); | ||||
|   extract<vInteger, Integer>(TensorRemove(predicate), mask); | ||||
|  | ||||
|     extract(iftrue   ,truevals); | ||||
|     extract(iffalse  ,falsevals); | ||||
|     extract<vInteger,Integer>(TensorRemove(predicate),mask); | ||||
|  | ||||
|     for(int s=0;s<Nsimd;s++){ | ||||
|       if (mask[s]) falsevals[s]=truevals[s]; | ||||
|     } | ||||
|  | ||||
|     merge(ret,falsevals); | ||||
|     return ret; | ||||
|   for (int s = 0; s < Nsimd; s++) { | ||||
|     if (mask[s]) falsevals[s] = truevals[s]; | ||||
|   } | ||||
|  | ||||
|   merge(ret, falsevals); | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // recursive evaluation of expressions; Could | ||||
| // switch to generic approach with variadics, a la | ||||
| @@ -75,303 +76,351 @@ namespace Grid { | ||||
| // from tuple is hideous; C++14 introduces std::make_index_sequence for this | ||||
| //////////////////////////////////////////// | ||||
|  | ||||
| // leaf eval of lattice ; should enable if protect using traits | ||||
|  | ||||
| //leaf eval of lattice ; should enable if protect using traits | ||||
| template <typename T> | ||||
| using is_lattice = std::is_base_of<LatticeBase, T>; | ||||
|  | ||||
| template <typename T> using is_lattice      = std::is_base_of<LatticeBase,T >; | ||||
| template <typename T> | ||||
| using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>; | ||||
|  | ||||
| template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >; | ||||
|  | ||||
| //Specialization of getVectorType for lattices | ||||
| template<typename T> | ||||
| struct getVectorType<Lattice<T> >{ | ||||
|   typedef typename Lattice<T>::vector_object type; | ||||
| }; | ||||
|   | ||||
| template<class sobj> | ||||
| inline sobj eval(const unsigned int ss, const sobj &arg) | ||||
| { | ||||
|   return arg; | ||||
| } | ||||
| template<class lobj> | ||||
| inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) | ||||
| { | ||||
|     return arg._odata[ss]; | ||||
| template <class lobj> | ||||
| inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) { | ||||
|   return arg._odata[ss]; | ||||
| } | ||||
|  | ||||
| // handle nodes in syntax tree | ||||
| template <typename Op, typename T1> | ||||
| auto inline eval(const unsigned int ss, const LatticeUnaryExpression<Op,T1 > &expr) // eval one operand | ||||
|   -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)))) | ||||
| { | ||||
|   return expr.first.func(eval(ss,std::get<0>(expr.second))); | ||||
| auto inline eval( | ||||
|     const unsigned int ss, | ||||
|     const LatticeUnaryExpression<Op, T1> &expr)  // eval one operand | ||||
|     -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) { | ||||
|   return expr.first.func(eval(ss, std::get<0>(expr.second))); | ||||
| } | ||||
|  | ||||
| template <typename Op, typename T1, typename T2> | ||||
| auto inline eval(const unsigned int ss, const LatticeBinaryExpression<Op,T1,T2> &expr) // eval two operands | ||||
|   -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)))) | ||||
| { | ||||
|   return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))); | ||||
| auto inline eval( | ||||
|     const unsigned int ss, | ||||
|     const LatticeBinaryExpression<Op, T1, T2> &expr)  // eval two operands | ||||
|     -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), | ||||
|                                 eval(ss, std::get<1>(expr.second)))) { | ||||
|   return expr.first.func(eval(ss, std::get<0>(expr.second)), | ||||
|                          eval(ss, std::get<1>(expr.second))); | ||||
| } | ||||
|  | ||||
| template <typename Op, typename T1, typename T2, typename T3> | ||||
| auto inline eval(const unsigned int ss, const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) // eval three operands | ||||
|   -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)))) | ||||
| { | ||||
|   return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) ); | ||||
| auto inline eval(const unsigned int ss, | ||||
|                  const LatticeTrinaryExpression<Op, T1, T2, T3> | ||||
|                      &expr)  // eval three operands | ||||
|     -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), | ||||
|                                 eval(ss, std::get<1>(expr.second)), | ||||
|                                 eval(ss, std::get<2>(expr.second)))) { | ||||
|   return expr.first.func(eval(ss, std::get<0>(expr.second)), | ||||
|                          eval(ss, std::get<1>(expr.second)), | ||||
|                          eval(ss, std::get<2>(expr.second))); | ||||
| } | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion | ||||
| // Obtain the grid from an expression, ensuring conformable. This must follow a | ||||
| // tree recursion | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr > | ||||
| inline void GridFromExpression(GridBase * &grid,const T1& lat)   // Lattice leaf | ||||
| { | ||||
|   if ( grid ) { | ||||
|     conformable(grid,lat._grid); | ||||
|   }  | ||||
|   grid=lat._grid; | ||||
| } | ||||
| template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr > | ||||
| inline void GridFromExpression(GridBase * &grid,const T1& notlat)   // non-lattice leaf | ||||
| template <class T1, | ||||
|           typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr> | ||||
| inline void GridFromExpression(GridBase *&grid, const T1 &lat)  // Lattice leaf | ||||
| { | ||||
|   if (grid) { | ||||
|     conformable(grid, lat._grid); | ||||
|   } | ||||
|   grid = lat._grid; | ||||
| } | ||||
| template <class T1, | ||||
|           typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr> | ||||
| inline void GridFromExpression(GridBase *&grid, | ||||
|                                const T1 &notlat)  // non-lattice leaf | ||||
| {} | ||||
| template <typename Op, typename T1> | ||||
| inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr) | ||||
| { | ||||
|   GridFromExpression(grid,std::get<0>(expr.second));// recurse  | ||||
| inline void GridFromExpression(GridBase *&grid, | ||||
|                                const LatticeUnaryExpression<Op, T1> &expr) { | ||||
|   GridFromExpression(grid, std::get<0>(expr.second));  // recurse | ||||
| } | ||||
|  | ||||
| template <typename Op, typename T1, typename T2> | ||||
| inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression<Op,T1,T2> &expr)  | ||||
| { | ||||
|   GridFromExpression(grid,std::get<0>(expr.second));// recurse | ||||
|   GridFromExpression(grid,std::get<1>(expr.second)); | ||||
| inline void GridFromExpression( | ||||
|     GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) { | ||||
|   GridFromExpression(grid, std::get<0>(expr.second));  // recurse | ||||
|   GridFromExpression(grid, std::get<1>(expr.second)); | ||||
| } | ||||
| template <typename Op, typename T1, typename T2, typename T3> | ||||
| inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)  | ||||
| { | ||||
|   GridFromExpression(grid,std::get<0>(expr.second));// recurse | ||||
|   GridFromExpression(grid,std::get<1>(expr.second)); | ||||
|   GridFromExpression(grid,std::get<2>(expr.second)); | ||||
| inline void GridFromExpression( | ||||
|     GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) { | ||||
|   GridFromExpression(grid, std::get<0>(expr.second));  // recurse | ||||
|   GridFromExpression(grid, std::get<1>(expr.second)); | ||||
|   GridFromExpression(grid, std::get<2>(expr.second)); | ||||
| } | ||||
|  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion | ||||
| // Obtain the CB from an expression, ensuring conformable. This must follow a | ||||
| // tree recursion | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr > | ||||
| inline void CBFromExpression(int &cb,const T1& lat)   // Lattice leaf | ||||
| template <class T1, | ||||
|           typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr> | ||||
| inline void CBFromExpression(int &cb, const T1 &lat)  // Lattice leaf | ||||
| { | ||||
|   if ( (cb==Odd) || (cb==Even) ) { | ||||
|     assert(cb==lat.checkerboard); | ||||
|   }  | ||||
|   cb=lat.checkerboard; | ||||
|   if ((cb == Odd) || (cb == Even)) { | ||||
|     assert(cb == lat.checkerboard); | ||||
|   } | ||||
|   cb = lat.checkerboard; | ||||
|   //  std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl; | ||||
| } | ||||
| template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr > | ||||
| inline void CBFromExpression(int &cb,const T1& notlat)   // non-lattice leaf | ||||
| template <class T1, | ||||
|           typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr> | ||||
| inline void CBFromExpression(int &cb, const T1 &notlat)  // non-lattice leaf | ||||
| { | ||||
|   //  std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl; | ||||
| } | ||||
| template <typename Op, typename T1> | ||||
| inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr) | ||||
| { | ||||
|   CBFromExpression(cb,std::get<0>(expr.second));// recurse  | ||||
| inline void CBFromExpression(int &cb, | ||||
|                              const LatticeUnaryExpression<Op, T1> &expr) { | ||||
|   CBFromExpression(cb, std::get<0>(expr.second));  // recurse | ||||
|   //  std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl; | ||||
| } | ||||
|  | ||||
| template <typename Op, typename T1, typename T2> | ||||
| inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)  | ||||
| { | ||||
|   CBFromExpression(cb,std::get<0>(expr.second));// recurse | ||||
|   CBFromExpression(cb,std::get<1>(expr.second)); | ||||
| inline void CBFromExpression(int &cb, | ||||
|                              const LatticeBinaryExpression<Op, T1, T2> &expr) { | ||||
|   CBFromExpression(cb, std::get<0>(expr.second));  // recurse | ||||
|   CBFromExpression(cb, std::get<1>(expr.second)); | ||||
|   //  std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl; | ||||
| } | ||||
| template <typename Op, typename T1, typename T2, typename T3> | ||||
| inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)  | ||||
| { | ||||
|   CBFromExpression(cb,std::get<0>(expr.second));// recurse | ||||
|   CBFromExpression(cb,std::get<1>(expr.second)); | ||||
|   CBFromExpression(cb,std::get<2>(expr.second)); | ||||
| inline void CBFromExpression( | ||||
|     int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) { | ||||
|   CBFromExpression(cb, std::get<0>(expr.second));  // recurse | ||||
|   CBFromExpression(cb, std::get<1>(expr.second)); | ||||
|   CBFromExpression(cb, std::get<2>(expr.second)); | ||||
|   //  std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl; | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Unary operators and funcs | ||||
| //////////////////////////////////////////// | ||||
| #define GridUnopClass(name,ret)\ | ||||
| template <class arg> struct name\ | ||||
| {\ | ||||
|   static auto inline func(const arg a)-> decltype(ret) { return ret; } \ | ||||
| }; | ||||
| #define GridUnopClass(name, ret)                                          \ | ||||
|   template <class arg>                                                    \ | ||||
|   struct name {                                                           \ | ||||
|     static auto inline func(const arg a) -> decltype(ret) { return ret; } \ | ||||
|   }; | ||||
|  | ||||
| GridUnopClass(UnarySub,-a); | ||||
| GridUnopClass(UnaryNot,Not(a)); | ||||
| GridUnopClass(UnaryAdj,adj(a)); | ||||
| GridUnopClass(UnaryConj,conjugate(a)); | ||||
| GridUnopClass(UnaryTrace,trace(a)); | ||||
| GridUnopClass(UnaryTranspose,transpose(a)); | ||||
| GridUnopClass(UnaryTa,Ta(a)); | ||||
| GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a)); | ||||
| GridUnopClass(UnaryReal,real(a)); | ||||
| GridUnopClass(UnaryImag,imag(a)); | ||||
| GridUnopClass(UnaryToReal,toReal(a)); | ||||
| GridUnopClass(UnaryToComplex,toComplex(a)); | ||||
| GridUnopClass(UnaryAbs,abs(a)); | ||||
| GridUnopClass(UnarySqrt,sqrt(a)); | ||||
| GridUnopClass(UnaryRsqrt,rsqrt(a)); | ||||
| GridUnopClass(UnarySin,sin(a)); | ||||
| GridUnopClass(UnaryCos,cos(a)); | ||||
| GridUnopClass(UnaryLog,log(a)); | ||||
| GridUnopClass(UnaryExp,exp(a)); | ||||
| GridUnopClass(UnarySub, -a); | ||||
| GridUnopClass(UnaryNot, Not(a)); | ||||
| GridUnopClass(UnaryAdj, adj(a)); | ||||
| GridUnopClass(UnaryConj, conjugate(a)); | ||||
| GridUnopClass(UnaryTrace, trace(a)); | ||||
| GridUnopClass(UnaryTranspose, transpose(a)); | ||||
| GridUnopClass(UnaryTa, Ta(a)); | ||||
| GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a)); | ||||
| GridUnopClass(UnaryReal, real(a)); | ||||
| GridUnopClass(UnaryImag, imag(a)); | ||||
| GridUnopClass(UnaryToReal, toReal(a)); | ||||
| GridUnopClass(UnaryToComplex, toComplex(a)); | ||||
| GridUnopClass(UnaryTimesI, timesI(a)); | ||||
| GridUnopClass(UnaryTimesMinusI, timesMinusI(a)); | ||||
| GridUnopClass(UnaryAbs, abs(a)); | ||||
| GridUnopClass(UnarySqrt, sqrt(a)); | ||||
| GridUnopClass(UnaryRsqrt, rsqrt(a)); | ||||
| GridUnopClass(UnarySin, sin(a)); | ||||
| GridUnopClass(UnaryCos, cos(a)); | ||||
| GridUnopClass(UnaryAsin, asin(a)); | ||||
| GridUnopClass(UnaryAcos, acos(a)); | ||||
| GridUnopClass(UnaryLog, log(a)); | ||||
| GridUnopClass(UnaryExp, exp(a)); | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Binary operators | ||||
| //////////////////////////////////////////// | ||||
| #define GridBinOpClass(name,combination)\ | ||||
| template <class left,class right>\ | ||||
| struct name\ | ||||
| {\ | ||||
|   static auto inline func(const left &lhs,const right &rhs)-> decltype(combination) const \ | ||||
|     {\ | ||||
|       return combination;\ | ||||
|     }\ | ||||
| } | ||||
| GridBinOpClass(BinaryAdd,lhs+rhs); | ||||
| GridBinOpClass(BinarySub,lhs-rhs); | ||||
| GridBinOpClass(BinaryMul,lhs*rhs); | ||||
| #define GridBinOpClass(name, combination)                      \ | ||||
|   template <class left, class right>                           \ | ||||
|   struct name {                                                \ | ||||
|     static auto inline func(const left &lhs, const right &rhs) \ | ||||
|         -> decltype(combination) const {                       \ | ||||
|       return combination;                                      \ | ||||
|     }                                                          \ | ||||
|   } | ||||
| GridBinOpClass(BinaryAdd, lhs + rhs); | ||||
| GridBinOpClass(BinarySub, lhs - rhs); | ||||
| GridBinOpClass(BinaryMul, lhs *rhs); | ||||
|  | ||||
| GridBinOpClass(BinaryAnd   ,lhs&rhs); | ||||
| GridBinOpClass(BinaryOr    ,lhs|rhs); | ||||
| GridBinOpClass(BinaryAndAnd,lhs&&rhs); | ||||
| GridBinOpClass(BinaryOrOr  ,lhs||rhs); | ||||
| GridBinOpClass(BinaryAnd, lhs &rhs); | ||||
| GridBinOpClass(BinaryOr, lhs | rhs); | ||||
| GridBinOpClass(BinaryAndAnd, lhs &&rhs); | ||||
| GridBinOpClass(BinaryOrOr, lhs || rhs); | ||||
|  | ||||
| //////////////////////////////////////////////////// | ||||
| // Trinary conditional op | ||||
| //////////////////////////////////////////////////// | ||||
| #define GridTrinOpClass(name,combination)\ | ||||
| template <class predicate,class left, class right>	\ | ||||
| struct name\ | ||||
| {\ | ||||
|   static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \ | ||||
|     {\ | ||||
|       return combination;\ | ||||
|     }\ | ||||
| } | ||||
| #define GridTrinOpClass(name, combination)                                     \ | ||||
|   template <class predicate, class left, class right>                          \ | ||||
|   struct name {                                                                \ | ||||
|     static auto inline func(const predicate &pred, const left &lhs,            \ | ||||
|                             const right &rhs) -> decltype(combination) const { \ | ||||
|       return combination;                                                      \ | ||||
|     }                                                                          \ | ||||
|   } | ||||
|  | ||||
| GridTrinOpClass(TrinaryWhere,(predicatedWhere<predicate, \ | ||||
| 			       typename std::remove_reference<left>::type, \ | ||||
| 			       typename std::remove_reference<right>::type> (pred,lhs,rhs))); | ||||
| GridTrinOpClass( | ||||
|     TrinaryWhere, | ||||
|     (predicatedWhere<predicate, typename std::remove_reference<left>::type, | ||||
|                      typename std::remove_reference<right>::type>(pred, lhs, | ||||
|                                                                   rhs))); | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Operator syntactical glue | ||||
| //////////////////////////////////////////// | ||||
|   | ||||
| #define GRID_UNOP(name)   name<decltype(eval(0, arg))> | ||||
| #define GRID_BINOP(name)  name<decltype(eval(0, lhs)), decltype(eval(0, rhs))> | ||||
| #define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))> | ||||
|  | ||||
| #define GRID_DEF_UNOP(op, name)\ | ||||
| template <typename T1,\ | ||||
|   typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr> inline auto op(const T1 &arg) \ | ||||
|   -> decltype(LatticeUnaryExpression<GRID_UNOP(name),const T1&>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \ | ||||
| { return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); } | ||||
| #define GRID_UNOP(name) name<decltype(eval(0, arg))> | ||||
| #define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))> | ||||
| #define GRID_TRINOP(name) \ | ||||
|   name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))> | ||||
|  | ||||
| #define GRID_BINOP_LEFT(op, name)\ | ||||
| template <typename T1,typename T2,\ | ||||
|           typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr>\ | ||||
| inline auto op(const T1 &lhs,const T2&rhs) \ | ||||
|   -> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\ | ||||
| 											    std::forward_as_tuple(lhs, rhs)))) \ | ||||
| {\ | ||||
|  return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\ | ||||
| 									  std::forward_as_tuple(lhs, rhs))); \ | ||||
| } | ||||
| #define GRID_DEF_UNOP(op, name)                                             \ | ||||
|   template <typename T1,                                                    \ | ||||
|             typename std::enable_if<is_lattice<T1>::value ||                \ | ||||
|                                         is_lattice_expr<T1>::value,         \ | ||||
|                                     T1>::type * = nullptr>                  \ | ||||
|   inline auto op(const T1 &arg)                                             \ | ||||
|       ->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(       \ | ||||
|           std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \ | ||||
|     return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(             \ | ||||
|         std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)));     \ | ||||
|   } | ||||
|  | ||||
| #define GRID_BINOP_RIGHT(op, name)\ | ||||
|  template <typename T1,typename T2,\ | ||||
|            typename std::enable_if<!is_lattice<T1>::value && !is_lattice_expr<T1>::value, T1>::type* = nullptr,\ | ||||
|            typename std::enable_if< is_lattice<T2>::value ||  is_lattice_expr<T2>::value, T2>::type* = nullptr> \ | ||||
| inline auto op(const T1 &lhs,const T2&rhs)			\ | ||||
|   -> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\ | ||||
| 											    std::forward_as_tuple(lhs, rhs)))) \ | ||||
| {\ | ||||
|  return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\ | ||||
| 								          std::forward_as_tuple(lhs, rhs))); \ | ||||
| } | ||||
| #define GRID_BINOP_LEFT(op, name)                                             \ | ||||
|   template <typename T1, typename T2,                                         \ | ||||
|             typename std::enable_if<is_lattice<T1>::value ||                  \ | ||||
|                                         is_lattice_expr<T1>::value,           \ | ||||
|                                     T1>::type * = nullptr>                    \ | ||||
|   inline auto op(const T1 &lhs, const T2 &rhs)                                \ | ||||
|       ->decltype(                                                             \ | ||||
|           LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(  \ | ||||
|               std::make_pair(GRID_BINOP(name)(),                              \ | ||||
|                              std::forward_as_tuple(lhs, rhs)))) {             \ | ||||
|     return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \ | ||||
|         std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \ | ||||
|   } | ||||
|  | ||||
| #define GRID_DEF_BINOP(op, name)\ | ||||
|  GRID_BINOP_LEFT(op,name);\ | ||||
|  GRID_BINOP_RIGHT(op,name); | ||||
| #define GRID_BINOP_RIGHT(op, name)                                            \ | ||||
|   template <typename T1, typename T2,                                         \ | ||||
|             typename std::enable_if<!is_lattice<T1>::value &&                 \ | ||||
|                                         !is_lattice_expr<T1>::value,          \ | ||||
|                                     T1>::type * = nullptr,                    \ | ||||
|             typename std::enable_if<is_lattice<T2>::value ||                  \ | ||||
|                                         is_lattice_expr<T2>::value,           \ | ||||
|                                     T2>::type * = nullptr>                    \ | ||||
|   inline auto op(const T1 &lhs, const T2 &rhs)                                \ | ||||
|       ->decltype(                                                             \ | ||||
|           LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(  \ | ||||
|               std::make_pair(GRID_BINOP(name)(),                              \ | ||||
|                              std::forward_as_tuple(lhs, rhs)))) {             \ | ||||
|     return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \ | ||||
|         std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \ | ||||
|   } | ||||
|  | ||||
| #define GRID_DEF_BINOP(op, name) \ | ||||
|   GRID_BINOP_LEFT(op, name);     \ | ||||
|   GRID_BINOP_RIGHT(op, name); | ||||
|  | ||||
| #define GRID_DEF_TRINOP(op, name)\ | ||||
| template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \ | ||||
|   -> decltype(LatticeTrinaryExpression<GRID_TRINOP(name),const T1&,const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(),\ | ||||
| 										   std::forward_as_tuple(pred,lhs,rhs)))) \ | ||||
| {\ | ||||
|   return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(), \ | ||||
| 										 std::forward_as_tuple(pred,lhs, rhs))); \ | ||||
| } | ||||
| #define GRID_DEF_TRINOP(op, name)                                              \ | ||||
|   template <typename T1, typename T2, typename T3>                             \ | ||||
|   inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs)                 \ | ||||
|       ->decltype(                                                              \ | ||||
|           LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,  \ | ||||
|                                    const T3 &>(std::make_pair(                 \ | ||||
|               GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) {  \ | ||||
|     return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \ | ||||
|                                     const T3 &>(std::make_pair(                \ | ||||
|         GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)));          \ | ||||
|   } | ||||
| //////////////////////// | ||||
| //Operator definitions | ||||
| // Operator definitions | ||||
| //////////////////////// | ||||
|  | ||||
| GRID_DEF_UNOP(operator -,UnarySub); | ||||
| GRID_DEF_UNOP(Not,UnaryNot); | ||||
| GRID_DEF_UNOP(operator !,UnaryNot); | ||||
| GRID_DEF_UNOP(adj,UnaryAdj); | ||||
| GRID_DEF_UNOP(conjugate,UnaryConj); | ||||
| GRID_DEF_UNOP(trace,UnaryTrace); | ||||
| GRID_DEF_UNOP(transpose,UnaryTranspose); | ||||
| GRID_DEF_UNOP(Ta,UnaryTa); | ||||
| GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup); | ||||
| GRID_DEF_UNOP(real,UnaryReal); | ||||
| GRID_DEF_UNOP(imag,UnaryImag); | ||||
| GRID_DEF_UNOP(toReal,UnaryToReal); | ||||
| GRID_DEF_UNOP(toComplex,UnaryToComplex); | ||||
| GRID_DEF_UNOP(abs  ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing | ||||
| GRID_DEF_UNOP(sqrt ,UnarySqrt); | ||||
| GRID_DEF_UNOP(rsqrt,UnaryRsqrt); | ||||
| GRID_DEF_UNOP(sin  ,UnarySin); | ||||
| GRID_DEF_UNOP(cos  ,UnaryCos); | ||||
| GRID_DEF_UNOP(log  ,UnaryLog); | ||||
| GRID_DEF_UNOP(exp  ,UnaryExp); | ||||
| GRID_DEF_UNOP(operator-, UnarySub); | ||||
| GRID_DEF_UNOP(Not, UnaryNot); | ||||
| GRID_DEF_UNOP(operator!, UnaryNot); | ||||
| GRID_DEF_UNOP(adj, UnaryAdj); | ||||
| GRID_DEF_UNOP(conjugate, UnaryConj); | ||||
| GRID_DEF_UNOP(trace, UnaryTrace); | ||||
| GRID_DEF_UNOP(transpose, UnaryTranspose); | ||||
| GRID_DEF_UNOP(Ta, UnaryTa); | ||||
| GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup); | ||||
| GRID_DEF_UNOP(real, UnaryReal); | ||||
| GRID_DEF_UNOP(imag, UnaryImag); | ||||
| GRID_DEF_UNOP(toReal, UnaryToReal); | ||||
| GRID_DEF_UNOP(toComplex, UnaryToComplex); | ||||
| GRID_DEF_UNOP(timesI, UnaryTimesI); | ||||
| GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI); | ||||
| GRID_DEF_UNOP(abs, UnaryAbs);  // abs overloaded in cmath C++98; DON'T do the | ||||
|                                // abs-fabs-dabs-labs thing | ||||
| GRID_DEF_UNOP(sqrt, UnarySqrt); | ||||
| GRID_DEF_UNOP(rsqrt, UnaryRsqrt); | ||||
| GRID_DEF_UNOP(sin, UnarySin); | ||||
| GRID_DEF_UNOP(cos, UnaryCos); | ||||
| GRID_DEF_UNOP(asin, UnaryAsin); | ||||
| GRID_DEF_UNOP(acos, UnaryAcos); | ||||
| GRID_DEF_UNOP(log, UnaryLog); | ||||
| GRID_DEF_UNOP(exp, UnaryExp); | ||||
|  | ||||
| GRID_DEF_BINOP(operator+,BinaryAdd); | ||||
| GRID_DEF_BINOP(operator-,BinarySub); | ||||
| GRID_DEF_BINOP(operator*,BinaryMul); | ||||
| GRID_DEF_BINOP(operator+, BinaryAdd); | ||||
| GRID_DEF_BINOP(operator-, BinarySub); | ||||
| GRID_DEF_BINOP(operator*, BinaryMul); | ||||
|  | ||||
| GRID_DEF_BINOP(operator&,BinaryAnd); | ||||
| GRID_DEF_BINOP(operator|,BinaryOr); | ||||
| GRID_DEF_BINOP(operator&&,BinaryAndAnd); | ||||
| GRID_DEF_BINOP(operator||,BinaryOrOr); | ||||
| GRID_DEF_BINOP(operator&, BinaryAnd); | ||||
| GRID_DEF_BINOP(operator|, BinaryOr); | ||||
| GRID_DEF_BINOP(operator&&, BinaryAndAnd); | ||||
| GRID_DEF_BINOP(operator||, BinaryOrOr); | ||||
|  | ||||
| GRID_DEF_TRINOP(where,TrinaryWhere); | ||||
| GRID_DEF_TRINOP(where, TrinaryWhere); | ||||
|  | ||||
| ///////////////////////////////////////////////////////////// | ||||
| // Closure convenience to force expression to evaluate | ||||
| ///////////////////////////////////////////////////////////// | ||||
| template<class Op,class T1> | ||||
|   auto closure(const LatticeUnaryExpression<Op,T1> & expr) | ||||
|   -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> | ||||
| { | ||||
|   Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> ret(expr); | ||||
| template <class Op, class T1> | ||||
| auto closure(const LatticeUnaryExpression<Op, T1> &expr) | ||||
|     -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> { | ||||
|   Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret( | ||||
|       expr); | ||||
|   return ret; | ||||
| } | ||||
| template<class Op,class T1, class T2> | ||||
|   auto closure(const LatticeBinaryExpression<Op,T1,T2> & expr) | ||||
|   -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||
| 				      eval(0,std::get<1>(expr.second))))> | ||||
| { | ||||
|   Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||
| 				   eval(0,std::get<1>(expr.second))))> ret(expr); | ||||
| template <class Op, class T1, class T2> | ||||
| auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr) | ||||
|     -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), | ||||
|                                         eval(0, std::get<1>(expr.second))))> { | ||||
|   Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), | ||||
|                                    eval(0, std::get<1>(expr.second))))> | ||||
|       ret(expr); | ||||
|   return ret; | ||||
| } | ||||
| template<class Op,class T1, class T2, class T3> | ||||
|   auto closure(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | ||||
|   -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||
| 				      eval(0,std::get<1>(expr.second)), | ||||
| 				      eval(0,std::get<2>(expr.second))))> | ||||
| { | ||||
|   Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||
| 				   eval(0,std::get<1>(expr.second)), | ||||
| 				   eval(0,std::get<2>(expr.second))))> ret(expr); | ||||
| template <class Op, class T1, class T2, class T3> | ||||
| auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) | ||||
|     -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), | ||||
|                                         eval(0, std::get<1>(expr.second)), | ||||
|                                         eval(0, std::get<2>(expr.second))))> { | ||||
|   Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), | ||||
|                                    eval(0, std::get<1>(expr.second)), | ||||
|                                    eval(0, std::get<2>(expr.second))))> | ||||
|       ret(expr); | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| @@ -382,12 +431,11 @@ template<class Op,class T1, class T2, class T3> | ||||
| #undef GRID_DEF_UNOP | ||||
| #undef GRID_DEF_BINOP | ||||
| #undef GRID_DEF_TRINOP | ||||
|  | ||||
| } | ||||
|  | ||||
| #if 0 | ||||
| using namespace Grid; | ||||
|  	       | ||||
|          | ||||
|  int main(int argc,char **argv){ | ||||
|     | ||||
|    Lattice<double> v1(16); | ||||
| @@ -397,7 +445,7 @@ using namespace Grid; | ||||
|    BinaryAdd<double,double> tmp; | ||||
|    LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>  | ||||
|      expr(std::make_pair(tmp, | ||||
| 	  std::forward_as_tuple(v1,v2))); | ||||
|     std::forward_as_tuple(v1,v2))); | ||||
|    tmp.func(eval(0,v1),eval(0,v2)); | ||||
|  | ||||
|    auto var = v1+v2; | ||||
|   | ||||
| @@ -1,32 +1,33 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/lattice/Lattice_base.h | ||||
| Source file: ./lib/lattice/Lattice_base.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_LATTICE_BASE_H | ||||
| #define GRID_LATTICE_BASE_H | ||||
|  | ||||
| @@ -101,6 +102,7 @@ public: | ||||
|     int begin(void) { return 0;}; | ||||
|     int end(void)   { return _odata.size(); } | ||||
|     vobj & operator[](int i) { return _odata[i]; }; | ||||
|     const vobj & operator[](int i) const { return _odata[i]; }; | ||||
|  | ||||
| public: | ||||
|     typedef typename vobj::scalar_type scalar_type; | ||||
| @@ -255,6 +257,18 @@ PARALLEL_FOR_LOOP | ||||
|         checkerboard=0; | ||||
|     } | ||||
|  | ||||
|     // Copy constructor: takes over r's grid pointer and checkerboard, then | ||||
|     // sizes the local storage to the grid's outer-site count and copies every | ||||
|     // outer site from r. | ||||
|     Lattice(const Lattice& r){ // copy constructor | ||||
|     	_grid = r._grid; | ||||
|     	checkerboard = r.checkerboard; | ||||
|     	_odata.resize(_grid->oSites());// essential: storage must exist before the element-wise copy below | ||||
|   		PARALLEL_FOR_LOOP | ||||
|         for(int ss=0;ss<_grid->oSites();ss++){ | ||||
|             _odata[ss]=r._odata[ss]; | ||||
|         }  	 | ||||
|     } | ||||
|  | ||||
|  | ||||
|  | ||||
|     virtual ~Lattice(void) = default; | ||||
|      | ||||
|     template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ | ||||
| @@ -267,7 +281,7 @@ PARALLEL_FOR_LOOP | ||||
|     template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){ | ||||
|       this->checkerboard = r.checkerboard; | ||||
|       conformable(*this,r); | ||||
|       std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl; | ||||
|        | ||||
| PARALLEL_FOR_LOOP | ||||
|         for(int ss=0;ss<_grid->oSites();ss++){ | ||||
|             this->_odata[ss]=r._odata[ss]; | ||||
| @@ -324,27 +338,27 @@ PARALLEL_FOR_LOOP | ||||
|  | ||||
|  | ||||
|  | ||||
| #include <lattice/Lattice_conformable.h> | ||||
| #include "Lattice_conformable.h" | ||||
| #define GRID_LATTICE_EXPRESSION_TEMPLATES | ||||
| #ifdef  GRID_LATTICE_EXPRESSION_TEMPLATES | ||||
| #include <lattice/Lattice_ET.h> | ||||
| #include "Lattice_ET.h" | ||||
| #else  | ||||
| #include <lattice/Lattice_overload.h> | ||||
| #include "Lattice_overload.h" | ||||
| #endif | ||||
| #include <lattice/Lattice_arith.h> | ||||
| #include <lattice/Lattice_trace.h> | ||||
| #include <lattice/Lattice_transpose.h> | ||||
| #include <lattice/Lattice_local.h> | ||||
| #include <lattice/Lattice_reduction.h> | ||||
| #include <lattice/Lattice_peekpoke.h> | ||||
| #include <lattice/Lattice_reality.h> | ||||
| #include <lattice/Lattice_comparison_utils.h> | ||||
| #include <lattice/Lattice_comparison.h> | ||||
| #include <lattice/Lattice_coordinate.h> | ||||
| #include <lattice/Lattice_where.h> | ||||
| #include <lattice/Lattice_rng.h> | ||||
| #include <lattice/Lattice_unary.h> | ||||
| #include <lattice/Lattice_transfer.h> | ||||
| #include "Lattice_arith.h" | ||||
| #include "Lattice_trace.h" | ||||
| #include "Lattice_transpose.h" | ||||
| #include "Lattice_local.h" | ||||
| #include "Lattice_reduction.h" | ||||
| #include "Lattice_peekpoke.h" | ||||
| #include "Lattice_reality.h" | ||||
| #include "Lattice_comparison_utils.h" | ||||
| #include "Lattice_comparison.h" | ||||
| #include "Lattice_coordinate.h" | ||||
| #include "Lattice_where.h" | ||||
| #include "Lattice_rng.h" | ||||
| #include "Lattice_unary.h" | ||||
| #include "Lattice_transfer.h" | ||||
|  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -40,7 +40,7 @@ namespace Grid { | ||||
|     //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | ||||
|     ComplexD nrm = innerProduct(arg,arg); | ||||
|     return real(nrm);  | ||||
|     return std::real(nrm);  | ||||
|   } | ||||
|  | ||||
|     template<class vobj> | ||||
|   | ||||
| @@ -31,6 +31,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| #include <random> | ||||
|  | ||||
| // RNG_SPRNG_SHA256 is not enabled by default yet. | ||||
| // Uncomment the following line to see the effect of the new RNG. | ||||
| // #define RNG_SPRNG_SHA256 | ||||
|  | ||||
| #ifdef RNG_SPRNG_SHA256 | ||||
| #include "rng/sprng-sha256.h" | ||||
| #endif | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|  | ||||
| @@ -110,7 +118,11 @@ namespace Grid { | ||||
|     int _seeded; | ||||
|     // One generator per site. | ||||
|     // Uniform and Gaussian distributions from these generators. | ||||
| #ifdef RNG_RANLUX | ||||
| #ifdef RNG_SPRNG_SHA256 | ||||
|     typedef uint32_t      RngStateType; | ||||
|     typedef SprngSha256 RngEngine; | ||||
|     static const int RngStateCount = 22; | ||||
| #elif defined RNG_RANLUX | ||||
|     typedef uint64_t      RngStateType; | ||||
|     typedef std::ranlux48 RngEngine; | ||||
|     static const int RngStateCount = 15; | ||||
| @@ -273,6 +285,34 @@ namespace Grid { | ||||
|     } | ||||
|  | ||||
|  | ||||
| #ifdef RNG_SPRNG_SHA256 | ||||
|     // Seed every site-local generator from a single seed source. | ||||
|     // A root RngState is built by folding eight draws of src() into it; each | ||||
|     // global site then gets its own state split off the root with the site's | ||||
|     // global index, and only the sites owned by this rank are stored. | ||||
|     template<class source> void Seed(source &src) | ||||
|     { | ||||
|       RngState root; | ||||
|       for (int draw = 0; draw < 8; ++draw) { | ||||
|         splitRngState(root, root, src()); | ||||
|       } | ||||
|  | ||||
|       const long gsites = _grid->_gsites; | ||||
|       std::vector<int> gcoor; | ||||
|  | ||||
|       for (long gidx = 0; gidx < gsites; ++gidx) { | ||||
|         int rank, o_idx, i_idx; | ||||
|         _grid->GlobalIndexToGlobalCoor(gidx, gcoor); | ||||
|         _grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gcoor); | ||||
|  | ||||
|         const int l_idx = generator_idx(o_idx, i_idx); | ||||
|  | ||||
|         // Sites that live on another rank are seeded there. | ||||
|         if (rank != _grid->ThisRank()) continue; | ||||
|  | ||||
|         splitRngState(_generators[l_idx].rs, root, gidx); | ||||
|       } | ||||
|       _seeded = 1; | ||||
|     } | ||||
| #else | ||||
|     // This loop could be made faster to avoid the Amdahl (serial) bottleneck by | ||||
|     // i)  seed generators on each timeslice, for x=y=z=0; | ||||
|     // ii) seed generators on each z for x=y=0 | ||||
| @@ -312,6 +352,7 @@ namespace Grid { | ||||
|       } | ||||
|       _seeded=1; | ||||
|     }     | ||||
| #endif | ||||
|  | ||||
|     //FIXME implement generic IO and create state save/restore | ||||
|     //void SaveState(const std::string<char> &file); | ||||
|   | ||||
| @@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out) | ||||
|     assert(ig->_ldimensions[d] == og->_ldimensions[d]); | ||||
|   } | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|   //PARALLEL_FOR_LOOP | ||||
|   for(int idx=0;idx<ig->lSites();idx++){ | ||||
|     std::vector<int> lcoor(ni); | ||||
|     ig->LocalIndexToLocalCoor(idx,lcoor); | ||||
| @@ -386,7 +386,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int | ||||
|   } | ||||
|  | ||||
|   // the above should guarantee that the operations are local | ||||
| PARALLEL_FOR_LOOP | ||||
|   //PARALLEL_FOR_LOOP | ||||
|   for(int idx=0;idx<lg->lSites();idx++){ | ||||
|     std::vector<int> lcoor(nl); | ||||
|     std::vector<int> hcoor(nh); | ||||
| @@ -420,15 +420,15 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in | ||||
|   assert(hg->_processors[orthog]==1); | ||||
|  | ||||
|   int dl; dl = 0; | ||||
|   for(int d=0;d<nh;d++){ | ||||
|     if ( d != orthog) { | ||||
|       assert(lg->_processors[dl]  == hg->_processors[d]); | ||||
|       assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); | ||||
|       dl++; | ||||
|     for(int d=0;d<nh;d++){ | ||||
|       if ( d != orthog) { | ||||
| 	assert(lg->_processors[dl]  == hg->_processors[d]); | ||||
| 	assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); | ||||
| 	dl++; | ||||
|     } | ||||
|   } | ||||
|   // the above should guarantee that the operations are local | ||||
| PARALLEL_FOR_LOOP | ||||
|   //PARALLEL_FOR_LOOP | ||||
|   for(int idx=0;idx<lg->lSites();idx++){ | ||||
|     std::vector<int> lcoor(nl); | ||||
|     std::vector<int> hcoor(nh); | ||||
| @@ -446,6 +446,79 @@ PARALLEL_FOR_LOOP | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
| // Copy the sites of lowDim whose coordinate along `orthog` equals slice_lo | ||||
| // into higherDim at the same coordinates, except that the `orthog` | ||||
| // coordinate is replaced by slice_hi.  Both lattices must have the same | ||||
| // number of dimensions, processor layout and local dimensions. | ||||
| template<class vobj> | ||||
| void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||
| { | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   sobj site; | ||||
|  | ||||
|   GridBase *lg = lowDim._grid; | ||||
|   GridBase *hg = higherDim._grid; | ||||
|   const int nl = lg->_ndimension; | ||||
|   const int nh = hg->_ndimension; | ||||
|  | ||||
|   assert(nl == nh); | ||||
|   assert(orthog<nh); | ||||
|   assert(orthog>=0); | ||||
|  | ||||
|   for(int dim=0; dim<nh; dim++){ | ||||
|     assert(lg->_processors[dim]  == hg->_processors[dim]); | ||||
|     assert(lg->_ldimensions[dim] == hg->_ldimensions[dim]); | ||||
|   } | ||||
|  | ||||
|   // the checks above should guarantee that the operations are local | ||||
|   //PARALLEL_FOR_LOOP | ||||
|   for(int lsite=0; lsite<lg->lSites(); lsite++){ | ||||
|     std::vector<int> lcoor(nl); | ||||
|     lg->LocalIndexToLocalCoor(lsite,lcoor); | ||||
|  | ||||
|     // only sites on the requested source slice take part | ||||
|     if( lcoor[orthog] != slice_lo ) continue; | ||||
|  | ||||
|     std::vector<int> hcoor(lcoor); | ||||
|     hcoor[orthog] = slice_hi; | ||||
|     peekLocalSite(site,lowDim,lcoor); | ||||
|     pokeLocalSite(site,higherDim,hcoor); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| // Inverse direction of the slice copy: for every site of lowDim whose | ||||
| // coordinate along `orthog` equals slice_lo, read higherDim at the same | ||||
| // coordinates with the `orthog` coordinate replaced by slice_hi and store | ||||
| // the value into lowDim.  Both lattices must have the same number of | ||||
| // dimensions, processor layout and local dimensions. | ||||
| template<class vobj> | ||||
| void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||
| { | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   sobj site; | ||||
|  | ||||
|   GridBase *lg = lowDim._grid; | ||||
|   GridBase *hg = higherDim._grid; | ||||
|   const int nl = lg->_ndimension; | ||||
|   const int nh = hg->_ndimension; | ||||
|  | ||||
|   assert(nl == nh); | ||||
|   assert(orthog<nh); | ||||
|   assert(orthog>=0); | ||||
|  | ||||
|   for(int dim=0; dim<nh; dim++){ | ||||
|     assert(lg->_processors[dim]  == hg->_processors[dim]); | ||||
|     assert(lg->_ldimensions[dim] == hg->_ldimensions[dim]); | ||||
|   } | ||||
|  | ||||
|   // the checks above should guarantee that the operations are local | ||||
|   //PARALLEL_FOR_LOOP | ||||
|   for(int lsite=0; lsite<lg->lSites(); lsite++){ | ||||
|     std::vector<int> lcoor(nl); | ||||
|     lg->LocalIndexToLocalCoor(lsite,lcoor); | ||||
|  | ||||
|     // only sites on the requested destination slice take part | ||||
|     if( lcoor[orthog] != slice_lo ) continue; | ||||
|  | ||||
|     std::vector<int> hcoor(lcoor); | ||||
|     hcoor[orthog] = slice_hi; | ||||
|     peekLocalSite(site,higherDim,hcoor); | ||||
|     pokeLocalSite(site,lowDim,lcoor); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
| { | ||||
| @@ -482,6 +555,96 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
|  | ||||
| } | ||||
|  | ||||
| //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | ||||
| // | ||||
| // out : resized to in's grid lSites(); entry [lex] receives the site whose | ||||
| //       local coordinate maps to lex under Lexicographic::IndexFromCoor with | ||||
| //       the grid's _ldimensions. | ||||
| // in  : SIMD-vectorized source lattice. | ||||
| // Only participates in overload resolution when vobj is SIMD-vectorized and | ||||
| // sobj is not (see the enable_if in the return type). | ||||
| template<typename vobj, typename sobj> | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){ | ||||
|   typedef typename vobj::vector_type vtype; | ||||
|    | ||||
|   GridBase* in_grid = in._grid; | ||||
|   out.resize(in_grid->lSites()); | ||||
|    | ||||
|   int ndim = in_grid->Nd(); | ||||
|   int in_nsimd = vtype::Nsimd(); | ||||
|  | ||||
|   //Inner (per-SIMD-lane) coordinate of every lane, computed once up front | ||||
|   std::vector<std::vector<int> > in_icoor(in_nsimd); | ||||
|        | ||||
|   for(int lane=0; lane < in_nsimd; lane++){ | ||||
|     in_icoor[lane].resize(ndim); | ||||
|     in_grid->iCoorFromIindex(in_icoor[lane], lane); | ||||
|   } | ||||
|    | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index | ||||
|     //Assemble vector of pointers to output elements | ||||
|     std::vector<sobj*> out_ptrs(in_nsimd); | ||||
|  | ||||
|     std::vector<int> in_ocoor(ndim); | ||||
|     in_grid->oCoorFromOindex(in_ocoor, in_oidx); | ||||
|  | ||||
|     std::vector<int> lcoor(in_grid->Nd()); | ||||
|        | ||||
|     for(int lane=0; lane < in_nsimd; lane++){ | ||||
|       //Local coordinate of this lane = outer coordinate + reduced dimension * inner coordinate | ||||
|       for(int mu=0;mu<ndim;mu++) | ||||
| 	lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu]; | ||||
|  | ||||
|       int lex; | ||||
|       Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions); | ||||
|       out_ptrs[lane] = &out[lex]; | ||||
|     } | ||||
|      | ||||
|     //Unpack into those ptrs | ||||
|     //NOTE(review): extract1 presumably writes lane k of in_vobj through out_ptrs[k]; confirm against its definition | ||||
|     const vobj & in_vobj = in._odata[in_oidx]; | ||||
|     extract1(in_vobj, out_ptrs, 0); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //Convert a Lattice from one precision to another | ||||
| // | ||||
| // out : destination lattice; its checkerboard is overwritten with in's. | ||||
| // in  : source lattice; must have the same number of dimensions as out. | ||||
| // The input is first unpacked into a lexicographically ordered array of | ||||
| // SobjOut (the output scalar type), then each outer site of out is rebuilt | ||||
| // by merging the scalar entries that belong to its SIMD lanes. | ||||
| template<class VobjOut, class VobjIn> | ||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | ||||
|   assert(out._grid->Nd() == in._grid->Nd()); | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   GridBase *in_grid=in._grid; | ||||
|   GridBase *out_grid = out._grid; | ||||
|  | ||||
|   typedef typename VobjOut::scalar_object SobjOut; | ||||
|   typedef typename VobjIn::scalar_object SobjIn; // not referenced below | ||||
|  | ||||
|   int ndim = out._grid->Nd(); | ||||
|   int out_nsimd = out_grid->Nsimd(); | ||||
|      | ||||
|   //Inner (per-SIMD-lane) coordinate of every lane of the output grid | ||||
|   std::vector<std::vector<int> > out_icoor(out_nsimd); | ||||
|        | ||||
|   for(int lane=0; lane < out_nsimd; lane++){ | ||||
|     out_icoor[lane].resize(ndim); | ||||
|     out_grid->iCoorFromIindex(out_icoor[lane], lane); | ||||
|   } | ||||
|          | ||||
|   //Scalar copy of the input, indexed lexicographically and already held as the output scalar type | ||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||
|      | ||||
|   PARALLEL_FOR_LOOP | ||||
|   for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||
|     std::vector<int> out_ocoor(ndim); | ||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||
|  | ||||
|     std::vector<SobjOut*> ptrs(out_nsimd);       | ||||
|  | ||||
|     std::vector<int> lcoor(out_grid->Nd()); | ||||
|        | ||||
|     for(int lane=0; lane < out_nsimd; lane++){ | ||||
|       //Local coordinate of this lane = outer coordinate + reduced dimension * inner coordinate | ||||
|       for(int mu=0;mu<ndim;mu++) | ||||
| 	lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu]; | ||||
| 	 | ||||
|       int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); | ||||
|       ptrs[lane] = &in_slex_conv[llex]; | ||||
|     } | ||||
|     //NOTE(review): merge presumably packs the scalars behind ptrs[] into the lanes of the output site; confirm against its definition | ||||
|     merge(out._odata[out_oidx], ptrs, 0); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
|    | ||||
|  | ||||
|   | ||||
| } | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										353
									
								
								lib/lattice/rng/rng-state.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										353
									
								
								lib/lattice/rng/rng-state.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,353 @@ | ||||
| // vim: set ts=2 sw=2 expandtab: | ||||
|  | ||||
| // Copyright (c) 2016 Luchang Jin | ||||
| // All rights reserved. | ||||
|  | ||||
| // This program is free software: you can redistribute it and/or modify | ||||
| // it under the terms of the GNU General Public License as published by | ||||
| // the Free Software Foundation, either version 2 of the License, or | ||||
| // (at your option) any later version. | ||||
| // | ||||
| // This program is distributed in the hope that it will be useful, | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| // GNU General Public License for more details. | ||||
| // | ||||
| // You should have received a copy of the GNU General Public License | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #ifndef INCLUDE_RNG_STATE_H | ||||
| #define INCLUDE_RNG_STATE_H | ||||
|  | ||||
| #include "show.h" | ||||
|  | ||||
| #ifndef USE_OPENSSL | ||||
| #include "sha256.h" | ||||
| #else | ||||
| #include <openssl/sha.h> | ||||
| #endif | ||||
|  | ||||
| #include <stdint.h> | ||||
| #include <endian.h> | ||||
| #include <cstring> | ||||
| #include <cmath> | ||||
| #include <cassert> | ||||
| #include <string> | ||||
| #include <ostream> | ||||
| #include <istream> | ||||
| #include <vector> | ||||
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| namespace CURRENT_DEFAULT_NAMESPACE_NAME { | ||||
| #endif | ||||
|  | ||||
| struct RngState; | ||||
|  | ||||
| inline void reset(RngState& rs); | ||||
|  | ||||
| inline void reset(RngState& rs, const std::string& seed); | ||||
|  | ||||
| // Integer-seed convenience overload: the seed is turned into text with | ||||
| // show() and handed to the string overload of reset(). | ||||
| inline void reset(RngState& rs, const long seed) | ||||
| { | ||||
|   const std::string seedText = show(seed); | ||||
|   reset(rs, seedText); | ||||
| } | ||||
|  | ||||
| inline void splitRngState(RngState& rs, const RngState& rs0, const std::string& sindex); | ||||
|  | ||||
| // Integer-index convenience overload: the index is turned into text with | ||||
| // show() and handed to the string overload of splitRngState(). | ||||
| inline void splitRngState(RngState& rs, const RngState& rs0, const long sindex = 0) | ||||
| { | ||||
|   const std::string indexText = show(sindex); | ||||
|   splitRngState(rs, rs0, indexText); | ||||
| } | ||||
|  | ||||
| inline uint64_t randGen(RngState& rs); | ||||
|  | ||||
| inline double uRandGen(RngState& rs, const double upper = 1.0, const double lower = 0.0); | ||||
|  | ||||
| inline double gRandGen(RngState& rs, const double sigma = 1.0, const double center = 0.0); | ||||
|  | ||||
| inline void computeHashWithInput(uint32_t hash[8], const RngState& rs, const std::string& input); | ||||
|  | ||||
| // State of the hash-based random number generator. | ||||
| // Every constructor goes through reset() or splitRngState(), so a state is | ||||
| // either all-zero, derived from a seed, or derived from a parent state plus | ||||
| // a label (``sindex''). | ||||
| struct RngState | ||||
| { | ||||
|   // advanced by splitRngState() in multiples of 64 | ||||
|   uint64_t numBytes; | ||||
|   // eight 32-bit hash words identifying this generator | ||||
|   uint32_t hash[8]; | ||||
|   // incremented by every randGen()/gRandGen() call, zeroed on reset/split | ||||
|   unsigned long index; | ||||
|   // | ||||
|   // spare 64-bit values left over from the last hash computed by randGen() | ||||
|   uint64_t cache[3]; | ||||
|   // value handed out by gRandGen() while gaussianAvail is set | ||||
|   double gaussian; | ||||
|   // number of valid entries in cache[] (0..3) | ||||
|   int cacheAvail; | ||||
|   // true while ``gaussian'' holds an unused value | ||||
|   bool gaussianAvail; | ||||
|   // | ||||
|   // Put the state back to the all-zero default. | ||||
|   inline void init() | ||||
|   { | ||||
|     reset(*this); | ||||
|   } | ||||
|   // | ||||
|   // Default state (all zero). | ||||
|   RngState() | ||||
|   { | ||||
|     init(); | ||||
|   } | ||||
|   // State derived from a textual seed. | ||||
|   RngState(const std::string& seed) | ||||
|   { | ||||
|     reset(*this, seed); | ||||
|   } | ||||
|   // State derived from an integer seed. | ||||
|   RngState(const long seed) | ||||
|   { | ||||
|     reset(*this, seed); | ||||
|   } | ||||
|   // Child state of ``rs0'' labelled by a string. | ||||
|   RngState(const RngState& rs0, const std::string& sindex) | ||||
|   { | ||||
|     splitRngState(*this, rs0, sindex); | ||||
|   } | ||||
|   // Child state of ``rs0'' labelled by an integer. | ||||
|   RngState(const RngState& rs0, const long sindex) | ||||
|   { | ||||
|     splitRngState(*this, rs0, sindex); | ||||
|   } | ||||
|   // | ||||
|   // Return a child state labelled by ``sindex''.  The parent is only read, | ||||
|   // so both overloads are const and can be used on a const RngState. | ||||
|   RngState split(const std::string& sindex) const | ||||
|   { | ||||
|     RngState rs(*this, sindex); | ||||
|     return rs; | ||||
|   } | ||||
|   RngState split(const long sindex) const | ||||
|   { | ||||
|     RngState rs(*this, sindex); | ||||
|     return rs; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Number of 32-bit words in the flat form written by exportRngState(): | ||||
| //   numBytes (2) + hash (8) + index (2) + cache (3 * 2) + gaussian (2) | ||||
| //   + cacheAvail (1) + gaussianAvail (1) = 22. | ||||
| const size_t RNG_STATE_NUM_OF_INT32 = 2 + 8 + 2 + 3 * 2 + 2 + 1 + 1; | ||||
|  | ||||
| // Join two 32-bit words into one 64-bit value: ``a'' becomes the upper | ||||
| // half, ``b'' the lower half. | ||||
| inline uint64_t patchTwoUint32(const uint32_t a, const uint32_t b) | ||||
| { | ||||
|   const uint64_t upper = static_cast<uint64_t>(a) << 32; | ||||
|   const uint64_t lower = static_cast<uint64_t>(b); | ||||
|   return upper | lower; | ||||
| } | ||||
|  | ||||
| // Inverse of patchTwoUint32(): ``a'' receives the upper 32 bits of ``x'', | ||||
| // ``b'' the lower 32 bits.  The round trip is checked in debug builds. | ||||
| inline void splitTwoUint32(uint32_t& a, uint32_t& b, const uint64_t x) | ||||
| { | ||||
|   b = static_cast<uint32_t>(x & 0xFFFFFFFFu); | ||||
|   a = static_cast<uint32_t>(x >> 32); | ||||
|   assert(x == patchTwoUint32(a, b)); | ||||
| } | ||||
|  | ||||
| // Flatten ``rs'' into RNG_STATE_NUM_OF_INT32 (= 22) 32-bit words. | ||||
| // ``v'' must point to at least that many writable words.  Layout: | ||||
| //   v[0..1]   numBytes (high word first) | ||||
| //   v[2..9]   hash[0..7] | ||||
| //   v[10..11] index | ||||
| //   v[12..17] cache[0..2], two words each | ||||
| //   v[18..19] bit pattern of ``gaussian'' | ||||
| //   v[20]     cacheAvail | ||||
| //   v[21]     gaussianAvail | ||||
| inline void exportRngState(uint32_t* v, const RngState& rs) | ||||
| { | ||||
|   // both conditions are compile-time facts, so check them at compile time | ||||
|   static_assert(22 == RNG_STATE_NUM_OF_INT32, "layout below assumes 22 words"); | ||||
|   static_assert(sizeof(double) == sizeof(uint64_t), "gaussian is stored as 64 bits"); | ||||
|   splitTwoUint32(v[0], v[1], rs.numBytes); | ||||
|   for (int i = 0; i < 8; ++i) { | ||||
|     v[2 + i] = rs.hash[i]; | ||||
|   } | ||||
|   splitTwoUint32(v[10], v[11], rs.index); | ||||
|   for (int i = 0; i < 3; ++i) { | ||||
|     splitTwoUint32(v[12 + i * 2], v[12 + i * 2 + 1], rs.cache[i]); | ||||
|   } | ||||
|   // memcpy is the well-defined way to read a double's bit pattern; reading | ||||
|   // the inactive member of a union is undefined behaviour in C++ | ||||
|   uint64_t gaussianBits = 0; | ||||
|   std::memcpy(&gaussianBits, &rs.gaussian, sizeof(gaussianBits)); | ||||
|   splitTwoUint32(v[18], v[19], gaussianBits); | ||||
|   v[20] = rs.cacheAvail; | ||||
|   v[21] = rs.gaussianAvail; | ||||
| } | ||||
|  | ||||
| // Rebuild ``rs'' from the 22-word flat form produced by exportRngState(). | ||||
| // ``v'' must point to at least RNG_STATE_NUM_OF_INT32 readable words laid | ||||
| // out as: numBytes (2), hash (8), index (2), cache (3 * 2), bit pattern of | ||||
| // gaussian (2), cacheAvail (1), gaussianAvail (1). | ||||
| inline void importRngState(RngState& rs, const uint32_t* v) | ||||
| { | ||||
|   // both conditions are compile-time facts, so check them at compile time | ||||
|   static_assert(22 == RNG_STATE_NUM_OF_INT32, "layout below assumes 22 words"); | ||||
|   static_assert(sizeof(double) == sizeof(uint64_t), "gaussian is stored as 64 bits"); | ||||
|   rs.numBytes = patchTwoUint32(v[0], v[1]); | ||||
|   for (int i = 0; i < 8; ++i) { | ||||
|     rs.hash[i] = v[2 + i]; | ||||
|   } | ||||
|   rs.index = patchTwoUint32(v[10], v[11]); | ||||
|   for (int i = 0; i < 3; ++i) { | ||||
|     rs.cache[i] = patchTwoUint32(v[12 + i * 2], v[12 + i * 2 + 1]); | ||||
|   } | ||||
|   // memcpy is the well-defined way to turn a bit pattern back into a | ||||
|   // double; reading the inactive member of a union is undefined behaviour | ||||
|   const uint64_t gaussianBits = patchTwoUint32(v[18], v[19]); | ||||
|   std::memcpy(&rs.gaussian, &gaussianBits, sizeof(rs.gaussian)); | ||||
|   rs.cacheAvail = v[20]; | ||||
|   rs.gaussianAvail = v[21]; | ||||
| } | ||||
|  | ||||
| // Vector flavour of exportRngState(): ``v'' is resized to | ||||
| // RNG_STATE_NUM_OF_INT32 words and then filled through the pointer overload. | ||||
| inline void exportRngState(std::vector<uint32_t>& v, const RngState& rs) | ||||
| { | ||||
|   v.resize(RNG_STATE_NUM_OF_INT32); | ||||
|   uint32_t* const words = v.data(); | ||||
|   exportRngState(words, rs); | ||||
| } | ||||
|  | ||||
| // Vector flavour of importRngState(): ``v'' must hold exactly | ||||
| // RNG_STATE_NUM_OF_INT32 words (checked in debug builds). | ||||
| inline void importRngState(RngState& rs, const std::vector<uint32_t>& v) | ||||
| { | ||||
|   assert(RNG_STATE_NUM_OF_INT32 == v.size()); | ||||
|   const uint32_t* const words = v.data(); | ||||
|   importRngState(rs, words); | ||||
| } | ||||
|  | ||||
| // Write ``rs'' as its exported 32-bit words in decimal, separated by single | ||||
| // spaces, with no trailing separator. | ||||
| inline std::ostream& operator<<(std::ostream& os, const RngState& rs) | ||||
| { | ||||
|   std::vector<uint32_t> words(RNG_STATE_NUM_OF_INT32); | ||||
|   exportRngState(words, rs); | ||||
|   // emit "w0 w1 ... w(n-2) " first, then the final word without a separator | ||||
|   const size_t last = words.size() - 1; | ||||
|   size_t pos = 0; | ||||
|   while (pos < last) { | ||||
|     os << words[pos] << " "; | ||||
|     ++pos; | ||||
|   } | ||||
|   os << words.back(); | ||||
|   return os; | ||||
| } | ||||
|  | ||||
| // Read the whitespace-separated 32-bit words written by operator<< and | ||||
| // rebuild ``rs'' from them. | ||||
| // If any word cannot be read, the stream is left in its failed state and | ||||
| // ``rs'' is not modified, so a truncated or malformed record can never | ||||
| // install a half-read generator state. | ||||
| inline std::istream& operator>>(std::istream& is, RngState& rs) | ||||
| { | ||||
|   std::vector<uint32_t> v(RNG_STATE_NUM_OF_INT32); | ||||
|   for (size_t i = 0; i < v.size(); ++i) { | ||||
|     if (!(is >> v[i])) { | ||||
|       // extraction failed: report it through the stream, keep rs as it was | ||||
|       return is; | ||||
|     } | ||||
|   } | ||||
|   importRngState(rs, v); | ||||
|   return is; | ||||
| } | ||||
|  | ||||
| // Text form of a generator state, produced by the shows() helper. | ||||
| inline std::string show(const RngState& rs) | ||||
| { | ||||
|   std::string text = shows(rs); | ||||
|   return text; | ||||
| } | ||||
|  | ||||
| // Member-by-member equality of two generator states. | ||||
| // A memcmp over the whole struct would also compare the padding bytes the | ||||
| // compiler may place between or after members; member-wise assignment | ||||
| // never sets those bytes, so two logically identical states could compare | ||||
| // unequal.  ``gaussian'' is still compared by bit pattern, which is what | ||||
| // the whole-struct memcmp did for that member. | ||||
| inline bool operator==(const RngState& rs1, const RngState& rs2) | ||||
| { | ||||
|   return rs1.numBytes == rs2.numBytes | ||||
|     && 0 == std::memcmp(rs1.hash, rs2.hash, sizeof(rs1.hash)) | ||||
|     && rs1.index == rs2.index | ||||
|     && 0 == std::memcmp(rs1.cache, rs2.cache, sizeof(rs1.cache)) | ||||
|     && 0 == std::memcmp(&rs1.gaussian, &rs2.gaussian, sizeof(rs1.gaussian)) | ||||
|     && rs1.cacheAvail == rs2.cacheAvail | ||||
|     && rs1.gaussianAvail == rs2.gaussianAvail; | ||||
| } | ||||
|  | ||||
| // Return ``rs'' to the default state: the whole object is zero-filled and | ||||
| // every member is then explicitly set to its zero value. | ||||
| inline void reset(RngState& rs) | ||||
| { | ||||
|   std::memset(&rs, 0, sizeof(RngState)); | ||||
|   rs.numBytes = 0; | ||||
|   for (int word = 0; word < 8; ++word) { | ||||
|     rs.hash[word] = 0; | ||||
|   } | ||||
|   rs.index = 0; | ||||
|   for (int slot = 0; slot < 3; ++slot) { | ||||
|     rs.cache[slot] = 0; | ||||
|   } | ||||
|   rs.gaussian = 0.0; | ||||
|   rs.cacheAvail = 0; | ||||
|   rs.gaussianAvail = false; | ||||
| } | ||||
|  | ||||
| // Seed ``rs'' from a string: start from the all-zero default state, then | ||||
| // split that state in place using the seed text as the label. | ||||
| inline void reset(RngState& rs, const std::string& seed) | ||||
| { | ||||
|   reset(rs); | ||||
|   // rs is passed as both destination and parent | ||||
|   splitRngState(rs, rs, seed); | ||||
| } | ||||
|  | ||||
| // Hash the 32 bytes of ``rs.hash'' (each word most-significant byte first) | ||||
| // followed by the bytes of ``input'' and store the eight resulting 32-bit | ||||
| // words in ``hash''.  The message is assembled completely before ``hash'' | ||||
| // is written, so ``hash'' may be ``rs.hash'' itself. | ||||
| inline void computeHashWithInput(uint32_t hash[8], const RngState& rs, const std::string& input) | ||||
| { | ||||
|   std::string data; | ||||
|   data.reserve(32 + input.length()); | ||||
|   for (int word = 0; word < 8; ++word) { | ||||
|     for (int shift = 24; shift >= 0; shift -= 8) { | ||||
|       data.push_back((rs.hash[word] >> shift) & 0xFF); | ||||
|     } | ||||
|   } | ||||
|   data += input; | ||||
| #ifndef USE_OPENSSL | ||||
|   sha256::computeHash(hash, (const uint8_t*)data.c_str(), data.length()); | ||||
| #else | ||||
|   { | ||||
|     uint8_t rawHash[32]; | ||||
|     SHA256((unsigned char*)data.c_str(), data.length(), rawHash); | ||||
|     // repack the digest bytes into words, most-significant byte first | ||||
|     for (int word = 0; word < 8; ++word) { | ||||
|       const uint8_t* quad = rawHash + word * 4; | ||||
|       hash[word] = (((uint32_t)quad[0]) << 24) | ||||
|                  + (((uint32_t)quad[1]) << 16) | ||||
|                  + (((uint32_t)quad[2]) <<  8) | ||||
|                  + ( (uint32_t)quad[3]); | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
| } | ||||
|  | ||||
| inline void splitRngState(RngState& rs, const RngState& rs0, const std::string& sindex) | ||||
|   // produce a new rng ``rs'' uniquely identified by ``rs0'' and ``sindex'' | ||||
|   // will not affect old rng ``rs0'' | ||||
|   // the function should behave correctly even if ``rs'' is actually ``rs0'' | ||||
|   // | ||||
|   // the statement order below matters for that aliasing case: everything | ||||
|   // read from ``rs0'' (index, numBytes, hash) is consumed before the | ||||
|   // corresponding member of ``rs'' is overwritten | ||||
| { | ||||
|   // label fed to the hash: the parent's draw counter plus the caller's index text | ||||
|   std::string input = ssprintf("[%lu] {%s}", rs0.index, sindex.c_str()); | ||||
|   // 32 is the size of the hash prefix added by computeHashWithInput(); the | ||||
|   // total is rounded up to whole 64-byte blocks | ||||
|   // NOTE(review): the "+ 1 + 8" looks like the SHA-256 padding byte and length field -- confirm | ||||
|   rs.numBytes = rs0.numBytes + 64 * ((32 + input.length() + 1 + 8 - 1) / 64 + 1); | ||||
|   computeHashWithInput(rs.hash, rs0, input); | ||||
|   // the child starts with an empty draw history and empty caches | ||||
|   rs.index = 0; | ||||
|   rs.cache[0] = 0; | ||||
|   rs.cache[1] = 0; | ||||
|   rs.cache[2] = 0; | ||||
|   rs.gaussian = 0.0; | ||||
|   rs.cacheAvail = 0; | ||||
|   rs.gaussianAvail = false; | ||||
| } | ||||
|  | ||||
| inline uint64_t randGen(RngState& rs) | ||||
| { | ||||
|   assert(0 <= rs.cacheAvail && rs.cacheAvail <= 3); | ||||
|   rs.index += 1; | ||||
|   if (rs.cacheAvail > 0) { | ||||
|     rs.cacheAvail -= 1; | ||||
|     uint64_t r = rs.cache[rs.cacheAvail]; | ||||
|     rs.cache[rs.cacheAvail] = 0; | ||||
|     return r; | ||||
|   } else { | ||||
|     uint32_t hash[8]; | ||||
|     computeHashWithInput(hash, rs, ssprintf("[%lu]", rs.index)); | ||||
|     rs.cache[0] = patchTwoUint32(hash[0], hash[1]); | ||||
|     rs.cache[1] = patchTwoUint32(hash[2], hash[3]); | ||||
|     rs.cache[2] = patchTwoUint32(hash[4], hash[5]); | ||||
|     rs.cacheAvail = 3; | ||||
|     return patchTwoUint32(hash[6], hash[7]); | ||||
|   } | ||||
| } | ||||
|  | ||||
| inline double uRandGen(RngState& rs, const double upper, const double lower) | ||||
| { | ||||
|   uint64_t u = randGen(rs); | ||||
|   const double fac = 1.0 / (256.0 * 256.0 * 256.0 * 256.0) / (256.0 * 256.0 * 256.0 * 256.0); | ||||
|   return u * fac * (upper - lower) + lower; | ||||
| } | ||||
|  | ||||
| inline double gRandGen(RngState& rs, const double sigma, const double center) | ||||
| { | ||||
|   rs.index += 1; | ||||
|   if (rs.gaussianAvail) { | ||||
|     rs.gaussianAvail = false; | ||||
|     return rs.gaussian * sigma + center; | ||||
|   } else { | ||||
|     // pick 2 uniform numbers in the square extending from | ||||
|     // -1 to 1 in each direction, see if they are in the | ||||
|     // unit circle, and try again if they are not. | ||||
|     int num_try = 1; | ||||
|     double v1, v2, rsq; | ||||
|     do { | ||||
|       v1 = uRandGen(rs, 1.0, -1.0); | ||||
|       v2 = uRandGen(rs, 1.0, -1.0); | ||||
|       if ((num_try % 1000)==0) { | ||||
|         printf("gRandGen : WARNING num_try=%d v1=%e v2=%e\n",num_try,v1,v2); | ||||
|       } | ||||
|       rsq = v1*v1 + v2*v2; | ||||
|       num_try++; | ||||
|     } while ((num_try < 10000) && (rsq >= 1.0 || rsq == 0)); | ||||
|     if (num_try > 9999) { | ||||
|       printf("gRandGen : WARNING failed after 10000 tries (corrupted RNG?), returning ridiculous numbers (1e+10)\n"); | ||||
|       return 1e+10; | ||||
|     } | ||||
|     double fac = std::sqrt(-2.0 * std::log(rsq)/rsq); | ||||
|     rs.gaussian = v1 * fac; | ||||
|     rs.gaussianAvail = true; | ||||
|     return v2 * fac * sigma + center; | ||||
|   } | ||||
| } | ||||
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										348
									
								
								lib/lattice/rng/sha256.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										348
									
								
								lib/lattice/rng/sha256.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,348 @@ | ||||
| // vim: set ts=2 sw=2 expandtab: | ||||
|  | ||||
| // Copyright (c) 2016 Luchang Jin | ||||
| // All rights reserved. | ||||
|  | ||||
| // This program is free software: you can redistribute it and/or modify | ||||
| // it under the terms of the GNU General Public License as published by | ||||
| // the Free Software Foundation, either version 2 of the License, or | ||||
| // (at your option) any later version. | ||||
| // | ||||
| // This program is distributed in the hope that it will be useful, | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| // GNU General Public License for more details. | ||||
| // | ||||
| // You should have received a copy of the GNU General Public License | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
|  | ||||
| // Code within namespace sha256 is originally from Stephan Brumme. | ||||
| // see http://create.stephan-brumme.com/disclaimer.html | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <stdint.h> | ||||
| #include <endian.h> | ||||
| #include <cstring> | ||||
| #include <cmath> | ||||
| #include <cassert> | ||||
| #include <string> | ||||
| #include <ostream> | ||||
| #include <istream> | ||||
| #include <vector> | ||||
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| namespace CURRENT_DEFAULT_NAMESPACE_NAME { | ||||
| #endif | ||||
|  | ||||
namespace sha256 {

  // size of one message block in bytes (512 bits)
  const size_t BlockSize = 512 / 8;

  // size of the digest in bytes
  const size_t HashBytes = 32;

  // number of 32-bit words in the digest / chaining state
  const size_t HashValues = HashBytes / 4;

  inline uint32_t rotate(uint32_t a, uint32_t c)
  // rotate ``a'' right by ``c'' bits
  // the count is reduced modulo 32 so that c == 0 does not shift by 32 bits
  {
    c &= 31;
    return (a >> c) | (a << ((32 - c) & 31));
  }

  inline uint32_t swap(uint32_t x)
  // reverse the byte order of a 32-bit word
  {
    return (x >> 24) |
      ((x >>  8) & 0x0000FF00) |
      ((x <<  8) & 0x00FF0000) |
      (x << 24);
  }

  inline uint32_t f1(uint32_t e, uint32_t f, uint32_t g)
  // mix functions for processBlock()
  {
    uint32_t term1 = rotate(e, 6) ^ rotate(e, 11) ^ rotate(e, 25);
    uint32_t term2 = (e & f) ^ (~e & g); //(g ^ (e & (f ^ g)))
    return term1 + term2;
  }

  inline uint32_t f2(uint32_t a, uint32_t b, uint32_t c)
  // mix functions for processBlock()
  {
    uint32_t term1 = rotate(a, 2) ^ rotate(a, 13) ^ rotate(a, 22);
    uint32_t term2 = ((a | b) & c) | (a & b); //(a & (b ^ c)) ^ (b & c);
    return term1 + term2;
  }

  inline void processBlock(uint32_t newHash[8], const uint32_t oldHash[8], const uint8_t data[64])
    // run the compression function on one 64-byte block
    // newHash and oldHash can be the same array
    // ``data'' may have any alignment: it is only ever read byte by byte
  {
    static const uint32_t K[64] = {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };
    // get last hash
    uint32_t a = oldHash[0];
    uint32_t b = oldHash[1];
    uint32_t c = oldHash[2];
    uint32_t d = oldHash[3];
    uint32_t e = oldHash[4];
    uint32_t f = oldHash[5];
    uint32_t g = oldHash[6];
    uint32_t h = oldHash[7];
    // message schedule: the first 16 words are the block itself, read as
    // big-endian words one byte at a time (no pointer cast, so no alignment
    // or host byte-order assumptions)
    uint32_t words[64];
    for (int i = 0; i < 16; i++) {
      const uint8_t* p = data + 4 * i;
      words[i] = (((uint32_t)p[0]) << 24)
               | (((uint32_t)p[1]) << 16)
               | (((uint32_t)p[2]) <<  8)
               |  ((uint32_t)p[3]);
    }
    // the remaining 48 words depend only on earlier schedule words
    for (int i = 16; i < 64; i++) {
      words[i] = words[i-16] +
        (rotate(words[i-15],  7) ^ rotate(words[i-15], 18) ^ (words[i-15] >>  3)) +
        words[i-7] +
        (rotate(words[i- 2], 17) ^ rotate(words[i- 2], 19) ^ (words[i- 2] >> 10));
    }
    // 64 rounds, eight at a time: instead of shifting the eight working
    // variables after each round, the roles of a..h are rotated in the
    // source, which returns to the starting assignment after eight rounds
    uint32_t x,y; // temporaries
    for (int r = 0; r < 64; r += 8) {
      const uint32_t* k = K + r;
      const uint32_t* w = words + r;
      x = h + f1(e,f,g) + k[0] + w[0]; y = f2(a,b,c); d += x; h = x + y;
      x = g + f1(d,e,f) + k[1] + w[1]; y = f2(h,a,b); c += x; g = x + y;
      x = f + f1(c,d,e) + k[2] + w[2]; y = f2(g,h,a); b += x; f = x + y;
      x = e + f1(b,c,d) + k[3] + w[3]; y = f2(f,g,h); a += x; e = x + y;
      x = d + f1(a,b,c) + k[4] + w[4]; y = f2(e,f,g); h += x; d = x + y;
      x = c + f1(h,a,b) + k[5] + w[5]; y = f2(d,e,f); g += x; c = x + y;
      x = b + f1(g,h,a) + k[6] + w[6]; y = f2(c,d,e); f += x; b = x + y;
      x = a + f1(f,g,h) + k[7] + w[7]; y = f2(b,c,d); e += x; a = x + y;
    }
    // update hash
    newHash[0] = a + oldHash[0];
    newHash[1] = b + oldHash[1];
    newHash[2] = c + oldHash[2];
    newHash[3] = d + oldHash[3];
    newHash[4] = e + oldHash[4];
    newHash[5] = f + oldHash[5];
    newHash[6] = g + oldHash[6];
    newHash[7] = h + oldHash[7];
  }

  inline void processInput(
      uint32_t hash[8],
      const uint32_t oldHash[8], const uint64_t numBytes,
      const uint8_t* input, const size_t inputSize)
    // finish a hash: starting from the chaining state ``oldHash'', absorb all
    // ``inputSize'' bytes of ``input'', append the padding and write the
    // digest words to ``hash''
    // ``numBytes'' is the number of message bytes already absorbed into
    // ``oldHash''; it only enters through the encoded message length
    // hash and oldHash can be the same array
  {
    // the input bytes are considered as bits strings, where the first bit is the most significant bit of the byte
    // - append "1" bit to message
    // - append "0" bits until message length in bit mod 512 is 448
    // - append length as 64 bit integer
    std::memmove(hash, oldHash, 32);
    // process all complete blocks of the input
    // (size_t arithmetic throughout, so inputs of 2 GiB and more are handled)
    const size_t nBlocks = inputSize / BlockSize;
    for (size_t i = 0; i < nBlocks; ++i) {
      processBlock(hash, hash, input + i * BlockSize);
    }
    // remaining bytes (0..63) followed by the padding; at most two blocks
    const size_t tailSize = inputSize - nBlocks * BlockSize;
    unsigned char tail[2 * BlockSize] = {0};
    if (tailSize > 0) {
      std::memcpy(tail, input + nBlocks * BlockSize, tailSize);
    }
    // append a "1" bit, 128 => binary 10000000
    tail[tailSize] = 128;
    // the marker byte and the 8 length bytes must fit behind the data,
    // otherwise the padding flows over into a second block
    const size_t paddedSize = (tailSize + 1 + 8 <= BlockSize) ? BlockSize : 2 * BlockSize;
    // message length in bits as 64 bit number, big endian, in the last 8 bytes
    const uint64_t msgBits = 8 * (numBytes + inputSize);
    for (int k = 0; k < 8; ++k) {
      tail[paddedSize - 1 - k] = (unsigned char)((msgBits >> (8 * k)) & 0xFF);
    }
    // process blocks
    processBlock(hash, hash, tail);
    if (paddedSize > BlockSize) {
      processBlock(hash, hash, tail + BlockSize);
    }
  }

  inline void setInitialHash(uint32_t hash[8])
  // load the SHA-256 initial chaining values
  {
    hash[0] = 0x6a09e667;
    hash[1] = 0xbb67ae85;
    hash[2] = 0x3c6ef372;
    hash[3] = 0xa54ff53a;
    hash[4] = 0x510e527f;
    hash[5] = 0x9b05688c;
    hash[6] = 0x1f83d9ab;
    hash[7] = 0x5be0cd19;
  }

  inline void computeHash(uint32_t hash[8], const void* data, const size_t size)
  // SHA-256 of the ``size'' bytes at ``data'', returned as eight 32-bit words
  {
    uint32_t initHash[8];
    setInitialHash(initHash);
    processInput(hash, initHash, 0, (const uint8_t*)data, size);
  }

  inline void rawHashFromHash(uint8_t rawHash[HashBytes], const uint32_t hash[HashValues])
  // serialise the digest words to bytes, most significant byte first
  {
    uint8_t* current = rawHash;
    for (size_t i = 0; i < HashValues; i++) {
      *current++ = (hash[i] >> 24) & 0xFF;
      *current++ = (hash[i] >> 16) & 0xFF;
      *current++ = (hash[i] >>  8) & 0xFF;
      *current++ =  hash[i]        & 0xFF;
    }
  }

  inline std::string showRawHash(const uint8_t rawHash[HashBytes])
  // lower-case hexadecimal text of a raw digest
  {
    std::string result;
    result.reserve(2 * HashBytes);
    for (size_t i = 0; i < HashBytes; i++) {
      static const char dec2hex[16+1] = "0123456789abcdef";
      result += dec2hex[(rawHash[i] >> 4) & 15];
      result += dec2hex[ rawHash[i]       & 15];
    }
    return result;
  }

  inline std::string showHash(const uint32_t hash[8])
  // lower-case hexadecimal text of a digest given as eight 32-bit words
  {
    unsigned char rawHash[HashBytes];
    rawHashFromHash(rawHash, hash);
    return showRawHash(rawHash);
  }

}
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										125
									
								
								lib/lattice/rng/show.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								lib/lattice/rng/show.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,125 @@ | ||||
| // vim: set ts=2 sw=2 expandtab: | ||||
|  | ||||
| // Copyright (c) 2014 Luchang Jin | ||||
| // All rights reserved. | ||||
|  | ||||
| // This program is free software: you can redistribute it and/or modify | ||||
| // it under the terms of the GNU General Public License as published by | ||||
| // the Free Software Foundation, either version 2 of the License, or | ||||
| // (at your option) any later version. | ||||
| // | ||||
| // This program is distributed in the hope that it will be useful, | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| // GNU General Public License for more details. | ||||
| // | ||||
| // You should have received a copy of the GNU General Public License | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #ifndef INCLUDE_SHOW_H | ||||
| #define INCLUDE_SHOW_H | ||||
|  | ||||
| #include <sstream> | ||||
| #include <string> | ||||
| #include <cstdarg> | ||||
| #include <cstring> | ||||
| #include <cstdlib> | ||||
| #include <cstdio> | ||||
| #include <sstream> | ||||
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| namespace CURRENT_DEFAULT_NAMESPACE_NAME { | ||||
| #endif | ||||
|  | ||||
// printf-style formatting into a std::string, taking an already started
// argument list.  The caller keeps ownership of ``args'' and is responsible
// for calling va_end on it.
// Returns the formatted text (up to the first NUL character, if the output
// contains one); returns an empty string if formatting fails.
inline std::string vssprintf(const char* fmt, va_list args)
{
  // first pass on a copy of the argument list: measure the output only
  va_list probe;
  va_copy(probe, args);
  const int needed = std::vsnprintf(nullptr, 0, fmt, probe);
  va_end(probe);
  if (needed <= 0) {
    // formatting error or empty output
    return std::string();
  }
  // second pass: format into a buffer with room for the terminating NUL
  std::string str(static_cast<size_t>(needed) + 1, '\0');
  std::vsnprintf(&str[0], str.size(), fmt, args);
  // drop the terminator (and anything behind an embedded NUL)
  str.resize(std::strlen(str.c_str()));
  return str;
}

// printf-style formatting into a std::string.
inline std::string ssprintf(const char* fmt, ...)
{
  va_list args;
  va_start(args, fmt);
  std::string str = vssprintf(fmt, args);
  // every va_start must be paired with va_end before the function returns
  va_end(args);
  return str;
}
|  | ||||
// show(): textual representation of a value, one overload per supported type.
// Integers are printed in decimal, doubles in scientific notation with 17
// digits after the point (field width 24), bools as "true"/"false", and
// strings are returned unchanged.

// nothing to show: empty text
inline std::string show()
{
  return "";
}

inline std::string show(const int& x)
{
  char text[32];
  std::snprintf(text, sizeof(text), "%d", x);
  return std::string(text);
}

inline std::string show(const unsigned int& x)
{
  char text[32];
  std::snprintf(text, sizeof(text), "%u", x);
  return std::string(text);
}

inline std::string show(const long& x)
{
  char text[32];
  std::snprintf(text, sizeof(text), "%ld", x);
  return std::string(text);
}

inline std::string show(const unsigned long& x)
{
  char text[32];
  std::snprintf(text, sizeof(text), "%lu", x);
  return std::string(text);
}

inline std::string show(const double& x)
{
  // large enough for sign, 18 significant digits and a three digit exponent
  char text[64];
  std::snprintf(text, sizeof(text), "%24.17E", x);
  return std::string(text);
}

inline std::string show(const bool& x)
{
  if (x) {
    return "true";
  }
  return "false";
}

inline std::string show(const std::string& x)
{
  return x;
}
|  | ||||
// Generic counterpart of show(): render ``x'' with its stream insertion
// operator and return the resulting text.
template <class T>
std::string shows(const T& x)
{
  std::ostringstream buffer;
  buffer << x;
  return buffer.str();
}
|  | ||||
// Parse ``str'' into ``x'' with the stream extraction operator of T.
// Returns a reference to ``x'' itself; the stream state is not reported, so
// a failed extraction cannot be detected through the return value.
template <class T>
T& reads(T& x, const std::string& str)
{
  std::istringstream source(str);
  source >> x;
  return x;
}
|  | ||||
// Write ``str'' to the C stream ``fp'' without adding a line break.
inline void fdisplay(FILE* fp, const std::string& str)
{
  const char* text = str.c_str();
  fprintf(fp, "%s", text);
}
|  | ||||
// Write ``str'' followed by a single newline to the C stream ``fp''.
inline void fdisplayln(FILE* fp, const std::string& str)
{
  const char* text = str.c_str();
  fprintf(fp, "%s\n", text);
}
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										115
									
								
								lib/lattice/rng/sprng-sha256.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								lib/lattice/rng/sprng-sha256.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | ||||
| // vim: set ts=2 sw=2 expandtab: | ||||
|  | ||||
| // Copyright (c) 2016 Luchang Jin | ||||
| // All rights reserved. | ||||
|  | ||||
| // This program is free software: you can redistribute it and/or modify | ||||
| // it under the terms of the GNU General Public License as published by | ||||
| // the Free Software Foundation, either version 2 of the License, or | ||||
| // (at your option) any later version. | ||||
| // | ||||
| // This program is distributed in the hope that it will be useful, | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| // GNU General Public License for more details. | ||||
| // | ||||
| // You should have received a copy of the GNU General Public License | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #ifndef INCLUDE_SPRNG_SHA256_H | ||||
| #define INCLUDE_SPRNG_SHA256_H | ||||
|  | ||||
| #include "rng-state.h" | ||||
|  | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <ostream> | ||||
| #include <istream> | ||||
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| namespace CURRENT_DEFAULT_NAMESPACE_NAME { | ||||
| #endif | ||||
|  | ||||
| struct SprngSha256 | ||||
| { | ||||
|   RngState rs; | ||||
|   // | ||||
|   using result_type = uint64_t; | ||||
|   // | ||||
|   static constexpr result_type default_seed = 0; | ||||
|   // | ||||
|   explicit SprngSha256(result_type val = default_seed) | ||||
|   { | ||||
|     seed(val); | ||||
|   } | ||||
|   template<typename Sseq, typename = typename | ||||
|     std::enable_if<!std::is_same<Sseq, SprngSha256>::value> | ||||
|     ::type> | ||||
|   explicit SprngSha256(Sseq& q) | ||||
|   { | ||||
|     seed(q); | ||||
|   } | ||||
|   // | ||||
|   static constexpr result_type min() | ||||
|   { | ||||
|     return 0; | ||||
|   } | ||||
|   // | ||||
|   static constexpr result_type max() | ||||
|   { | ||||
|     return UINT64_MAX; | ||||
|   } | ||||
|   // | ||||
|   void seed(result_type val = default_seed) | ||||
|   { | ||||
|     reset(rs, (long)val); | ||||
|   } | ||||
|   template <class Sseq> | ||||
|   typename std::enable_if<std::is_class<Sseq>::value>::type | ||||
|   seed(Sseq& q) | ||||
|   { | ||||
|     std::array<uint32_t, 8> seq; | ||||
|     q.generate(seq.begin(), seq.end()); | ||||
|     reset(rs); | ||||
|     for (size_t i = 0; i < seq.size(); ++i) { | ||||
|       splitRngState(rs, rs, seq[i]); | ||||
|     } | ||||
|   } | ||||
|   // | ||||
|   result_type operator()() | ||||
|   { | ||||
|     return randGen(rs); | ||||
|   } | ||||
|   // | ||||
|   void discard(unsigned long long z) | ||||
|   { | ||||
|     for (unsigned long long i = 0; i < z; ++i) { | ||||
|       randGen(rs); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| inline std::ostream& operator<<(std::ostream& os, const SprngSha256& ss) | ||||
| { | ||||
|   os << ss.rs; | ||||
|   return os; | ||||
| } | ||||
|  | ||||
| inline std::istream& operator>>(std::istream& is, SprngSha256& ss) | ||||
| { | ||||
|   is >> ss.rs; | ||||
|   return is; | ||||
| } | ||||
|  | ||||
| inline bool operator==(const SprngSha256& ss1, const SprngSha256& ss2) | ||||
| { | ||||
|   return ss1.rs == ss2.rs; | ||||
| } | ||||
|  | ||||
| #ifdef CURRENT_DEFAULT_NAMESPACE_NAME | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
| @@ -194,22 +194,22 @@ class BinaryIO { | ||||
|  | ||||
|       std::vector<int> site({x,y,z,t}); | ||||
|  | ||||
|       if ( grid->IsBoss() ) { | ||||
| 	fin.read((char *)&file_object,sizeof(file_object)); | ||||
| 	bytes += sizeof(file_object); | ||||
| 	if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); | ||||
| 	if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object)); | ||||
| 	if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); | ||||
| 	if(ieee64)    le64toh_v((void *)&file_object,sizeof(file_object)); | ||||
|       if (grid->IsBoss()) { | ||||
|         fin.read((char *)&file_object, sizeof(file_object)); | ||||
|         bytes += sizeof(file_object); | ||||
|         if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); | ||||
|         if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); | ||||
|         if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); | ||||
|         if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); | ||||
|  | ||||
| 	munge(file_object,munged,csum); | ||||
|         munge(file_object, munged, csum); | ||||
|       } | ||||
|       // The boss who read the file has their value poked | ||||
|       pokeSite(munged,Umu,site); | ||||
|     }}}} | ||||
|     timer.Stop(); | ||||
|     std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||
| 	     << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl; | ||||
|        << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl; | ||||
|  | ||||
|     return csum; | ||||
|   } | ||||
| @@ -254,20 +254,20 @@ class BinaryIO { | ||||
|  | ||||
|        | ||||
|       if ( grid->IsBoss() ) { | ||||
| 	 | ||||
| 	if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); | ||||
| 	if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object)); | ||||
| 	if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); | ||||
| 	if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object)); | ||||
|    | ||||
|   if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); | ||||
|   if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object)); | ||||
|   if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); | ||||
|   if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object)); | ||||
|  | ||||
| 	// NB could gather an xstrip as an optimisation. | ||||
| 	fout.write((char *)&file_object,sizeof(file_object)); | ||||
| 	bytes+=sizeof(file_object); | ||||
|   // NB could gather an xstrip as an optimisation. | ||||
|   fout.write((char *)&file_object,sizeof(file_object)); | ||||
|   bytes+=sizeof(file_object); | ||||
|       } | ||||
|     }}}} | ||||
|     timer.Stop(); | ||||
|     std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||
| 	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||
|        << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||
|  | ||||
|     return csum; | ||||
|   } | ||||
| @@ -305,15 +305,15 @@ class BinaryIO { | ||||
|       int l_idx=parallel.generator_idx(o_idx,i_idx); | ||||
|  | ||||
|       if( rank == grid->ThisRank() ){ | ||||
| 	//	std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl; | ||||
| 	parallel.GetState(saved,l_idx); | ||||
|   //  std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl; | ||||
|   parallel.GetState(saved,l_idx); | ||||
|       } | ||||
|  | ||||
|       grid->Broadcast(rank,(void *)&saved[0],bytes); | ||||
|  | ||||
|       if ( grid->IsBoss() ) { | ||||
| 	Uint32Checksum((uint32_t *)&saved[0],bytes,csum); | ||||
| 	fout.write((char *)&saved[0],bytes); | ||||
|   Uint32Checksum((uint32_t *)&saved[0],bytes,csum); | ||||
|   fout.write((char *)&saved[0],bytes); | ||||
|       } | ||||
|  | ||||
|     } | ||||
| @@ -355,14 +355,14 @@ class BinaryIO { | ||||
|       int l_idx=parallel.generator_idx(o_idx,i_idx); | ||||
|  | ||||
|       if ( grid->IsBoss() ) { | ||||
| 	fin.read((char *)&saved[0],bytes); | ||||
| 	Uint32Checksum((uint32_t *)&saved[0],bytes,csum); | ||||
|   fin.read((char *)&saved[0],bytes); | ||||
|   Uint32Checksum((uint32_t *)&saved[0],bytes,csum); | ||||
|       } | ||||
|  | ||||
|       grid->Broadcast(0,(void *)&saved[0],bytes); | ||||
|  | ||||
|       if( rank == grid->ThisRank() ){ | ||||
| 	parallel.SetState(saved,l_idx); | ||||
|   parallel.SetState(saved,l_idx); | ||||
|       } | ||||
|  | ||||
|     } | ||||
| @@ -415,15 +415,15 @@ class BinaryIO { | ||||
|  | ||||
|       if ( d == 0 ) parallel[d] = 0; | ||||
|       if (parallel[d]) { | ||||
| 	range[d] = grid->_ldimensions[d]; | ||||
| 	start[d] = grid->_processor_coor[d]*range[d]; | ||||
| 	ioproc[d]= grid->_processor_coor[d]; | ||||
|   range[d] = grid->_ldimensions[d]; | ||||
|   start[d] = grid->_processor_coor[d]*range[d]; | ||||
|   ioproc[d]= grid->_processor_coor[d]; | ||||
|       } else { | ||||
| 	range[d] = grid->_gdimensions[d]; | ||||
| 	start[d] = 0; | ||||
| 	ioproc[d]= 0; | ||||
|   range[d] = grid->_gdimensions[d]; | ||||
|   start[d] = 0; | ||||
|   ioproc[d]= 0; | ||||
|  | ||||
| 	if ( grid->_processor_coor[d] != 0 ) IOnode = 0; | ||||
|   if ( grid->_processor_coor[d] != 0 ) IOnode = 0; | ||||
|       } | ||||
|       slice_vol = slice_vol * range[d]; | ||||
|     } | ||||
| @@ -434,9 +434,9 @@ class BinaryIO { | ||||
|       std::cout<< std::dec ; | ||||
|       std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice "; | ||||
|       for(int d=0;d<grid->_ndimension;d++){ | ||||
| 	std::cout<< range[d]; | ||||
| 	if( d< grid->_ndimension-1 )  | ||||
| 	  std::cout<< " x "; | ||||
|   std::cout<< range[d]; | ||||
|   if( d< grid->_ndimension-1 )  | ||||
|     std::cout<< " x "; | ||||
|       } | ||||
|       std::cout << std::endl; | ||||
|     } | ||||
| @@ -463,7 +463,7 @@ class BinaryIO { | ||||
|  | ||||
|       // need to implement these loops in Nd independent way with a lexico conversion | ||||
|     for(int tlex=0;tlex<slice_vol;tlex++){ | ||||
| 	 | ||||
|    | ||||
|       std::vector<int> tsite(nd); // temporary mixed up site | ||||
|       std::vector<int> gsite(nd); | ||||
|       std::vector<int> lsite(nd); | ||||
| @@ -472,8 +472,8 @@ class BinaryIO { | ||||
|       Lexicographic::CoorFromIndex(tsite,tlex,range); | ||||
|  | ||||
|       for(int d=0;d<nd;d++){ | ||||
| 	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site | ||||
| 	gsite[d] = tsite[d]+start[d];               // global site | ||||
|   lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site | ||||
|   gsite[d] = tsite[d]+start[d];               // global site | ||||
|       } | ||||
|  | ||||
|       ///////////////////////// | ||||
| @@ -487,29 +487,29 @@ class BinaryIO { | ||||
|       // iorank reads from the seek | ||||
|       //////////////////////////////// | ||||
|       if (myrank == iorank) { | ||||
| 	 | ||||
| 	fin.seekg(offset+g_idx*sizeof(fileObj)); | ||||
| 	fin.read((char *)&fileObj,sizeof(fileObj)); | ||||
| 	bytes+=sizeof(fileObj); | ||||
| 	 | ||||
| 	if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	 | ||||
| 	munge(fileObj,siteObj,csum); | ||||
|    | ||||
|   fin.seekg(offset+g_idx*sizeof(fileObj)); | ||||
|   fin.read((char *)&fileObj,sizeof(fileObj)); | ||||
|   bytes+=sizeof(fileObj); | ||||
|    | ||||
|   if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
|   if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
|   if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
|   if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj)); | ||||
|    | ||||
|   munge(fileObj,siteObj,csum); | ||||
|  | ||||
|       }	 | ||||
|       }  | ||||
|  | ||||
|       // Possibly do transport through pt2pt  | ||||
|       if ( rank != iorank ) {  | ||||
| 	if ( (myrank == rank) || (myrank==iorank) ) { | ||||
| 	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); | ||||
| 	} | ||||
|   if ( (myrank == rank) || (myrank==iorank) ) { | ||||
|     grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); | ||||
|   } | ||||
|       } | ||||
|       // Poke at destination | ||||
|       if ( myrank == rank ) { | ||||
| 	  pokeLocalSite(siteObj,Umu,lsite); | ||||
|     pokeLocalSite(siteObj,Umu,lsite); | ||||
|       } | ||||
|       grid->Barrier(); // necessary? | ||||
|     } | ||||
| @@ -520,7 +520,7 @@ class BinaryIO { | ||||
|  | ||||
|     timer.Stop(); | ||||
|     std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||
| 	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||
|        << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||
|      | ||||
|     return csum; | ||||
|   } | ||||
| @@ -558,15 +558,15 @@ class BinaryIO { | ||||
|       if ( d!= grid->_ndimension-1 ) parallel[d] = 0; | ||||
|  | ||||
|       if (parallel[d]) { | ||||
| 	range[d] = grid->_ldimensions[d]; | ||||
| 	start[d] = grid->_processor_coor[d]*range[d]; | ||||
| 	ioproc[d]= grid->_processor_coor[d]; | ||||
|   range[d] = grid->_ldimensions[d]; | ||||
|   start[d] = grid->_processor_coor[d]*range[d]; | ||||
|   ioproc[d]= grid->_processor_coor[d]; | ||||
|       } else { | ||||
| 	range[d] = grid->_gdimensions[d]; | ||||
| 	start[d] = 0; | ||||
| 	ioproc[d]= 0; | ||||
|   range[d] = grid->_gdimensions[d]; | ||||
|   start[d] = 0; | ||||
|   ioproc[d]= 0; | ||||
|  | ||||
| 	if ( grid->_processor_coor[d] != 0 ) IOnode = 0; | ||||
|   if ( grid->_processor_coor[d] != 0 ) IOnode = 0; | ||||
|       } | ||||
|  | ||||
|       slice_vol = slice_vol * range[d]; | ||||
| @@ -577,9 +577,9 @@ class BinaryIO { | ||||
|       grid->GlobalSum(tmp); | ||||
|       std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice "; | ||||
|       for(int d=0;d<grid->_ndimension;d++){ | ||||
| 	std::cout<< range[d]; | ||||
| 	if( d< grid->_ndimension-1 )  | ||||
| 	  std::cout<< " x "; | ||||
|   std::cout<< range[d]; | ||||
|   if( d< grid->_ndimension-1 )  | ||||
|     std::cout<< " x "; | ||||
|       } | ||||
|       std::cout << std::endl; | ||||
|     } | ||||
| @@ -610,7 +610,7 @@ class BinaryIO { | ||||
|     // should aggregate a whole chunk and then write. | ||||
|     // need to implement these loops in Nd independent way with a lexico conversion | ||||
|     for(int tlex=0;tlex<slice_vol;tlex++){ | ||||
| 	 | ||||
|    | ||||
|       std::vector<int> tsite(nd); // temporary mixed up site | ||||
|       std::vector<int> gsite(nd); | ||||
|       std::vector<int> lsite(nd); | ||||
| @@ -619,8 +619,8 @@ class BinaryIO { | ||||
|       Lexicographic::CoorFromIndex(tsite,tlex,range); | ||||
|  | ||||
|       for(int d=0;d<nd;d++){ | ||||
| 	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site | ||||
| 	gsite[d] = tsite[d]+start[d];               // global site | ||||
|   lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site | ||||
|   gsite[d] = tsite[d]+start[d];               // global site | ||||
|       } | ||||
|  | ||||
|  | ||||
| @@ -640,26 +640,26 @@ class BinaryIO { | ||||
|  | ||||
|       // Pair of nodes may need to do pt2pt send | ||||
|       if ( rank != iorank ) { // comms is necessary | ||||
| 	if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it | ||||
| 	  // Send to IOrank  | ||||
| 	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); | ||||
| 	} | ||||
|   if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it | ||||
|     // Send to IOrank  | ||||
|     grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); | ||||
|   } | ||||
|       } | ||||
|  | ||||
|       grid->Barrier(); // necessary? | ||||
|  | ||||
|       if (myrank == iorank) { | ||||
| 	 | ||||
| 	munge(siteObj,fileObj,csum); | ||||
|    | ||||
|   munge(siteObj,fileObj,csum); | ||||
|  | ||||
| 	if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj)); | ||||
| 	 | ||||
| 	fout.seekp(offset+g_idx*sizeof(fileObj)); | ||||
| 	fout.write((char *)&fileObj,sizeof(fileObj)); | ||||
| 	bytes+=sizeof(fileObj); | ||||
|   if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); | ||||
|   if(ieee32)    htole32_v((void *)&fileObj,sizeof(fileObj)); | ||||
|   if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); | ||||
|   if(ieee64)    htole64_v((void *)&fileObj,sizeof(fileObj)); | ||||
|    | ||||
|   fout.seekp(offset+g_idx*sizeof(fileObj)); | ||||
|   fout.write((char *)&fileObj,sizeof(fileObj)); | ||||
|   bytes+=sizeof(fileObj); | ||||
|       } | ||||
|     } | ||||
|  | ||||
| @@ -668,7 +668,7 @@ class BinaryIO { | ||||
|  | ||||
|     timer.Stop(); | ||||
|     std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " | ||||
| 	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||
|        << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl; | ||||
|  | ||||
|     return csum; | ||||
|   } | ||||
|   | ||||
| @@ -17,7 +17,7 @@ | ||||
| #endif | ||||
|  | ||||
| // Include user configuration file (this can define various configuration macros) | ||||
| #include <pugixml/pugiconfig.hpp> | ||||
| #include "pugiconfig.hpp" | ||||
|  | ||||
| #ifndef HEADER_PUGIXML_HPP | ||||
| #define HEADER_PUGIXML_HPP | ||||
|   | ||||
| @@ -55,10 +55,19 @@ namespace QCD { | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     // QCD iMatrix types | ||||
|     // Index conventions:                            Lorentz x Spin x Colour | ||||
|     // note: static const int or constexpr will work for type deductions | ||||
|     //       with the intel compiler (up to version 17) | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     static const int ColourIndex = 2; | ||||
|     static const int SpinIndex   = 1; | ||||
|     static const int LorentzIndex= 0; | ||||
|     #define ColourIndex  2 | ||||
|     #define SpinIndex    1 | ||||
|     #define LorentzIndex 0 | ||||
|  | ||||
|    | ||||
|     // Also should make these a named enum type | ||||
|     static const int DaggerNo=0; | ||||
|     static const int DaggerYes=1; | ||||
|     static const int InverseNo=0; | ||||
|     static const int InverseYes=1; | ||||
|  | ||||
|     // Useful traits is this a spin index | ||||
|     //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
| @@ -484,16 +493,27 @@ namespace QCD { | ||||
| }   //namespace QCD | ||||
| } // Grid | ||||
|  | ||||
| #include <qcd/utils/SpaceTimeGrid.h> | ||||
| #include <qcd/spin/Dirac.h> | ||||
| #include <qcd/spin/TwoSpinor.h> | ||||
| #include <qcd/utils/LinalgUtils.h> | ||||
| #include <qcd/utils/CovariantCshift.h> | ||||
| #include <qcd/utils/SUn.h> | ||||
| #include <qcd/action/Actions.h> | ||||
| #include <qcd/hmc/integrators/Integrator.h> | ||||
| #include <qcd/hmc/integrators/Integrator_algorithm.h> | ||||
| #include <qcd/hmc/HMC.h> | ||||
|  | ||||
| #include <Grid/qcd/utils/SpaceTimeGrid.h> | ||||
| #include <Grid/qcd/spin/Dirac.h> | ||||
| #include <Grid/qcd/spin/TwoSpinor.h> | ||||
| #include <Grid/qcd/utils/LinalgUtils.h> | ||||
| #include <Grid/qcd/utils/CovariantCshift.h> | ||||
|  | ||||
| // Include representations 	 | ||||
| #include <Grid/qcd/utils/SUn.h> | ||||
| #include <Grid/qcd/utils/SUnAdjoint.h> | ||||
| #include <Grid/qcd/utils/SUnTwoIndex.h> | ||||
| #include <Grid/qcd/representations/hmc_types.h> | ||||
|  | ||||
| #include <Grid/qcd/action/Actions.h> | ||||
|  | ||||
| #include <Grid/qcd/smearing/Smearing.h> | ||||
|  | ||||
| #include <Grid/qcd/hmc/integrators/Integrator.h> | ||||
| #include <Grid/qcd/hmc/integrators/Integrator_algorithm.h> | ||||
| #include <Grid/qcd/hmc/HMC.h> | ||||
|  | ||||
|  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -1,86 +1,153 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/qcd/action/ActionBase.h | ||||
| Source file: ./lib/qcd/action/ActionBase.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef QCD_ACTION_BASE | ||||
| #define QCD_ACTION_BASE | ||||
| namespace Grid { | ||||
| namespace QCD{ | ||||
|  | ||||
| template<class GaugeField> | ||||
| class Action {  | ||||
| namespace QCD { | ||||
|  | ||||
| template <class GaugeField> | ||||
| class Action { | ||||
|  public: | ||||
|   bool is_smeared = false; | ||||
|   // Boundary conditions? // Heatbath? | ||||
|   virtual void  refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions | ||||
|   virtual RealD S    (const GaugeField &U)                        = 0;  // evaluate the action | ||||
|   virtual void  deriv(const GaugeField &U,GaugeField & dSdU )     = 0;  // evaluate the action derivative | ||||
|   virtual ~Action() {}; | ||||
|   virtual void refresh(const GaugeField& U, | ||||
|                        GridParallelRNG& pRNG) = 0;  // refresh pseudofermions | ||||
|   virtual RealD S(const GaugeField& U) = 0;         // evaluate the action | ||||
|   virtual void deriv(const GaugeField& U, | ||||
|                      GaugeField& dSdU) = 0;  // evaluate the action derivative | ||||
|   virtual ~Action(){}; | ||||
| }; | ||||
|  | ||||
| // Indexing of tuple types | ||||
| template <class T, class Tuple> | ||||
| struct Index; | ||||
|  | ||||
| template <class T, class... Types> | ||||
| struct Index<T, std::tuple<T, Types...>> { | ||||
|   static const std::size_t value = 0; | ||||
| }; | ||||
|  | ||||
| template <class T, class U, class... Types> | ||||
| struct Index<T, std::tuple<U, Types...>> { | ||||
|   static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value; | ||||
| }; | ||||
|  | ||||
| // Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh | ||||
| /* | ||||
| template<class GaugeField, class FermionField> | ||||
| class PseudoFermionAction : public Action<GaugeField> { | ||||
| template <class GaugeField> | ||||
| struct ActionLevel { | ||||
|  public: | ||||
|   FermionField Phi; | ||||
|   GridParallelRNG &pRNG; | ||||
|   GridBase &Grid; | ||||
|   typedef Action<GaugeField>* | ||||
|       ActPtr;  // now force the same colours as the rest of the code | ||||
|  | ||||
|   PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) { | ||||
|   }; | ||||
|   //Add supported representations here | ||||
|  | ||||
|   virtual void refresh(const GaugeField &gauge) { | ||||
|     gaussian(Phi,pRNG); | ||||
|   }; | ||||
|  | ||||
| }; | ||||
| */ | ||||
|  | ||||
| template<class GaugeField> struct ActionLevel{ | ||||
| public: | ||||
|     | ||||
|   typedef Action<GaugeField>*  ActPtr; // now force the same colours as the rest of the code | ||||
|  | ||||
|   int multiplier; | ||||
|   unsigned int multiplier; | ||||
|  | ||||
|   std::vector<ActPtr> actions; | ||||
|  | ||||
|   ActionLevel(int mul = 1) : multiplier(mul) { | ||||
|     assert (mul > 0); | ||||
|   ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) { | ||||
|     assert(mul >= 1); | ||||
|   }; | ||||
|     | ||||
|   void push_back(ActPtr ptr){ | ||||
|     actions.push_back(ptr); | ||||
|  | ||||
|   void push_back(ActPtr ptr) { actions.push_back(ptr); } | ||||
| }; | ||||
| */ | ||||
|  | ||||
| template <class GaugeField, class Repr = NoHirep > | ||||
| struct ActionLevel { | ||||
|  public: | ||||
|   unsigned int multiplier;  | ||||
|  | ||||
|   // Fundamental repr actions separated because of the smearing | ||||
|   typedef Action<GaugeField>* ActPtr; | ||||
|  | ||||
|   // construct a tuple of vectors of the actions for the corresponding higher | ||||
|   // representation fields | ||||
|   typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection; | ||||
|   action_collection actions_hirep; | ||||
|   typedef typename  AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types; | ||||
|  | ||||
|   std::vector<ActPtr>& actions; | ||||
|  | ||||
|   // Temporary conversion between ActionLevel and ActionLevelHirep | ||||
|   //ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){} | ||||
|  | ||||
|   ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) { | ||||
|     // initialize the hirep vectors to zero. | ||||
|     //apply(this->resize, actions_hirep, 0); //need a working resize | ||||
|     assert(mul >= 1); | ||||
|   }; | ||||
|  | ||||
|   //void push_back(ActPtr ptr) { actions.push_back(ptr); } | ||||
|  | ||||
|  | ||||
|  | ||||
|   template < class Field > | ||||
|   void push_back(Action<Field>* ptr) { | ||||
|     // insert only in the correct vector | ||||
|     std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr); | ||||
|   }; | ||||
|  | ||||
|   | ||||
|  | ||||
|   template < class ActPtr> | ||||
|   static void resize(ActPtr ap, unsigned int n){ | ||||
|     ap->resize(n); | ||||
|  | ||||
|   } | ||||
|  | ||||
|   //template <std::size_t I> | ||||
|   //auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U)  {return std::get<I>(R).U;} | ||||
|  | ||||
|   // Loop on tuple for a callable function | ||||
|   template <std::size_t I = 1, typename Callable, typename ...Args> | ||||
|   inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply( | ||||
|       Callable, Repr& R,Args&...) const {} | ||||
|  | ||||
|   template <std::size_t I = 1, typename Callable, typename ...Args> | ||||
|   inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply( | ||||
|       Callable fn, Repr& R, Args&... arguments) const { | ||||
|     fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...); | ||||
|     apply<I + 1>(fn, R, arguments...); | ||||
|   }   | ||||
|  | ||||
| }; | ||||
|  | ||||
| template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >; | ||||
|  | ||||
| //template <class GaugeField> | ||||
| //using ActionSet = std::vector<ActionLevel<GaugeField> >; | ||||
|  | ||||
| }} | ||||
| template <class GaugeField, class R> | ||||
| using ActionSet = std::vector<ActionLevel<GaugeField, R> >; | ||||
|  | ||||
| } | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -40,25 +40,25 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| //////////////////////////////////////////// | ||||
| // Abstract base interface | ||||
| //////////////////////////////////////////// | ||||
| #include <qcd/action/ActionBase.h> | ||||
| #include <qcd/action/ActionParams.h> | ||||
| #include <Grid/qcd/action/ActionBase.h> | ||||
| #include <Grid/qcd/action/ActionParams.h> | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Utility functions | ||||
| //////////////////////////////////////////// | ||||
| #include <qcd/action/gauge/GaugeImpl.h> | ||||
| #include <qcd/utils/WilsonLoops.h> | ||||
| #include <Grid/qcd/action/gauge/GaugeImpl.h> | ||||
| #include <Grid/qcd/utils/WilsonLoops.h> | ||||
|  | ||||
| #include <qcd/action/fermion/WilsonCompressor.h>     //used by all wilson type fermions | ||||
| #include <qcd/action/fermion/FermionOperatorImpl.h> | ||||
| #include <qcd/action/fermion/FermionOperator.h> | ||||
| #include <qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions | ||||
| #include <Grid/qcd/action/fermion/WilsonCompressor.h>     //used by all wilson type fermions | ||||
| #include <Grid/qcd/action/fermion/FermionOperatorImpl.h> | ||||
| #include <Grid/qcd/action/fermion/FermionOperator.h> | ||||
| #include <Grid/qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Gauge Actions | ||||
| //////////////////////////////////////////// | ||||
| #include <qcd/action/gauge/WilsonGaugeAction.h> | ||||
| #include <qcd/action/gauge/PlaqPlusRectangleAction.h> | ||||
| #include <Grid/qcd/action/gauge/WilsonGaugeAction.h> | ||||
| #include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h> | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
| @@ -107,41 +107,64 @@ typedef SymanzikGaugeAction<ConjugateGimplD>        ConjugateSymanzikGaugeAction | ||||
| // for EVERY .cc file. This define centralises the list and restores global push of impl cases | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| #define FermOpTemplateInstantiate(A) \ | ||||
|  | ||||
| #define FermOp4dVecTemplateInstantiate(A) \ | ||||
|   template class A<WilsonImplF>;		\ | ||||
|   template class A<WilsonImplD>;		\ | ||||
|   template class A<ZWilsonImplF>;		\ | ||||
|   template class A<ZWilsonImplD>;		\ | ||||
|   template class A<GparityWilsonImplF>;		\ | ||||
|   template class A<GparityWilsonImplD>;		 | ||||
|  | ||||
| #define AdjointFermOpTemplateInstantiate(A) \ | ||||
|   template class A<WilsonAdjImplF>; \ | ||||
|   template class A<WilsonAdjImplD>;  | ||||
|  | ||||
| #define TwoIndexFermOpTemplateInstantiate(A) \ | ||||
|   template class A<WilsonTwoIndexSymmetricImplF>; \ | ||||
|   template class A<WilsonTwoIndexSymmetricImplD>;  | ||||
|  | ||||
| #define FermOp5dVecTemplateInstantiate(A) \ | ||||
|   template class A<DomainWallVec5dImplF>;	\ | ||||
|   template class A<DomainWallVec5dImplD>;	\ | ||||
|   template class A<ZDomainWallVec5dImplF>;	\ | ||||
|   template class A<ZDomainWallVec5dImplD>;	 | ||||
|  | ||||
| #define FermOpTemplateInstantiate(A) \ | ||||
|  FermOp4dVecTemplateInstantiate(A) \ | ||||
|  FermOp5dVecTemplateInstantiate(A)  | ||||
|  | ||||
|  | ||||
| #define GparityFermOpTemplateInstantiate(A)  | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Fermion operators / actions | ||||
| //////////////////////////////////////////// | ||||
|  | ||||
| #include <qcd/action/fermion/WilsonFermion.h>       // 4d wilson like | ||||
| #include <qcd/action/fermion/WilsonTMFermion.h>       // 4d wilson like | ||||
| #include <qcd/action/fermion/WilsonFermion5D.h>     // 5d base used by all 5d overlap types | ||||
| #include <Grid/qcd/action/fermion/WilsonFermion.h>       // 4d wilson like | ||||
| #include <Grid/qcd/action/fermion/WilsonTMFermion.h>       // 4d wilson like | ||||
| #include <Grid/qcd/action/fermion/WilsonFermion5D.h>     // 5d base used by all 5d overlap types | ||||
|  | ||||
| //#include <qcd/action/fermion/CloverFermion.h> | ||||
| //#include <Grid/qcd/action/fermion/CloverFermion.h> | ||||
|  | ||||
| #include <qcd/action/fermion/CayleyFermion5D.h>     // Cayley types | ||||
| #include <qcd/action/fermion/DomainWallFermion.h> | ||||
| #include <qcd/action/fermion/DomainWallFermion.h> | ||||
| #include <qcd/action/fermion/MobiusFermion.h> | ||||
| #include <qcd/action/fermion/ScaledShamirFermion.h> | ||||
| #include <qcd/action/fermion/MobiusZolotarevFermion.h> | ||||
| #include <qcd/action/fermion/ShamirZolotarevFermion.h> | ||||
| #include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h> | ||||
| #include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h> | ||||
| #include <Grid/qcd/action/fermion/CayleyFermion5D.h>     // Cayley types | ||||
| #include <Grid/qcd/action/fermion/DomainWallFermion.h> | ||||
| #include <Grid/qcd/action/fermion/DomainWallFermion.h> | ||||
| #include <Grid/qcd/action/fermion/MobiusFermion.h> | ||||
| #include <Grid/qcd/action/fermion/ZMobiusFermion.h> | ||||
| #include <Grid/qcd/action/fermion/ScaledShamirFermion.h> | ||||
| #include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h> | ||||
| #include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h> | ||||
| #include <Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h> | ||||
| #include <Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h> | ||||
|  | ||||
| #include <qcd/action/fermion/ContinuedFractionFermion5D.h>               // Continued fraction | ||||
| #include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h> | ||||
| #include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h> | ||||
| #include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>               // Continued fraction | ||||
| #include <Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h> | ||||
| #include <Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h> | ||||
|  | ||||
| #include <qcd/action/fermion/PartialFractionFermion5D.h>                 // Partial fraction | ||||
| #include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h> | ||||
| #include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h> | ||||
| #include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>                 // Partial fraction | ||||
| #include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h> | ||||
| #include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h> | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // More maintainable to maintain the following typedef list centrally, as more "impl" targets | ||||
| @@ -157,6 +180,14 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR; | ||||
| typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||
| typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||
|  | ||||
| typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | ||||
| typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | ||||
| typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD; | ||||
|  | ||||
| typedef WilsonFermion<WilsonTwoIndexSymmetricImplR> WilsonTwoIndexSymmetricFermionR; | ||||
| typedef WilsonFermion<WilsonTwoIndexSymmetricImplF> WilsonTwoIndexSymmetricFermionF; | ||||
| typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermionD; | ||||
|  | ||||
| typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR; | ||||
| typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF; | ||||
| typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD; | ||||
| @@ -167,6 +198,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | ||||
| typedef MobiusFermion<WilsonImplR> MobiusFermionR; | ||||
| typedef MobiusFermion<WilsonImplF> MobiusFermionF; | ||||
| typedef MobiusFermion<WilsonImplD> MobiusFermionD; | ||||
|  | ||||
| typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | ||||
| typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | ||||
| typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | ||||
|  | ||||
| typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | ||||
| typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF; | ||||
| typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD; | ||||
| @@ -222,21 +258,21 @@ typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
| // G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code | ||||
| /////////////////////////////////////////////////////////////////////////////// | ||||
| #include <qcd/action/fermion/g5HermitianLinop.h> | ||||
| #include <Grid/qcd/action/fermion/g5HermitianLinop.h> | ||||
|  | ||||
| //////////////////////////////////////// | ||||
| // Pseudo fermion combinations for HMC | ||||
| //////////////////////////////////////// | ||||
| #include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h> | ||||
| #include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h> | ||||
|  | ||||
| #include <qcd/action/pseudofermion/TwoFlavour.h> | ||||
| #include <qcd/action/pseudofermion/TwoFlavourRatio.h> | ||||
| #include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h> | ||||
| #include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h> | ||||
| #include <Grid/qcd/action/pseudofermion/TwoFlavour.h> | ||||
| #include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h> | ||||
| #include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h> | ||||
| #include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h> | ||||
|  | ||||
| #include <qcd/action/pseudofermion/OneFlavourRational.h> | ||||
| #include <qcd/action/pseudofermion/OneFlavourRationalRatio.h> | ||||
| #include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h> | ||||
| #include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h> | ||||
| #include <Grid/qcd/action/pseudofermion/OneFlavourRational.h> | ||||
| #include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h> | ||||
| #include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h> | ||||
| #include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										0
									
								
								lib/qcd/action/fermion/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/qcd/action/fermion/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| @@ -45,486 +48,352 @@ namespace QCD { | ||||
| 		   FourDimGrid, | ||||
|  	 	   FourDimRedBlackGrid,_M5,p), | ||||
|    mass(_mass) | ||||
|  { | ||||
|  } | ||||
|  { } | ||||
|  | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &Din) | ||||
|   { | ||||
|     // Assemble Din | ||||
|     int Ls=this->Ls; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	//	Din = bs psi[s] + cs[s] psi[s+1} | ||||
| 	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1); | ||||
| 	//      Din+= -mass*cs[s] psi[s+1} | ||||
| 	axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0); | ||||
| 	axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1); | ||||
| 	axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
| // Two-argument M5D: builds coefficient vectors with a unit diagonal and -1 | ||||
| // off-diagonals, puts the mass on the two wrap-around entries (lower[0] and | ||||
| // upper[Ls-1]), and forwards to the six-argument M5D. chi is passed both as | ||||
| // the phi input and as the output field. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::M5D   (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   const int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> lo(Ls,-1.0); | ||||
|   std::vector<Coeff_t> dg(Ls,1.0); | ||||
|   std::vector<Coeff_t> up(Ls,-1.0); | ||||
|   up[Ls-1]=mass; | ||||
|   lo[0]   =mass; | ||||
|   M5D(psi,chi,chi,lo,dg,up); | ||||
| } | ||||
| // Assembles Din from psi via the six-argument M5D using bs on the diagonal and | ||||
| // cs on both off-diagonals; the wrap-around entries (upper[Ls-1], lower[0]) | ||||
| // are additionally scaled by -mass. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &Din) | ||||
| { | ||||
|   const int last=this->Ls-1; | ||||
|   std::vector<Coeff_t> dg(bs); | ||||
|   std::vector<Coeff_t> up(cs); | ||||
|   std::vector<Coeff_t> lo(cs); | ||||
|   up[last]=-mass*up[last]; | ||||
|   lo[0]   =-mass*lo[0]; | ||||
|   M5D(psi,psi,Din,lo,dg,up); | ||||
| } | ||||
| template<class Impl> void CayleyFermion5D<Impl>::Meo5D     (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> diag = beo; | ||||
|   std::vector<Coeff_t> upper(Ls); | ||||
|   std::vector<Coeff_t> lower(Ls); | ||||
|   for(int i=0;i<Ls;i++) { | ||||
|     upper[i]=-ceo[i]; | ||||
|     lower[i]=-ceo[i]; | ||||
|   } | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField &Din) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1); | ||||
| 	axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0); | ||||
| 	axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1); | ||||
| 	axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|   upper[Ls-1]=-mass*upper[Ls-1]; | ||||
|   lower[0]   =-mass*lower[0]; | ||||
|   M5D(psi,psi,chi,lower,diag,upper); | ||||
| } | ||||
| // Builds the coefficient vectors from bee (diagonal) and -cee (both | ||||
| // off-diagonals), rescales the wrap-around entries by -mass, and applies | ||||
| // them to psi through the six-argument M5D, writing the result to chi. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   const int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> dg(bee); | ||||
|   std::vector<Coeff_t> up(Ls); | ||||
|   std::vector<Coeff_t> lo(Ls); | ||||
|   int s=0; | ||||
|   while ( s<Ls ) { | ||||
|     up[s]=-cee[s]; | ||||
|     lo[s]=-cee[s]; | ||||
|     s++; | ||||
|   } | ||||
|   // wrap-around entries carry the mass | ||||
|   up[Ls-1]=-mass*up[Ls-1]; | ||||
|   lo[0]   =-mass*lo[0]; | ||||
|   M5D(psi,psi,chi,lo,dg,up); | ||||
| } | ||||
|  | ||||
|   // override multiply | ||||
|  template<class Impl> | ||||
|   RealD CayleyFermion5D<Impl>::M    (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> diag = bee; | ||||
|   std::vector<Coeff_t> upper(Ls); | ||||
|   std::vector<Coeff_t> lower(Ls); | ||||
|  | ||||
|     FermionField Din(psi._grid); | ||||
|  | ||||
|     // Assemble Din | ||||
|     /* | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	//	Din = bs psi[s] + cs[s] psi[s+1} | ||||
| 	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1); | ||||
| 	//      Din+= -mass*cs[s] psi[s+1} | ||||
| 	axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0); | ||||
| 	axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1); | ||||
| 	axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|     */ | ||||
|     Meooe5D(psi,Din); | ||||
|  | ||||
|     this->DW(Din,chi,DaggerNo); | ||||
|     // ((b D_W + D_w hop terms +1) on s-diag | ||||
|     axpby(chi,1.0,1.0,chi,psi);  | ||||
|  | ||||
|     // Call Mooee?? | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ){ | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1); | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) { | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0); | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1); | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|     return norm2(chi); | ||||
|   } | ||||
|  | ||||
|  template<class Impl> | ||||
|   RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     // Under adjoint | ||||
|     //D1+        D1- P-    ->   D1+^dag   P+ D2-^dag | ||||
|     //D2- P+     D2+            P-D1-^dag D2+dag | ||||
|  | ||||
|     FermionField Din(psi._grid); | ||||
|     // Apply Dw | ||||
|     this->DW(psi,Din,DaggerYes);  | ||||
|  | ||||
|     MeooeDag5D(Din,chi); | ||||
|  | ||||
|     int Ls=this->Ls; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|  | ||||
|       // Collect the terms in DW | ||||
|       //	Chi = bs Din[s] + cs[s] Din[s+1} | ||||
|       //    Chi+= -mass*cs[s] psi[s+1} | ||||
|       /* | ||||
|       if ( s==0 ) { | ||||
| 	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1); | ||||
|       } | ||||
|       */ | ||||
|  | ||||
|       // FIXME just call MooeeDag?? | ||||
|  | ||||
|       // Collect the terms indept of DW | ||||
|       if ( s==0 ){ | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) { | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|     // ((b D_W + D_w hop terms +1) on s-diag | ||||
|     axpby (chi,1.0,1.0,chi,psi);  | ||||
|     return norm2(chi); | ||||
|   } | ||||
|  | ||||
|   // half checkerboard operations | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
|  | ||||
|     FermionField tmp(psi._grid); | ||||
|   for (int s=0;s<Ls;s++){ | ||||
|     // Assemble the 5d matrix | ||||
|     Meooe5D(psi,tmp);  | ||||
| #if 0 | ||||
|     std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	//	tmp = bs psi[s] + cs[s] psi[s+1} | ||||
| 	//      tmp+= -mass*cs[s] psi[s+1} | ||||
| 	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1); | ||||
| 	axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0); | ||||
| 	axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1); | ||||
| 	axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|     std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl; | ||||
| #endif | ||||
|  | ||||
|     // Apply 4d dslash | ||||
|     if ( psi.checkerboard == Odd ) { | ||||
|       this->DhopEO(tmp,chi,DaggerNo); | ||||
|     if ( s==0 ) { | ||||
|       upper[s] = -cee[s+1] ; | ||||
|       lower[s] = mass*cee[Ls-1]; | ||||
|     } else if ( s==(Ls-1)) {  | ||||
|       upper[s] = mass*cee[0]; | ||||
|       lower[s] = -cee[s-1]; | ||||
|     } else { | ||||
|       this->DhopOE(tmp,chi,DaggerNo); | ||||
|       upper[s]=-cee[s+1]; | ||||
|       lower[s]=-cee[s-1]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MeooeDag    (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     FermionField tmp(psi._grid); | ||||
|     // Apply 4d dslash | ||||
|     if ( psi.checkerboard == Odd ) { | ||||
|       this->DhopEO(psi,tmp,DaggerYes); | ||||
|     } else { | ||||
|       this->DhopOE(psi,tmp,DaggerYes); | ||||
|     } | ||||
|   M5Ddag(psi,psi,chi,lower,diag,upper); | ||||
| } | ||||
|  | ||||
|     MeooeDag5D(tmp,chi);  | ||||
| #if 0 | ||||
|     std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl; | ||||
|     // Assemble the 5d matrix | ||||
|     int Ls=this->Ls; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	axpby_ssp_pplus(chi,beo[s],tmp,   -ceo[s+1]  ,tmp,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,   1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0   ,chi,-ceo[s-1],tmp,s,s-1); | ||||
|       } | ||||
|     } | ||||
|     std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl; | ||||
| #endif | ||||
| // Two-argument M5Ddag: unit diagonal, -1 off-diagonals, with the wrap-around | ||||
| // entries (upper[Ls-1], lower[0]) multiplied by -mass; forwards to the | ||||
| // six-argument M5Ddag with chi used both as the phi input and as the output. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   const int last=this->Ls-1; | ||||
|   std::vector<Coeff_t> dg(last+1,1.0); | ||||
|   std::vector<Coeff_t> up(last+1,-1.0); | ||||
|   std::vector<Coeff_t> lo(last+1,-1.0); | ||||
|   up[last]=-mass*up[last]; | ||||
|   lo[0]   =-mass*lo[0]; | ||||
|   M5Ddag(psi,chi,chi,lo,dg,up); | ||||
| } | ||||
|  | ||||
| // Adjoint counterpart of Meooe5D: assembles Din from psi via the six-argument | ||||
| // M5Ddag with bs on the diagonal. | ||||
| // | ||||
| // Under the adjoint the off-diagonal coefficients move to the neighbouring | ||||
| // slice: the s+1 coupling uses cs[s+1] and the s-1 coupling uses cs[s-1], and | ||||
| // the wrap-around terms are -mass*cs[0] (at s=Ls-1) and -mass*cs[Ls-1] (at s=0). | ||||
| // This is the same index pattern MooeeDag uses for cee. Using cs[s] for both | ||||
| // neighbours is only equivalent when cs does not depend on s. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField &Din) | ||||
| { | ||||
|   int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> diag =bs; | ||||
|   std::vector<Coeff_t> upper(Ls); | ||||
|   std::vector<Coeff_t> lower(Ls); | ||||
|   for (int s=0;s<Ls;s++){ | ||||
|     // coupling to slice s+1 (wraps to slice 0 with the mass term) | ||||
|     if ( s==(Ls-1) ) upper[s] =-mass*cs[0]; | ||||
|     else             upper[s] = cs[s+1]; | ||||
|     // coupling to slice s-1 (wraps to slice Ls-1 with the mass term) | ||||
|     if ( s==0 )      lower[s] =-mass*cs[Ls-1]; | ||||
|     else             lower[s] = cs[s-1]; | ||||
|   } | ||||
|   M5Ddag(psi,psi,Din,lower,diag,upper); | ||||
| } | ||||
|  | ||||
| // Full (unpreconditioned) multiply. Sequence of operations: | ||||
| //   Din = Meooe5D(psi); chi = DW(Din); chi += psi; M5D(psi,chi). | ||||
| // Returns norm2(chi). | ||||
| template<class Impl> | ||||
| RealD CayleyFermion5D<Impl>::M    (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   FermionField Din(psi._grid); | ||||
|    | ||||
|   // Assemble Din | ||||
|   Meooe5D(psi,Din); | ||||
|    | ||||
|   this->DW(Din,chi,DaggerNo); | ||||
|   // ((b D_W + D_w hop terms +1) on s-diag | ||||
|   axpby(chi,1.0,1.0,chi,psi);  | ||||
|    | ||||
|   // fifth-dimension terms; chi is both input and output of this call | ||||
|   M5D(psi,chi); | ||||
|   return(norm2(chi)); | ||||
| } | ||||
|  | ||||
| // Adjoint multiply. Sequence of operations: | ||||
| //   tmp = DW^dag(psi); chi = MeooeDag5D(tmp); M5Ddag(psi,chi); chi += psi. | ||||
| // Returns norm2(chi). | ||||
| template<class Impl> | ||||
| RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   // Block structure under the adjoint: | ||||
|   //D1+        D1- P-    ->   D1+^dag   P+ D2-^dag | ||||
|   //D2- P+     D2+            P-D1-^dag D2+dag | ||||
|   FermionField DwDagPsi(psi._grid); | ||||
|   // Wilson operator first, with the dagger flag set | ||||
|   this->DW(psi,DwDagPsi,DaggerYes);  | ||||
|   // then the adjoint 5d assembly of the DW-dependent terms | ||||
|   MeooeDag5D(DwDagPsi,chi); | ||||
|   // fifth-dimension terms independent of DW | ||||
|   M5Ddag(psi,chi); | ||||
|   // ((b D_W + D_w hop terms +1) on s-diag | ||||
|   axpby (chi,1.0,1.0,chi,psi);  | ||||
|   RealD nrm = norm2(chi); | ||||
|   return nrm; | ||||
| } | ||||
|  | ||||
| // half checkerboard operations | ||||
| // Checkerboarded hopping term: assembles the 5d combination with Meooe5D, | ||||
| // then applies the 4d dslash. DhopEO is used when psi lives on the Odd | ||||
| // checkerboard, DhopOE otherwise. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   FermionField tmp(psi._grid); | ||||
|  | ||||
|   // Assemble the 5d matrix | ||||
|   Meooe5D(psi,tmp);  | ||||
|  | ||||
|   // Apply 4d dslash | ||||
|   if ( psi.checkerboard == Odd ) { | ||||
|     this->DhopEO(tmp,chi,DaggerNo); | ||||
|   } else { | ||||
|     this->DhopOE(tmp,chi,DaggerNo); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
|     for (int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1); | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0); | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1); | ||||
| 	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
| // Adjoint of the checkerboarded hopping term: the daggered 4d dslash is | ||||
| // applied first (DhopEO for an Odd-checkerboard psi, DhopOE otherwise), and | ||||
| // MeooeDag5D then assembles the 5d combination into chi. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MeooeDag    (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   FermionField hopped(psi._grid); | ||||
|   if ( psi.checkerboard != Odd ) { | ||||
|     this->DhopOE(psi,hopped,DaggerYes); | ||||
|   } else { | ||||
|     this->DhopEO(psi,hopped,DaggerYes); | ||||
|   } | ||||
|   MeooeDag5D(hopped,chi);  | ||||
| } | ||||
|  | ||||
|  template<class Impl> | ||||
|   void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ | ||||
|     int Ls=this->Ls; | ||||
|     FermionField tmp(psi._grid); | ||||
|     // Assemble the 5d matrix | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==0 ) { | ||||
| 	//	tmp = bs psi[s] + cs[s] psi[s+1} | ||||
| 	//      tmp+= -mass*cs[s] psi[s+1} | ||||
| 	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1); | ||||
| 	axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0); | ||||
| 	axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1); | ||||
| 	axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|     // Apply 4d dslash fragment | ||||
|     this->DhopDir(tmp,chi,dir,disp); | ||||
| // Single-direction hopping fragment: Meo5D assembles the 5d combination of | ||||
| // psi, then DhopDir applies the 4d dslash piece selected by (dir,disp). | ||||
| template<class Impl> | ||||
| void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp) | ||||
| { | ||||
|   FermionField assembled(psi._grid); | ||||
|   Meo5D(psi,assembled); | ||||
|   this->DhopDir(assembled,chi,dir,disp); | ||||
| } | ||||
| // force terms; five routines; default to Dhop on diagonal | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
| { | ||||
|   FermionField Din(V._grid); | ||||
|    | ||||
|   if ( dag == DaggerNo ) { | ||||
|     //      U d/du [D_w D5] V = U d/du DW D5 V | ||||
|     Meooe5D(V,Din); | ||||
|     this->DhopDeriv(mat,U,Din,dag); | ||||
|   } else { | ||||
|     //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call | ||||
|     Meooe5D(U,Din); | ||||
|     this->DhopDeriv(mat,Din,V,dag); | ||||
|   } | ||||
|  | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
|     for (int s=0;s<Ls;s++){ | ||||
|       // Assemble the 5d matrix | ||||
|       if ( s==0 ) { | ||||
| 	axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1]  ,psi,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1); | ||||
|       } else if ( s==(Ls-1)) {  | ||||
| 	axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0); | ||||
| 	axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1); | ||||
|       } else { | ||||
| 	axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1); | ||||
| 	axpby_ssp_pminus(chi,1.0   ,chi,-cee[s-1],psi,s,s-1); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
|     // Apply (L^{\prime})^{-1} | ||||
|     axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0] | ||||
|     for (int s=1;s<Ls;s++){ | ||||
|       axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1] | ||||
|     } | ||||
|     // L_m^{-1}  | ||||
|     for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi | ||||
|       axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s); | ||||
|     } | ||||
|     // U_m^{-1} D^{-1} | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       // Chi[s] + 1/d chi[s]  | ||||
|       axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1); | ||||
|     }	 | ||||
|     axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable  | ||||
|      | ||||
|     // Apply U^{-1} | ||||
|     for (int s=Ls-2;s>=0;s--){ | ||||
|       axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1);  // chi[Ls] | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
|     // Apply (U^{\prime})^{-dagger} | ||||
|     axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0] | ||||
|     for (int s=1;s<Ls;s++){ | ||||
|       axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1); | ||||
|     } | ||||
|     // U_m^{-\dagger}  | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s); | ||||
|     } | ||||
|     // L_m^{-\dagger} D^{-dagger} | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1); | ||||
|     }	 | ||||
|     axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable  | ||||
|      | ||||
|     // Apply L^{-dagger} | ||||
|     for (int s=Ls-2;s>=0;s--){ | ||||
|       axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);  // chi[Ls] | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // force terms; five routines; default to Dhop on diagonal | ||||
|   template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   { | ||||
|     FermionField Din(V._grid); | ||||
|  | ||||
|     if ( dag == DaggerNo ) { | ||||
|       //      U d/du [D_w D5] V = U d/du DW D5 V | ||||
|       Meooe5D(V,Din); | ||||
|       this->DhopDeriv(mat,U,Din,dag); | ||||
|     } else { | ||||
|       //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call | ||||
|       Meooe5D(U,Din); | ||||
|       this->DhopDeriv(mat,Din,V,dag); | ||||
|     } | ||||
|   }; | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   { | ||||
|     FermionField Din(V._grid); | ||||
|  | ||||
|     if ( dag == DaggerNo ) { | ||||
|       //      U d/du [D_w D5] V = U d/du DW D5 V | ||||
|       Meooe5D(V,Din); | ||||
|       this->DhopDerivOE(mat,U,Din,dag); | ||||
|     } else { | ||||
|       //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call | ||||
| }; | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
| { | ||||
|   FermionField Din(V._grid); | ||||
|    | ||||
|   if ( dag == DaggerNo ) { | ||||
|     //      U d/du [D_w D5] V = U d/du DW D5 V | ||||
|     Meooe5D(V,Din); | ||||
|     this->DhopDerivOE(mat,U,Din,dag); | ||||
|   } else { | ||||
|     //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call | ||||
|       Meooe5D(U,Din); | ||||
|       this->DhopDerivOE(mat,Din,V,dag); | ||||
|     } | ||||
|   }; | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   { | ||||
|     FermionField Din(V._grid); | ||||
|  | ||||
|     if ( dag == DaggerNo ) { | ||||
|       //      U d/du [D_w D5] V = U d/du DW D5 V | ||||
|       Meooe5D(V,Din); | ||||
|       this->DhopDerivEO(mat,U,Din,dag); | ||||
|     } else { | ||||
|       //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call | ||||
|       Meooe5D(U,Din); | ||||
|       this->DhopDerivEO(mat,Din,V,dag); | ||||
|     } | ||||
|   }; | ||||
|   } | ||||
| }; | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
| { | ||||
|   FermionField Din(V._grid); | ||||
|    | ||||
|   // Tanh | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) | ||||
|   { | ||||
|     SetCoefficientsZolotarev(1.0,zdata,b,c); | ||||
|  | ||||
|   if ( dag == DaggerNo ) { | ||||
|     //      U d/du [D_w D5] V = U d/du DW D5 V | ||||
|     Meooe5D(V,Din); | ||||
|     this->DhopDerivEO(mat,U,Din,dag); | ||||
|   } else { | ||||
|     //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call | ||||
|     Meooe5D(U,Din); | ||||
|     this->DhopDerivEO(mat,Din,V,dag); | ||||
|   } | ||||
|   //Zolo | ||||
|  template<class Impl> | ||||
|   void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) | ||||
|   { | ||||
|     int Ls=this->Ls; | ||||
| }; | ||||
|    | ||||
| // Tanh | ||||
| // Copies the first Ls entries of zdata->gamma into a Coeff_t vector and | ||||
| // hands them to SetCoefficientsInternal with the scale fixed to 1.0. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) | ||||
| { | ||||
|   const int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> gamma(Ls); | ||||
|   for(int s=0;s!=Ls;++s) { | ||||
|     gamma[s] = zdata->gamma[s]; | ||||
|   } | ||||
|   SetCoefficientsInternal(1.0,gamma,b,c); | ||||
| } | ||||
| //Zolo | ||||
| // Copies the first Ls entries of zdata->gamma into a Coeff_t vector and | ||||
| // hands them to SetCoefficientsInternal together with the caller's zolo_hi. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) | ||||
| { | ||||
|   const int Ls=this->Ls; | ||||
|   std::vector<Coeff_t> gamma(Ls); | ||||
|   for(int s=0;s!=Ls;++s) { | ||||
|     gamma[s] = zdata->gamma[s]; | ||||
|   } | ||||
|   SetCoefficientsInternal(zolo_hi,gamma,b,c); | ||||
| } | ||||
| // Common coefficient setup shared by the Tanh and Zolotarev entry points | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c) | ||||
| { | ||||
|   int Ls=this->Ls; | ||||
|  | ||||
|     /////////////////////////////////////////////////////////// | ||||
|     // The Cayley coeffs (unprec) | ||||
|     /////////////////////////////////////////////////////////// | ||||
|     omega.resize(Ls); | ||||
|     bs.resize(Ls); | ||||
|     cs.resize(Ls); | ||||
|     as.resize(Ls); | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   // The Cayley coeffs (unprec) | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   omega.resize(Ls); | ||||
|   bs.resize(Ls); | ||||
|   cs.resize(Ls); | ||||
|   as.resize(Ls); | ||||
|    | ||||
|   //  | ||||
|   // Ts = (    [bs+cs]Dw        )^-1 (    (bs+cs) Dw         ) | ||||
|   //     -(g5  -------       -1 )    ( g5 ---------     + 1  ) | ||||
|   //      (   {2+(bs-cs)Dw}     )    (    2+(bs-cs) Dw       ) | ||||
|   // | ||||
|   //  bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2(  1/omega(b+c) + (b-c) ) | ||||
|   //  cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2(  1/omega(b+c) - (b-c) ) | ||||
|   // | ||||
|   // bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c) | ||||
|   // bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c | ||||
|   // | ||||
|   // So  | ||||
|   // | ||||
|   // Ts = (    [b+c]Dw/omega_s    )^-1 (    (b+c) Dw /omega_s        ) | ||||
|   //     -(g5  -------         -1 )    ( g5 ---------           + 1  ) | ||||
|   //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               ) | ||||
|   // | ||||
|   // Ts = (    [b+c]Dw            )^-1 (    (b+c) Dw                 ) | ||||
|   //     -(g5  -------    -omega_s)    ( g5 ---------      + omega_s ) | ||||
|   //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               ) | ||||
|   //  | ||||
|      | ||||
|     //  | ||||
|     // Ts = (    [bs+cs]Dw        )^-1 (    (bs+cs) Dw         ) | ||||
|     //     -(g5  -------       -1 )    ( g5 ---------     + 1  ) | ||||
|     //      (   {2+(bs-cs)Dw}     )    (    2+(bs-cs) Dw       ) | ||||
|     // | ||||
|     //  bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2(  1/omega(b+c) + (b-c) ) | ||||
|     //  cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2(  1/omega(b+c) - (b-c) ) | ||||
|     // | ||||
|     // bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c) | ||||
|     // bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c | ||||
|     // | ||||
|     // So  | ||||
|     // | ||||
|     // Ts = (    [b+c]Dw/omega_s    )^-1 (    (b+c) Dw /omega_s        ) | ||||
|     //     -(g5  -------         -1 )    ( g5 ---------           + 1  ) | ||||
|     //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               ) | ||||
|     // | ||||
|     // Ts = (    [b+c]Dw            )^-1 (    (b+c) Dw                 ) | ||||
|     //     -(g5  -------    -omega_s)    ( g5 ---------      + omega_s ) | ||||
|     //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               ) | ||||
|     //  | ||||
|   double bpc = b+c; | ||||
|   double bmc = b-c; | ||||
|   for(int i=0; i < Ls; i++){ | ||||
|     as[i] = 1.0; | ||||
|     omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code | ||||
|     bs[i] = 0.5*(bpc/omega[i] + bmc); | ||||
|     cs[i] = 0.5*(bpc/omega[i] - bmc); | ||||
|   } | ||||
|    | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // Constants for the preconditioned matrix Cayley form | ||||
|   //////////////////////////////////////////////////////// | ||||
|   bee.resize(Ls); | ||||
|   cee.resize(Ls); | ||||
|   beo.resize(Ls); | ||||
|   ceo.resize(Ls); | ||||
|    | ||||
|   for(int i=0;i<Ls;i++){ | ||||
|     bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0); | ||||
|     cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); | ||||
|     beo[i]=as[i]*bs[i]; | ||||
|     ceo[i]=-as[i]*cs[i]; | ||||
|   } | ||||
|    | ||||
|   aee.resize(Ls); | ||||
|   aeo.resize(Ls); | ||||
|   for(int i=0;i<Ls;i++){ | ||||
|     aee[i]=cee[i]; | ||||
|     aeo[i]=ceo[i]; | ||||
|   } | ||||
|    | ||||
|   ////////////////////////////////////////// | ||||
|   // LDU decomposition of eeoo | ||||
|   ////////////////////////////////////////// | ||||
|   dee.resize(Ls); | ||||
|   lee.resize(Ls); | ||||
|   leem.resize(Ls); | ||||
|   uee.resize(Ls); | ||||
|   ueem.resize(Ls); | ||||
|    | ||||
|   for(int i=0;i<Ls;i++){ | ||||
|      | ||||
|     double bpc = b+c; | ||||
|     double bmc = b-c; | ||||
|     for(int i=0; i < Ls; i++){ | ||||
|       as[i] = 1.0; | ||||
|       omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code | ||||
|       bs[i] = 0.5*(bpc/omega[i] + bmc); | ||||
|       cs[i] = 0.5*(bpc/omega[i] - bmc); | ||||
|     } | ||||
|  | ||||
|     //////////////////////////////////////////////////////// | ||||
|     // Constants for the preconditioned matrix Cayley form | ||||
|     //////////////////////////////////////////////////////// | ||||
|     bee.resize(Ls); | ||||
|     cee.resize(Ls); | ||||
|     beo.resize(Ls); | ||||
|     ceo.resize(Ls); | ||||
|     dee[i] = bee[i]; | ||||
|      | ||||
|     for(int i=0;i<Ls;i++){ | ||||
|       bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0); | ||||
|       cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); | ||||
|       beo[i]=as[i]*bs[i]; | ||||
|       ceo[i]=-as[i]*cs[i]; | ||||
|     } | ||||
|  | ||||
|     aee.resize(Ls); | ||||
|     aeo.resize(Ls); | ||||
|     for(int i=0;i<Ls;i++){ | ||||
|       aee[i]=cee[i]; | ||||
|       aeo[i]=ceo[i]; | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////////// | ||||
|     // LDU decomposition of eeoo | ||||
|     ////////////////////////////////////////// | ||||
|     dee.resize(Ls); | ||||
|     lee.resize(Ls); | ||||
|     leem.resize(Ls); | ||||
|     uee.resize(Ls); | ||||
|     ueem.resize(Ls); | ||||
|      | ||||
|     for(int i=0;i<Ls;i++){ | ||||
|     if ( i < Ls-1 ) { | ||||
|        | ||||
|       dee[i] = bee[i]; | ||||
|       lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column | ||||
|        | ||||
|       if ( i < Ls-1 ) { | ||||
| 	 | ||||
| 	lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column | ||||
| 	     | ||||
| 	leem[i]=mass*cee[Ls-1]/bee[0]; | ||||
| 	for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1]; | ||||
| 	 | ||||
| 	uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row | ||||
| 	 | ||||
| 	ueem[i]=mass; | ||||
| 	for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j]; | ||||
| 	ueem[i]*= aee[0]/bee[0]; | ||||
| 	     | ||||
|       } else {  | ||||
| 	lee[i] =0.0; | ||||
| 	leem[i]=0.0; | ||||
| 	uee[i] =0.0; | ||||
| 	ueem[i]=0.0; | ||||
|       } | ||||
|     } | ||||
| 	 | ||||
|     {  | ||||
|       double delta_d=mass*cee[Ls-1]; | ||||
|       for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j]; | ||||
|       dee[Ls-1] += delta_d; | ||||
|       leem[i]=mass*cee[Ls-1]/bee[0]; | ||||
|       for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1]; | ||||
|        | ||||
|       uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row | ||||
|        | ||||
|       ueem[i]=mass; | ||||
|       for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j]; | ||||
|       ueem[i]*= aee[0]/bee[0]; | ||||
|        | ||||
|     } else {  | ||||
|       lee[i] =0.0; | ||||
|       leem[i]=0.0; | ||||
|       uee[i] =0.0; | ||||
|       ueem[i]=0.0; | ||||
|     } | ||||
|   } | ||||
| 	 | ||||
|   {  | ||||
|     Coeff_t delta_d=mass*cee[Ls-1]; | ||||
|     for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j]; | ||||
|     dee[Ls-1] += delta_d; | ||||
|   }   | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
|   FermOpTemplateInstantiate(CayleyFermion5D); | ||||
|   GparityFermOpTemplateInstantiate(CayleyFermion5D); | ||||
|   | ||||
| @@ -51,6 +51,29 @@ namespace Grid { | ||||
|       virtual void   MooeeDag    (const FermionField &in, FermionField &out); | ||||
|       virtual void   MooeeInv    (const FermionField &in, FermionField &out); | ||||
|       virtual void   MooeeInvDag (const FermionField &in, FermionField &out); | ||||
|       virtual void   Meo5D (const FermionField &psi, FermionField &chi); | ||||
|  | ||||
|       virtual void   M5D   (const FermionField &psi, FermionField &chi); | ||||
|       virtual void   M5Ddag(const FermionField &psi, FermionField &chi); | ||||
|  | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // Instantiate different versions depending on Impl | ||||
|       ///////////////////////////////////////////////////// | ||||
|       void M5D(const FermionField &psi, | ||||
| 	       const FermionField &phi,  | ||||
| 	       FermionField &chi, | ||||
| 	       std::vector<Coeff_t> &lower, | ||||
| 	       std::vector<Coeff_t> &diag, | ||||
| 	       std::vector<Coeff_t> &upper); | ||||
|  | ||||
|       void M5Ddag(const FermionField &psi, | ||||
| 		  const FermionField &phi,  | ||||
| 		  FermionField &chi, | ||||
| 		  std::vector<Coeff_t> &lower, | ||||
| 		  std::vector<Coeff_t> &diag, | ||||
| 		  std::vector<Coeff_t> &upper); | ||||
|       void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); | ||||
|  | ||||
|       virtual void   Instantiatable(void)=0; | ||||
|  | ||||
|       // force terms; five routines; default to Dhop on diagonal | ||||
| @@ -68,23 +91,23 @@ namespace Grid { | ||||
|       RealD mass; | ||||
|  | ||||
|       // Cayley form Moebius (tanh and zolotarev) | ||||
|       std::vector<RealD> omega;  | ||||
|       std::vector<RealD> bs;    // S dependent coeffs | ||||
|       std::vector<RealD> cs;     | ||||
|       std::vector<RealD> as;     | ||||
|       std::vector<Coeff_t> omega;  | ||||
|       std::vector<Coeff_t> bs;    // S dependent coeffs | ||||
|       std::vector<Coeff_t> cs;     | ||||
|       std::vector<Coeff_t> as;     | ||||
|       // For preconditioning Cayley form | ||||
|       std::vector<RealD> bee;     | ||||
|       std::vector<RealD> cee;     | ||||
|       std::vector<RealD> aee;     | ||||
|       std::vector<RealD> beo;     | ||||
|       std::vector<RealD> ceo;     | ||||
|       std::vector<RealD> aeo;     | ||||
|       std::vector<Coeff_t> bee;     | ||||
|       std::vector<Coeff_t> cee;     | ||||
|       std::vector<Coeff_t> aee;     | ||||
|       std::vector<Coeff_t> beo;     | ||||
|       std::vector<Coeff_t> ceo;     | ||||
|       std::vector<Coeff_t> aeo;     | ||||
|       // LDU factorisation of the eeoo matrix | ||||
|       std::vector<RealD> lee;     | ||||
|       std::vector<RealD> leem;     | ||||
|       std::vector<RealD> uee;     | ||||
|       std::vector<RealD> ueem;     | ||||
|       std::vector<RealD> dee;     | ||||
|       std::vector<Coeff_t> lee;     | ||||
|       std::vector<Coeff_t> leem;     | ||||
|       std::vector<Coeff_t> uee;     | ||||
|       std::vector<Coeff_t> ueem;     | ||||
|       std::vector<Coeff_t> dee;     | ||||
|  | ||||
|       // Constructors | ||||
|       CayleyFermion5D(GaugeField &_Umu, | ||||
| @@ -97,9 +120,20 @@ namespace Grid { | ||||
|     protected: | ||||
|       void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); | ||||
|       void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); | ||||
|       void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c); | ||||
|     }; | ||||
|  | ||||
|   } | ||||
| } | ||||
| #define INSTANTIATE_DPERP(A)\ | ||||
| template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\ | ||||
| 					std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \ | ||||
| template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\ | ||||
| 					   std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \ | ||||
| template void CayleyFermion5D< A >::MooeeInv    (const FermionField &psi, FermionField &chi); \ | ||||
| template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); | ||||
|  | ||||
| #define CAYLEY_DPERP_CACHE | ||||
| #undef  CAYLEY_DPERP_LINALG | ||||
|  | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										211
									
								
								lib/qcd/action/fermion/CayleyFermion5Dcache.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								lib/qcd/action/fermion/CayleyFermion5Dcache.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,211 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/CayleyFermion5Dcache.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
|   // FIXME -- make a version of these routines with site loop outermost for cache reuse. | ||||
|  | ||||
|   // Pminus forwards | ||||
|   // Pplus  backwards | ||||
| // Fifth-dimension hopping term, evaluated block by block (each block is Ls | ||||
| // consecutive outer sites).  For every slice s of a block it forms | ||||
| //   chi[s] = diag[s]*phi[s] + upper[s]*spProj5m(psi[s+1]) + lower[s]*spProj5p(psi[s-1]) | ||||
| // where the neighbour slices s+1 and s-1 wrap around inside the block. | ||||
| // | ||||
| //   psi   : field supplying the two neighbouring slices | ||||
| //   phi   : field supplying the diagonal term (must share psi's checkerboard) | ||||
| //   chi   : output; its checkerboard is set to psi's | ||||
| //   lower, diag, upper : per-slice coefficients, indexed by s | ||||
| // | ||||
| // The neighbour indices are computed modulo Ls (as in the vectorised | ||||
| // implementation).  For Ls>=2 this selects exactly the slices the former | ||||
| // three-way branch selected; for Ls==1 it keeps the access inside the block | ||||
| // instead of reading psi[ss+1]. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | ||||
|                                 const FermionField &phi, | ||||
|                                 FermionField &chi, | ||||
|                                 std::vector<Coeff_t> &lower, | ||||
|                                 std::vector<Coeff_t> &diag, | ||||
|                                 std::vector<Coeff_t> &upper) | ||||
| { | ||||
|   int Ls =this->Ls; | ||||
|   GridBase *grid=psi._grid; | ||||
|   assert(phi.checkerboard == psi.checkerboard); | ||||
|   chi.checkerboard=psi.checkerboard; | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls | ||||
|     // Scratch object of the site type; copied once per block, then overwritten | ||||
|     // by every projection below. | ||||
|     auto tmp = psi._odata[0]; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       const int s_up = (s+1)%Ls;      // forward neighbour, wraps Ls-1 -> 0 | ||||
|       const int s_dn = (s+Ls-1)%Ls;   // backward neighbour, wraps 0 -> Ls-1 | ||||
|  | ||||
|       spProj5m(tmp,psi._odata[ss+s_up]); | ||||
|       chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp; | ||||
|  | ||||
|       spProj5p(tmp,psi._odata[ss+s_dn]); | ||||
|       chi[ss+s]=chi[ss+s]+lower[s]*tmp; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Counterpart of M5D with the two projectors exchanged.  For every slice s of | ||||
| // each block of Ls consecutive outer sites it forms | ||||
| //   chi[s] = diag[s]*phi[s] + upper[s]*spProj5p(psi[s+1]) + lower[s]*spProj5m(psi[s-1]) | ||||
| // where the neighbour slices s+1 and s-1 wrap around inside the block. | ||||
| // | ||||
| //   psi   : field supplying the two neighbouring slices | ||||
| //   phi   : field supplying the diagonal term (must share psi's checkerboard) | ||||
| //   chi   : output; its checkerboard is set to psi's | ||||
| //   lower, diag, upper : per-slice coefficients, indexed by s | ||||
| // | ||||
| // The neighbour indices are computed modulo Ls (as in the vectorised | ||||
| // implementation).  For Ls>=2 this selects exactly the slices the former | ||||
| // three-way branch selected; for Ls==1 it keeps the access inside the block | ||||
| // instead of reading psi[ss+1]. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, | ||||
|                                    const FermionField &phi, | ||||
|                                    FermionField &chi, | ||||
|                                    std::vector<Coeff_t> &lower, | ||||
|                                    std::vector<Coeff_t> &diag, | ||||
|                                    std::vector<Coeff_t> &upper) | ||||
| { | ||||
|   int Ls =this->Ls; | ||||
|   GridBase *grid=psi._grid; | ||||
|   assert(phi.checkerboard == psi.checkerboard); | ||||
|   chi.checkerboard=psi.checkerboard; | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls | ||||
|     // Scratch object of the site type, overwritten by every projection below. | ||||
|     auto tmp = psi._odata[0]; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       const int s_up = (s+1)%Ls;      // forward neighbour, wraps Ls-1 -> 0 | ||||
|       const int s_dn = (s+Ls-1)%Ls;   // backward neighbour, wraps 0 -> Ls-1 | ||||
|  | ||||
|       spProj5p(tmp,psi._odata[ss+s_up]); | ||||
|       chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp; | ||||
|  | ||||
|       spProj5m(tmp,psi._odata[ss+s_dn]); | ||||
|       chi[ss+s]=chi[ss+s]+lower[s]*tmp; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Applies the inverse of the fifth-dimension (ee/oo) matrix to psi, writing the | ||||
| // result to chi, using the stored factors lee, leem, dee, ueem and uee. | ||||
| // Each block of Ls consecutive outer sites is processed independently, in four | ||||
| // sweeps that must run in this order because each one reads chi entries | ||||
| // written by the previous one. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   GridBase *grid = psi._grid; | ||||
|   const int Ls   = this->Ls; | ||||
|   const int last = Ls-1;            // index of the final slice in a block | ||||
|  | ||||
|   chi.checkerboard = psi.checkerboard; | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int ss=0;ss<grid->oSites();ss+=Ls){ // one block of Ls slices per iteration | ||||
|     auto proj = psi._odata[0];      // scratch, overwritten by each projection | ||||
|  | ||||
|     // (L')^{-1}: forward sweep, slice k feeds slice k+1 | ||||
|     chi[ss] = psi[ss]; | ||||
|     for(int k=0;k<last;k++){ | ||||
|       spProj5p(proj,chi[ss+k]); | ||||
|       chi[ss+k+1] = psi[ss+k+1]-lee[k]*proj; | ||||
|     } | ||||
|  | ||||
|     // L_m^{-1}: fold every earlier slice into the last one | ||||
|     for(int k=0;k<last;k++){ | ||||
|       spProj5m(proj,chi[ss+k]); | ||||
|       chi[ss+last] = chi[ss+last] - leem[k]*proj; | ||||
|     } | ||||
|  | ||||
|     // U_m^{-1} D^{-1}: scale each slice and subtract the last-slice term | ||||
|     for(int k=0;k<last;k++){ | ||||
|       spProj5p(proj,chi[ss+last]); | ||||
|       chi[ss+k] = (1.0/dee[k])*chi[ss+k]-(ueem[k]/dee[Ls-1])*proj; | ||||
|     } | ||||
|     chi[ss+last] = (1.0/dee[Ls-1])*chi[ss+last]; | ||||
|  | ||||
|     // U^{-1}: backward sweep, slice k feeds slice k-1 | ||||
|     for(int k=last;k>0;k--){ | ||||
|       spProj5m(proj,chi[ss+k]); | ||||
|       chi[ss+k-1] = chi[ss+k-1] - uee[k-1]*proj; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Daggered counterpart of MooeeInv: the same four sweeps, with the roles of | ||||
| // the upper (uee/ueem) and lower (lee/leem) factors and of the two projectors | ||||
| // exchanged.  Reads psi, writes chi; each block of Ls consecutive outer sites | ||||
| // is processed independently. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   GridBase *grid=psi._grid; | ||||
|   int Ls=this->Ls; | ||||
|  | ||||
|   // There is only one input field, so there is nothing to cross-check here | ||||
|   // (the former assert compared psi.checkerboard with itself); the output | ||||
|   // simply inherits the input's checkerboard. | ||||
|   chi.checkerboard=psi.checkerboard; | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls | ||||
|  | ||||
|     auto tmp = psi._odata[0]; // scratch, overwritten by each projection | ||||
|  | ||||
|     // Apply (U^{\prime})^{-dagger}: forward sweep, slice s-1 feeds slice s | ||||
|     chi[ss]=psi[ss]; | ||||
|     for (int s=1;s<Ls;s++){ | ||||
|       spProj5m(tmp,chi[ss+s-1]); | ||||
|       chi[ss+s] = psi[ss+s]-uee[s-1]*tmp; | ||||
|     } | ||||
|     // U_m^{-\dagger}: fold every earlier slice into the last one | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       spProj5p(tmp,chi[ss+s]); | ||||
|       chi[ss+Ls-1] = chi[ss+Ls-1] - ueem[s]*tmp; | ||||
|     } | ||||
|  | ||||
|     // L_m^{-\dagger} D^{-dagger}: scale each slice, subtract the last-slice term | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       spProj5m(tmp,chi[ss+Ls-1]); | ||||
|       chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(leem[s]/dee[Ls-1])*tmp; | ||||
|     } | ||||
|     chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1]; | ||||
|  | ||||
|     // Apply L^{-dagger}: backward sweep, slice s+1 feeds slice s | ||||
|     for (int s=Ls-2;s>=0;s--){ | ||||
|       spProj5p(tmp,chi[ss+s+1]); | ||||
|       chi[ss+s] = chi[ss+s] - lee[s]*tmp; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Explicit instantiations of this file's M5D, M5Ddag, MooeeInv and MooeeInvDag | ||||
| // for each listed implementation.  They are emitted only when | ||||
| // CAYLEY_DPERP_CACHE is defined, i.e. when this implementation is the selected one. | ||||
| #ifdef CAYLEY_DPERP_CACHE | ||||
|   INSTANTIATE_DPERP(WilsonImplF); | ||||
|   INSTANTIATE_DPERP(WilsonImplD); | ||||
|   INSTANTIATE_DPERP(GparityWilsonImplF); | ||||
|   INSTANTIATE_DPERP(GparityWilsonImplD); | ||||
|   INSTANTIATE_DPERP(ZWilsonImplF); | ||||
|   INSTANTIATE_DPERP(ZWilsonImplD); | ||||
| #endif | ||||
|  | ||||
| }} | ||||
							
								
								
									
										133
									
								
								lib/qcd/action/fermion/CayleyFermion5Ddense.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								lib/qcd/action/fermion/CayleyFermion5Ddense.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/CayleyFermion5Ddense.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid/Eigen/Dense> | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|   /* | ||||
|    * Dense matrix versions of routines | ||||
|    */ | ||||
|  | ||||
|   /* | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   this->MooeeInternal(psi,chi,DaggerYes,InverseYes); | ||||
| } | ||||
|    | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   this->MooeeInternal(psi,chi,DaggerNo,InverseYes); | ||||
| } | ||||
|   */ | ||||
| // Dense-matrix evaluation of the fifth-dimension (ee/oo) operator. | ||||
| // Two Ls x Ls real matrices are built from bee, cee and mass, one applied to | ||||
| // the spProj5p part of psi and one to the spProj5m part.  They are optionally | ||||
| // inverted (inv) and/or adjointed (dag), then applied site by site; the | ||||
| // accumulated result is scaled by 0.5 and stored in chi. | ||||
| // Requires the grid's reduced dimension 0 to equal Ls. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) | ||||
| { | ||||
|   const int Ls     = this->Ls; | ||||
|   const int LLs    = psi._grid->_rdimensions[0]; | ||||
|   const int nsites = psi._grid->oSites()/LLs; | ||||
|  | ||||
|   chi.checkerboard = psi.checkerboard; | ||||
|  | ||||
|   assert(Ls==LLs); | ||||
|  | ||||
|   Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls); | ||||
|   Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); | ||||
|  | ||||
|   // Diagonal plus the single off-diagonal band of each matrix. | ||||
|   for(int s=0;s<Ls;s++){ | ||||
|     Pplus (s,s) = bee[s]; | ||||
|     Pminus(s,s) = bee[s]; | ||||
|     if ( s < Ls-1 ) { | ||||
|       Pminus(s,s+1) = -cee[s]; | ||||
|       Pplus (s+1,s) = -cee[s+1]; | ||||
|     } | ||||
|   } | ||||
|   // Corner entries carrying the mass; written last, as in the original order. | ||||
|   Pplus (0,Ls-1) = mass*cee[0]; | ||||
|   Pminus(Ls-1,0) = mass*cee[Ls-1]; | ||||
|  | ||||
|   // Start from the plain matrices and replace them by their inverses on request. | ||||
|   Eigen::MatrixXd PplusMat  = Pplus; | ||||
|   Eigen::MatrixXd PminusMat = Pminus; | ||||
|   if ( inv ) { | ||||
|     PplusMat  = Pplus.inverse(); | ||||
|     PminusMat = Pminus.inverse(); | ||||
|   } | ||||
|  | ||||
|   if ( dag ) { | ||||
|     PplusMat.adjointInPlace(); | ||||
|     PminusMat.adjointInPlace(); | ||||
|   } | ||||
|  | ||||
|   // For the non-vectorised s-direction this is a plain matrix-vector product per site. | ||||
|   for(int site=0;site<nsites;site++){ | ||||
|  | ||||
|     SiteSpinor     acc; | ||||
|     SiteHalfSpinor halfPlus; | ||||
|     SiteHalfSpinor halfMinus; | ||||
|  | ||||
|     const int base = Ls*site;   // first slice of this site's block | ||||
|  | ||||
|     for(int row=0;row<Ls;row++){ | ||||
|       acc = zero; | ||||
|       for(int col=0;col<Ls;col++){ | ||||
|  | ||||
|         // Exact zeros are skipped so only populated entries cost work. | ||||
|         if ( PplusMat(row,col) != 0.0 ) { | ||||
|           spProj5p(halfPlus,psi[col+base]); | ||||
|           accumRecon5p(acc,PplusMat(row,col)*halfPlus); | ||||
|         } | ||||
|  | ||||
|         if ( PminusMat(row,col) != 0.0 ) { | ||||
|           spProj5m(halfMinus,psi[col+base]); | ||||
|           accumRecon5m(acc,PminusMat(row,col)*halfMinus); | ||||
|         } | ||||
|       } | ||||
|       chi[row+base] = acc*0.5; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Explicit instantiations of the dense MooeeInternal for GparityWilsonImpl{F,D} | ||||
| // and WilsonImpl{F,D}. | ||||
| template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
| template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
| template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
| template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
|  | ||||
| }} | ||||
							
								
								
									
										149
									
								
								lib/qcd/action/fermion/CayleyFermion5Dssp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								lib/qcd/action/fermion/CayleyFermion5Dssp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,149 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/CayleyFermion5Dssp.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
|   // FIXME -- make a version of these routines with site loop outermost for cache reuse. | ||||
|  | ||||
|   // Pminus forwards | ||||
|   // Pplus  backwards | ||||
| // Fifth-dimension hopping term expressed through the slice-wise axpby helpers. | ||||
| // For each slice s two calls are issued: axpby_ssp_pminus combining diag[s]*phi | ||||
| // with upper[s]*psi taken from the forward neighbour slice, then | ||||
| // axpby_ssp_pplus adding lower[s]*psi taken from the backward neighbour slice. | ||||
| //   lower, diag, upper : per-slice coefficients, indexed by s | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | ||||
|                                 const FermionField &phi, | ||||
|                                 FermionField &chi, | ||||
|                                 std::vector<Coeff_t> &lower, | ||||
|                                 std::vector<Coeff_t> &diag, | ||||
|                                 std::vector<Coeff_t> &upper) | ||||
| { | ||||
|   const int Ls = this->Ls; | ||||
|   for(int s=0;s<Ls;s++){ | ||||
|     // Forward neighbour: the last slice wraps to slice 0, every other slice | ||||
|     // (including s==0) uses s+1. | ||||
|     const int fwd = ( s!=0 && s==(Ls-1) ) ? 0 : s+1; | ||||
|     // Backward neighbour: slice 0 wraps to Ls-1, every other slice uses s-1. | ||||
|     const int bwd = ( s==0 ) ? Ls-1 : s-1; | ||||
|  | ||||
|     axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,fwd); | ||||
|     axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,bwd); | ||||
|   } | ||||
| } | ||||
| // Counterpart of M5D with the two projected helpers exchanged: | ||||
| // axpby_ssp_pplus handles the forward neighbour slice and axpby_ssp_pminus the | ||||
| // backward neighbour slice. | ||||
| //   lower, diag, upper : per-slice coefficients, indexed by s | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, | ||||
|                                    const FermionField &phi, | ||||
|                                    FermionField &chi, | ||||
|                                    std::vector<Coeff_t> &lower, | ||||
|                                    std::vector<Coeff_t> &diag, | ||||
|                                    std::vector<Coeff_t> &upper) | ||||
| { | ||||
|   const int Ls = this->Ls; | ||||
|   for(int s=0;s<Ls;s++){ | ||||
|     // Forward neighbour: the last slice wraps to slice 0, every other slice | ||||
|     // (including s==0) uses s+1. | ||||
|     const int fwd = ( s!=0 && s==(Ls-1) ) ? 0 : s+1; | ||||
|     // Backward neighbour: slice 0 wraps to Ls-1, every other slice uses s-1. | ||||
|     const int bwd = ( s==0 ) ? Ls-1 : s-1; | ||||
|  | ||||
|     axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,fwd); | ||||
|     axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,bwd); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Applies the inverse of the fifth-dimension (ee/oo) matrix to psi, writing | ||||
| // chi, via the stored factors lee, leem, dee, ueem and uee.  Every step is one | ||||
| // slice-wise axpby call; the steps must run in this order because later ones | ||||
| // read chi slices produced by earlier ones. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   chi.checkerboard = psi.checkerboard; | ||||
|   const int Ls   = this->Ls; | ||||
|   const int last = Ls-1;                // index of the final slice | ||||
|  | ||||
|   // (L')^{-1}: chi[0]=psi[0], then slice k feeds slice k+1 | ||||
|   axpby_ssp (chi,1.0,psi,     0.0,psi,0,0); | ||||
|   for (int k=0;k<last;k++){ | ||||
|     axpby_ssp_pplus(chi,1.0,psi,-lee[k],chi,k+1,k); | ||||
|   } | ||||
|  | ||||
|   // L_m^{-1}: fold every earlier slice into the last one | ||||
|   for (int k=0;k<last;k++){ | ||||
|     axpby_ssp_pminus(chi,1.0,chi,-leem[k],chi,last,k); | ||||
|   } | ||||
|  | ||||
|   // U_m^{-1} D^{-1}: scale each slice and subtract the last-slice term | ||||
|   for (int k=0;k<last;k++){ | ||||
|     axpby_ssp_pplus(chi,1.0/dee[k],chi,-ueem[k]/dee[Ls-1],chi,k,last); | ||||
|   } | ||||
|   axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,last,last); // Modest avoidable | ||||
|  | ||||
|   // U^{-1}: backward sweep, slice k feeds slice k-1 | ||||
|   for (int k=last;k>0;k--){ | ||||
|     axpby_ssp_pminus (chi,1.0,chi,-uee[k-1],chi,k-1,k); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Daggered counterpart of MooeeInv: the same sequence of slice-wise axpby | ||||
| // calls with the upper (uee/ueem) and lower (lee/leem) factors and the two | ||||
| // projected helpers exchanged.  Reads psi, writes chi. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   chi.checkerboard = psi.checkerboard; | ||||
|   const int Ls   = this->Ls; | ||||
|   const int last = Ls-1;                // index of the final slice | ||||
|  | ||||
|   // (U')^{-dagger}: chi[0]=psi[0], then slice k feeds slice k+1 | ||||
|   axpby_ssp (chi,1.0,psi,     0.0,psi,0,0); | ||||
|   for (int k=0;k<last;k++){ | ||||
|     axpby_ssp_pminus(chi,1.0,psi,-uee[k],chi,k+1,k); | ||||
|   } | ||||
|  | ||||
|   // U_m^{-dagger}: fold every earlier slice into the last one | ||||
|   for (int k=0;k<last;k++){ | ||||
|     axpby_ssp_pplus(chi,1.0,chi,-ueem[k],chi,last,k); | ||||
|   } | ||||
|  | ||||
|   // L_m^{-dagger} D^{-dagger}: scale each slice, subtract the last-slice term | ||||
|   for (int k=0;k<last;k++){ | ||||
|     axpby_ssp_pminus(chi,1.0/dee[k],chi,-leem[k]/dee[Ls-1],chi,k,last); | ||||
|   } | ||||
|   axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,last,last); // Modest avoidable | ||||
|  | ||||
|   // L^{-dagger}: backward sweep, slice k feeds slice k-1 | ||||
|   for (int k=last;k>0;k--){ | ||||
|     axpby_ssp_pplus (chi,1.0,chi,-lee[k-1],chi,k-1,k); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| // Explicit instantiations of this file's M5D, M5Ddag, MooeeInv and MooeeInvDag, | ||||
| // emitted only when CAYLEY_DPERP_LINALG is defined.  They use the | ||||
| // INSTANTIATE_DPERP macro that accompanies the CAYLEY_DPERP_* switches (the | ||||
| // same macro the cache implementation uses), so enabling the switch compiles. | ||||
| #ifdef CAYLEY_DPERP_LINALG | ||||
|   INSTANTIATE_DPERP(WilsonImplF); | ||||
|   INSTANTIATE_DPERP(WilsonImplD); | ||||
|   INSTANTIATE_DPERP(GparityWilsonImplF); | ||||
|   INSTANTIATE_DPERP(GparityWilsonImplD); | ||||
| #endif | ||||
|  | ||||
| } | ||||
| } | ||||
							
								
								
									
										309
									
								
								lib/qcd/action/fermion/CayleyFermion5Dvec.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										309
									
								
								lib/qcd/action/fermion/CayleyFermion5Dvec.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,309 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/CayleyFermion5Dvec.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid/Eigen/Dense> | ||||
| #include <Grid.h> | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|   /* | ||||
|    * Dense matrix versions of routines | ||||
|    */ | ||||
| // Daggered inverse of the ee/oo matrix: forwards to MooeeInternal with | ||||
| // DaggerYes and InverseYes. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   this->MooeeInternal(psi,chi,DaggerYes,InverseYes); | ||||
| } | ||||
|    | ||||
| // Inverse of the ee/oo matrix: forwards to MooeeInternal with DaggerNo and | ||||
| // InverseYes. | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   this->MooeeInternal(psi,chi,DaggerNo,InverseYes); | ||||
| } | ||||
| // Fifth-dimension hopping term for the layout in which the fifth dimension is | ||||
| // spread across SIMD lanes: the grid holds LLs vector slices per block and | ||||
| // Ls/LLs must equal the SIMD width (asserted below). | ||||
| // For each vector slice v it computes | ||||
| //   chi[v] = d[v]*phi[v] + u[v]*fp + l[v]*fm | ||||
| // where fp/fm are rebuilt from the spProj5m / spProj5p projections of the | ||||
| // neighbouring slices vp=(v+1)%LLs and vm=(v-1+LLs)%LLs. | ||||
| //   lower, diag, upper : per-slice coefficients, indexed by the scalar slice s | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | ||||
| 				const FermionField &phi,  | ||||
| 				FermionField &chi, | ||||
| 				std::vector<Coeff_t> &lower, | ||||
| 				std::vector<Coeff_t> &diag, | ||||
| 				std::vector<Coeff_t> &upper) | ||||
| { | ||||
|   GridBase *grid=psi._grid; | ||||
|   int Ls   = this->Ls; | ||||
|   int LLs  = grid->_rdimensions[0]; | ||||
|   int nsimd= Simd::Nsimd(); | ||||
|  | ||||
|   // Coefficients repacked as one SIMD word per vector slice. | ||||
|   Vector<iSinglet<Simd> > u(LLs); | ||||
|   Vector<iSinglet<Simd> > l(LLs); | ||||
|   Vector<iSinglet<Simd> > d(LLs); | ||||
|  | ||||
|   assert(Ls/LLs==nsimd); | ||||
|   assert(phi.checkerboard == psi.checkerboard); | ||||
|  | ||||
|   chi.checkerboard=psi.checkerboard; | ||||
|  | ||||
|   // just directly address via type pun | ||||
|   // NOTE(review): assumes iSinglet<Simd> is laid out as nsimd contiguous | ||||
|   // scalar_type values -- confirm against the Simd type. | ||||
|   typedef typename Simd::scalar_type scalar_type; | ||||
|   scalar_type * u_p = (scalar_type *)&u[0]; | ||||
|   scalar_type * l_p = (scalar_type *)&l[0]; | ||||
|   scalar_type * d_p = (scalar_type *)&d[0]; | ||||
|  | ||||
|   // Scalar slice s = o + i*LLs is stored in lane i of vector slice o. | ||||
|   for(int o=0;o<LLs;o++){ // outer | ||||
|   for(int i=0;i<nsimd;i++){ //inner | ||||
|     int s  = o+i*LLs; | ||||
|     int ss = o*nsimd+i; | ||||
|     u_p[ss] = upper[s]; | ||||
|     l_p[ss] = lower[s]; | ||||
|     d_p[ss] = diag[s]; | ||||
|   }} | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs | ||||
|  | ||||
|     alignas(64) SiteHalfSpinor hp; | ||||
|     alignas(64) SiteHalfSpinor hm; | ||||
|     alignas(64) SiteSpinor fp; | ||||
|     alignas(64) SiteSpinor fm; | ||||
|  | ||||
|     for(int v=0;v<LLs;v++){ | ||||
|  | ||||
|       int vp=(v+1)%LLs; | ||||
|       int vm=(v+LLs-1)%LLs; | ||||
|  | ||||
|       spProj5m(hp,psi[ss+vp]); | ||||
|       spProj5p(hm,psi[ss+vm]); | ||||
|        | ||||
|       // vp<=v / vm>=v hold only when the neighbour index wrapped around the | ||||
|       // vector slices.  NOTE(review): rotate presumably shifts the SIMD lanes | ||||
|       // so each lane sees the adjacent lane's slice -- confirm against rotate(). | ||||
|       if ( vp<=v ) rotate(hp,hp,1); | ||||
|       if ( vm>=v ) rotate(hm,hm,nsimd-1); | ||||
|  | ||||
|       // NOTE(review): the 0.5 presumably normalises the project/reconstruct | ||||
|       // pair -- confirm against spProj5*/spRecon5*. | ||||
|       hp=hp*0.5; | ||||
|       hm=hm*0.5; | ||||
|       spRecon5m(fp,hp); | ||||
|       spRecon5p(fm,hm); | ||||
|  | ||||
|       chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; | ||||
|       chi[ss+v] = chi[ss+v]     +l[v]*fm; | ||||
|  | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Counterpart of the vectorised M5D with the projectors exchanged: the | ||||
| // forward neighbour vp is projected with spProj5p / rebuilt with spRecon5p and | ||||
| // the backward neighbour vm with spProj5m / spRecon5m.  For each vector slice v | ||||
| //   chi[v] = d[v]*phi[v] + u[v]*fp + l[v]*fm | ||||
| // Ls/LLs must equal the SIMD width (asserted below). | ||||
| //   lower, diag, upper : per-slice coefficients, indexed by the scalar slice s | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, | ||||
| 				   const FermionField &phi,  | ||||
| 				   FermionField &chi, | ||||
| 				   std::vector<Coeff_t> &lower, | ||||
| 				   std::vector<Coeff_t> &diag, | ||||
| 				   std::vector<Coeff_t> &upper) | ||||
| { | ||||
|   GridBase *grid=psi._grid; | ||||
|   int Ls   = this->Ls; | ||||
|   int LLs  = grid->_rdimensions[0]; | ||||
|   int nsimd= Simd::Nsimd(); | ||||
|  | ||||
|   // Coefficients repacked as one SIMD word per vector slice. | ||||
|   Vector<iSinglet<Simd> > u(LLs); | ||||
|   Vector<iSinglet<Simd> > l(LLs); | ||||
|   Vector<iSinglet<Simd> > d(LLs); | ||||
|  | ||||
|   assert(Ls/LLs==nsimd); | ||||
|   assert(phi.checkerboard == psi.checkerboard); | ||||
|  | ||||
|   chi.checkerboard=psi.checkerboard; | ||||
|  | ||||
|   // just directly address via type pun | ||||
|   // NOTE(review): assumes iSinglet<Simd> is laid out as nsimd contiguous | ||||
|   // scalar_type values -- confirm against the Simd type. | ||||
|   typedef typename Simd::scalar_type scalar_type; | ||||
|   scalar_type * u_p = (scalar_type *)&u[0]; | ||||
|   scalar_type * l_p = (scalar_type *)&l[0]; | ||||
|   scalar_type * d_p = (scalar_type *)&d[0]; | ||||
|  | ||||
|   // Scalar slice s = o + i*LLs is stored in lane i of vector slice o. | ||||
|   for(int o=0;o<LLs;o++){ // outer | ||||
|   for(int i=0;i<nsimd;i++){ //inner | ||||
|     int s  = o+i*LLs; | ||||
|     int ss = o*nsimd+i; | ||||
|     u_p[ss] = upper[s]; | ||||
|     l_p[ss] = lower[s]; | ||||
|     d_p[ss] = diag[s]; | ||||
|   }} | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs | ||||
|  | ||||
|     alignas(64) SiteHalfSpinor hp; | ||||
|     alignas(64) SiteHalfSpinor hm; | ||||
|     alignas(64) SiteSpinor fp; | ||||
|     alignas(64) SiteSpinor fm; | ||||
|  | ||||
|     for(int v=0;v<LLs;v++){ | ||||
|  | ||||
|       int vp=(v+1)%LLs; | ||||
|       int vm=(v+LLs-1)%LLs; | ||||
|  | ||||
|       spProj5p(hp,psi[ss+vp]); | ||||
|       spProj5m(hm,psi[ss+vm]); | ||||
|  | ||||
|       // vp<=v / vm>=v hold only when the neighbour index wrapped around the | ||||
|       // vector slices.  NOTE(review): rotate presumably shifts the SIMD lanes | ||||
|       // so each lane sees the adjacent lane's slice -- confirm against rotate(). | ||||
|       if ( vp<=v ) rotate(hp,hp,1); | ||||
|       if ( vm>=v ) rotate(hm,hm,nsimd-1); | ||||
|        | ||||
|       // NOTE(review): the 0.5 presumably normalises the project/reconstruct | ||||
|       // pair -- confirm against spProj5*/spRecon5*. | ||||
|       hp=hp*0.5; | ||||
|       hm=hm*0.5; | ||||
|       spRecon5p(fp,hp); | ||||
|       spRecon5m(fm,hm); | ||||
|  | ||||
|       chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; | ||||
|       chi[ss+v] = chi[ss+v]     +l[v]*fm; | ||||
|  | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) | ||||
| { | ||||
|   int Ls=this->Ls; | ||||
|   int LLs = psi._grid->_rdimensions[0]; | ||||
|   int vol = psi._grid->oSites()/LLs; | ||||
|  | ||||
|   chi.checkerboard=psi.checkerboard; | ||||
|    | ||||
|   Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls); | ||||
|   Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); | ||||
|    | ||||
|   for(int s=0;s<Ls;s++){ | ||||
|     Pplus(s,s) = bee[s]; | ||||
|     Pminus(s,s)= bee[s]; | ||||
|   } | ||||
|    | ||||
|   for(int s=0;s<Ls-1;s++){ | ||||
|     Pminus(s,s+1) = -cee[s]; | ||||
|   } | ||||
|    | ||||
|   for(int s=0;s<Ls-1;s++){ | ||||
|     Pplus(s+1,s) = -cee[s+1]; | ||||
|   } | ||||
|   Pplus (0,Ls-1) = mass*cee[0]; | ||||
|   Pminus(Ls-1,0) = mass*cee[Ls-1]; | ||||
|    | ||||
|   Eigen::MatrixXcd PplusMat ; | ||||
|   Eigen::MatrixXcd PminusMat; | ||||
|    | ||||
|   if ( inv ) { | ||||
|     PplusMat =Pplus.inverse(); | ||||
|     PminusMat=Pminus.inverse(); | ||||
|   } else {  | ||||
|     PplusMat =Pplus; | ||||
|     PminusMat=Pminus; | ||||
|   } | ||||
|    | ||||
|   if(dag){ | ||||
|     PplusMat.adjointInPlace(); | ||||
|     PminusMat.adjointInPlace(); | ||||
|   } | ||||
|    | ||||
|   typedef typename SiteHalfSpinor::scalar_type scalar_type; | ||||
|   const int Nsimd=Simd::Nsimd(); | ||||
|   Vector<iSinglet<Simd> > Matp(Ls*LLs); | ||||
|   Vector<iSinglet<Simd> > Matm(Ls*LLs); | ||||
|  | ||||
|   for(int s2=0;s2<Ls;s2++){ | ||||
|   for(int s1=0;s1<LLs;s1++){ | ||||
|     int istride = LLs; | ||||
|     int ostride = 1; | ||||
|       Simd Vp; | ||||
|       Simd Vm; | ||||
|       scalar_type *sp = (scalar_type *)&Vp; | ||||
|       scalar_type *sm = (scalar_type *)&Vm; | ||||
|       for(int l=0;l<Nsimd;l++){ | ||||
| 	sp[l] = PplusMat (l*istride+s1*ostride ,s2); | ||||
| 	sm[l] = PminusMat(l*istride+s1*ostride,s2); | ||||
|       } | ||||
|       Matp[LLs*s2+s1] = Vp; | ||||
|       Matm[LLs*s2+s1] = Vm; | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   // Scratch buffers are declared inside the loop body so every iteration works on its own copies; the alloca (stack) variant is kept commented out below and Vector<> is used instead | ||||
| PARALLEL_FOR_LOOP | ||||
|   for(auto site=0;site<vol;site++){ | ||||
|      | ||||
|     //    SiteHalfSpinor *SitePplus =(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor)); | ||||
|     //    SiteHalfSpinor *SitePminus=(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor)); | ||||
|     //    SiteSpinor     *SiteChi   =(SiteSpinor *)     alloca(LLs*sizeof(SiteSpinor)); | ||||
|  | ||||
|     Vector<SiteHalfSpinor> SitePplus(LLs); | ||||
|     Vector<SiteHalfSpinor> SitePminus(LLs); | ||||
|     Vector<SiteHalfSpinor> SiteChiP(LLs); | ||||
|     Vector<SiteHalfSpinor> SiteChiM(LLs); | ||||
|     Vector<SiteSpinor>     SiteChi(LLs); | ||||
|  | ||||
|     SiteHalfSpinor BcastP; | ||||
|     SiteHalfSpinor BcastM; | ||||
|  | ||||
|     for(int s=0;s<LLs;s++){ | ||||
|       int lex = s+LLs*site; | ||||
|       spProj5p(SitePplus[s] ,psi[lex]); | ||||
|       spProj5m(SitePminus[s],psi[lex]); | ||||
|       SiteChiP[s]=zero; | ||||
|       SiteChiM[s]=zero; | ||||
|     } | ||||
|        | ||||
|     int s=0; | ||||
|     for(int  l=0; l<Simd::Nsimd();l++){ // simd lane l; s is incremented once per (l,s2) pair, so the matrix column used is s = l*LLs+s2 | ||||
|       for(int s2=0;s2<LLs;s2++){ // s2: which projected input spinor has its lane l broadcast into BcastP/BcastM | ||||
| 	vbroadcast(BcastP,SitePplus [s2],l); | ||||
| 	vbroadcast(BcastM,SitePminus[s2],l); | ||||
| 	for(int s1=0;s1<LLs;s1++){ // s1: output slot; accumulates column s of Matp/Matm times the broadcast spinor | ||||
| 	  SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP; | ||||
| 	  SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM; | ||||
| 	} | ||||
|       s++; | ||||
|     }} | ||||
|  | ||||
|     for(int s=0;s<LLs;s++){ | ||||
|       int lex = s+LLs*site; | ||||
|       spRecon5p(SiteChi[s],SiteChiP[s]); | ||||
|       accumRecon5m(SiteChi[s],SiteChiM[s]); | ||||
|       chi[lex] = SiteChi[s]*0.5; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| INSTANTIATE_DPERP(DomainWallVec5dImplD); | ||||
| INSTANTIATE_DPERP(DomainWallVec5dImplF); | ||||
| INSTANTIATE_DPERP(ZDomainWallVec5dImplD); | ||||
| INSTANTIATE_DPERP(ZDomainWallVec5dImplF); | ||||
|  | ||||
| template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
| template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
| template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
| template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||
|  | ||||
| }} | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_QCD_DOMAIN_WALL_FERMION_H | ||||
| #define  GRID_QCD_DOMAIN_WALL_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| @@ -63,7 +63,7 @@ namespace Grid { | ||||
| 	Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham | ||||
| 	assert(zdata->n==this->Ls); | ||||
| 	 | ||||
| 	std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl; | ||||
| 	//	std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl; | ||||
| 	// Call base setter | ||||
| 	this->SetCoefficientsTanh(zdata,1.0,0.0); | ||||
|  | ||||
|   | ||||
| @@ -1,35 +1,36 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h | ||||
| Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef  GRID_QCD_FERMION_OPERATOR_IMPL_H | ||||
| #define  GRID_QCD_FERMION_OPERATOR_IMPL_H | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H | ||||
| #define GRID_QCD_FERMION_OPERATOR_IMPL_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| @@ -75,7 +76,7 @@ namespace Grid { | ||||
|     // | ||||
|     // | ||||
|     // template<class Impl> | ||||
|     // class MyOp : pubic<Impl> {  | ||||
|     // class MyOp : public<Impl> {  | ||||
|     // public: | ||||
|     // | ||||
|     //    INHERIT_ALL_IMPL_TYPES(Impl); | ||||
| @@ -99,247 +100,281 @@ namespace Grid { | ||||
|     typedef typename Impl::SiteSpinor               SiteSpinor;		\ | ||||
|     typedef typename Impl::SiteHalfSpinor       SiteHalfSpinor;		\ | ||||
|     typedef typename Impl::Compressor               Compressor;		\ | ||||
|     typedef typename Impl::StencilImpl              StencilImpl;	\ | ||||
|     typedef typename Impl::ImplParams ImplParams; | ||||
|     typedef typename Impl::StencilImpl             StencilImpl;		\ | ||||
|     typedef typename Impl::ImplParams ImplParams;			\ | ||||
|     typedef typename Impl::Coeff_t       Coeff_t; | ||||
|  | ||||
| #define INHERIT_IMPL_TYPES(Base) \ | ||||
|     INHERIT_GIMPL_TYPES(Base)\ | ||||
|     INHERIT_GIMPL_TYPES(Base)	 \ | ||||
|     INHERIT_FIMPL_TYPES(Base) | ||||
|  | ||||
|      | ||||
|     /////// | ||||
|     // Single flavour four spinors with colour index | ||||
|     /////// | ||||
|     template<class S,int Nrepresentation=Nc> | ||||
|     class WilsonImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {  | ||||
|     template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD > | ||||
|     class WilsonImpl | ||||
|       : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > { | ||||
|     public: | ||||
|       static const int Dimension = Representation::Dimension; | ||||
|       typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl; | ||||
|        | ||||
|       //Necessary? | ||||
|       constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} | ||||
|  | ||||
|       const bool LsVectorised=false; | ||||
|       typedef _Coeff_t Coeff_t; | ||||
|  | ||||
|       typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; | ||||
|  | ||||
|       INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|       template<typename vtype> using iImplSpinor             = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; | ||||
|       template<typename vtype> using iImplHalfSpinor         = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; | ||||
|       template<typename vtype> using iImplDoubledGaugeField  = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >; | ||||
|      | ||||
|       typedef iImplSpinor    <Simd>           SiteSpinor; | ||||
|       typedef iImplHalfSpinor<Simd>           SiteHalfSpinor; | ||||
|       typedef iImplDoubledGaugeField<Simd>    SiteDoubledGaugeField; | ||||
|  | ||||
|       typedef Lattice<SiteSpinor>                 FermionField; | ||||
|        | ||||
|       template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Dimension>, Ns> >; | ||||
|       template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >; | ||||
|       template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; | ||||
|        | ||||
|       typedef iImplSpinor<Simd>            SiteSpinor; | ||||
|       typedef iImplHalfSpinor<Simd>        SiteHalfSpinor; | ||||
|       typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; | ||||
|        | ||||
|       typedef Lattice<SiteSpinor>            FermionField; | ||||
|       typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|  | ||||
|       typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; | ||||
|        | ||||
|       typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; | ||||
|       typedef WilsonImplParams ImplParams; | ||||
|       typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl; | ||||
|  | ||||
|       typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; | ||||
|        | ||||
|       ImplParams Params; | ||||
|  | ||||
|       WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};  | ||||
|  | ||||
|        | ||||
|       WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; | ||||
|        | ||||
|       bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; | ||||
|      | ||||
|       inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ | ||||
|         mult(&phi(),&U(mu),&chi()); | ||||
|       } | ||||
|  | ||||
|       template<class ref> | ||||
|       inline void loadLinkElement(Simd & reg,ref &memory){ | ||||
| 	reg = memory; | ||||
|       } | ||||
|       inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) | ||||
|       { | ||||
|         conformable(Uds._grid,GaugeGrid); | ||||
|         conformable(Umu._grid,GaugeGrid); | ||||
|         GaugeLinkField U(GaugeGrid); | ||||
|         for(int mu=0;mu<Nd;mu++){ | ||||
|   	  U = PeekIndex<LorentzIndex>(Umu,mu); | ||||
| 	  PokeIndex<LorentzIndex>(Uds,U,mu); | ||||
| 	  U = adj(Cshift(U,mu,-1)); | ||||
| 	  PokeIndex<LorentzIndex>(Uds,U,mu+4); | ||||
| 	} | ||||
|        | ||||
|       inline void multLink(SiteHalfSpinor &phi, | ||||
| 			   const SiteDoubledGaugeField &U, | ||||
| 			   const SiteHalfSpinor &chi, | ||||
| 			   int mu, | ||||
| 			   StencilEntry *SE, | ||||
| 			   StencilImpl &St) { | ||||
| 	mult(&phi(), &U(mu), &chi()); | ||||
|       } | ||||
|        | ||||
|       template <class ref> | ||||
|       inline void loadLinkElement(Simd &reg, | ||||
| 				  ref &memory) { | ||||
| 	reg = memory; | ||||
|       } | ||||
|        | ||||
|       inline void DoubleStore(GridBase *GaugeGrid, | ||||
| 			      DoubledGaugeField &Uds, | ||||
| 			      const GaugeField &Umu) { | ||||
| 	conformable(Uds._grid, GaugeGrid); | ||||
| 	conformable(Umu._grid, GaugeGrid); | ||||
| 	GaugeLinkField U(GaugeGrid); | ||||
| 	for (int mu = 0; mu < Nd; mu++) { | ||||
| 	  U = PeekIndex<LorentzIndex>(Umu, mu); | ||||
| 	  PokeIndex<LorentzIndex>(Uds, U, mu); | ||||
| 	  U = adj(Cshift(U, mu, -1)); | ||||
| 	  PokeIndex<LorentzIndex>(Uds, U, mu + 4); | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ | ||||
| 	GaugeLinkField link(mat._grid); | ||||
| 	link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));  | ||||
| 	PokeIndex<LorentzIndex>(mat,link,mu); | ||||
|       }    | ||||
|  | ||||
|        | ||||
|       inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ | ||||
|  | ||||
| 	 | ||||
| 	int Ls=Btilde._grid->_fdimensions[0]; | ||||
|  | ||||
| 	GaugeLinkField tmp(mat._grid); | ||||
| 	tmp = zero; | ||||
| PARALLEL_FOR_LOOP | ||||
| 	for(int sss=0;sss<tmp._grid->oSites();sss++){ | ||||
| 	  int sU=sss; | ||||
| 	  for(int s=0;s<Ls;s++){ | ||||
| 	    int sF = s+Ls*sU; | ||||
| 	    tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here | ||||
|  | ||||
|         PARALLEL_FOR_LOOP | ||||
| 	  for(int sss=0;sss<tmp._grid->oSites();sss++){ | ||||
| 	    int sU=sss; | ||||
| 	    for(int s=0;s<Ls;s++){ | ||||
| 	      int sF = s+Ls*sU; | ||||
| 	      tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here | ||||
| 	    } | ||||
| 	  } | ||||
| 	} | ||||
| 	PokeIndex<LorentzIndex>(mat,tmp,mu); | ||||
| 	 | ||||
|  | ||||
|       } | ||||
|  | ||||
|     }; | ||||
|  | ||||
|  | ||||
|  | ||||
|     /////// | ||||
|     // Single flavour four spinors with colour index, 5d redblack | ||||
|     /////// | ||||
|     template<class S,int Nrepresentation=Nc> | ||||
|     class DomainWallRedBlack5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {  | ||||
|     template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD> | ||||
|     class DomainWallVec5dImpl :  public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {  | ||||
|     public: | ||||
|  | ||||
|       typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; | ||||
|        | ||||
|       static const int Dimension = Nrepresentation; | ||||
|       const bool LsVectorised=true; | ||||
|       typedef _Coeff_t Coeff_t;       | ||||
|       typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl; | ||||
|  | ||||
|       INHERIT_GIMPL_TYPES(Gimpl); | ||||
|        | ||||
|       template<typename vtype> using iImplSpinor             = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; | ||||
|       template<typename vtype> using iImplHalfSpinor         = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; | ||||
|       template<typename vtype> using iImplDoubledGaugeField  = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >; | ||||
|       template<typename vtype> using iImplGaugeField         = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >; | ||||
|       template<typename vtype> using iImplGaugeLink          = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; | ||||
|      | ||||
|       typedef iImplSpinor    <Simd>           SiteSpinor; | ||||
|       typedef iImplHalfSpinor<Simd>           SiteHalfSpinor; | ||||
|       typedef Lattice<SiteSpinor>             FermionField; | ||||
|  | ||||
|       template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; | ||||
|       template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; | ||||
|       template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>; | ||||
|       template <typename vtype> using iImplGaugeField        = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>; | ||||
|       template <typename vtype> using iImplGaugeLink         = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; | ||||
|        | ||||
|       typedef iImplSpinor<Simd> SiteSpinor; | ||||
|       typedef iImplHalfSpinor<Simd> SiteHalfSpinor; | ||||
|       typedef Lattice<SiteSpinor> FermionField; | ||||
|        | ||||
|       // Make the doubled gauge field a *scalar* | ||||
|       typedef iImplDoubledGaugeField<typename Simd::scalar_type>    SiteDoubledGaugeField; // This is a scalar | ||||
|       typedef iImplGaugeField<typename Simd::scalar_type>           SiteScalarGaugeField;  // scalar | ||||
|       typedef iImplGaugeLink <typename Simd::scalar_type>           SiteScalarGaugeLink;   // scalar | ||||
|  | ||||
|       typedef Lattice<SiteDoubledGaugeField>                  DoubledGaugeField; | ||||
|  | ||||
|       typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; | ||||
|       typedef iImplDoubledGaugeField<typename Simd::scalar_type> | ||||
|       SiteDoubledGaugeField;  // This is a scalar | ||||
|       typedef iImplGaugeField<typename Simd::scalar_type> | ||||
|       SiteScalarGaugeField;  // scalar | ||||
|       typedef iImplGaugeLink<typename Simd::scalar_type> | ||||
|       SiteScalarGaugeLink;  // scalar | ||||
|        | ||||
|       typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|        | ||||
|       typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; | ||||
|       typedef WilsonImplParams ImplParams; | ||||
|       typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl; | ||||
|  | ||||
|       typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; | ||||
|        | ||||
|       ImplParams Params; | ||||
|  | ||||
|       DomainWallRedBlack5dImpl(const ImplParams &p= ImplParams()) : Params(p) {};  | ||||
|  | ||||
|        | ||||
|       DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; | ||||
|        | ||||
|       bool overlapCommsCompute(void) { return false; }; | ||||
|      | ||||
|       template<class ref> | ||||
|       inline void loadLinkElement(Simd & reg,ref &memory){ | ||||
| 	vsplat(reg,memory); | ||||
|        | ||||
|       template <class ref> | ||||
|       inline void loadLinkElement(Simd &reg, ref &memory) { | ||||
| 	vsplat(reg, memory); | ||||
|       } | ||||
|       inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) | ||||
|       { | ||||
|       inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, | ||||
| 			   const SiteHalfSpinor &chi, int mu, StencilEntry *SE, | ||||
| 			   StencilImpl &St) { | ||||
| 	SiteGaugeLink UU; | ||||
| 	for(int i=0;i<Nrepresentation;i++){ | ||||
| 	  for(int j=0;j<Nrepresentation;j++){ | ||||
| 	    vsplat(UU()()(i,j),U(mu)()(i,j)); | ||||
| 	for (int i = 0; i < Nrepresentation; i++) { | ||||
| 	  for (int j = 0; j < Nrepresentation; j++) { | ||||
| 	    vsplat(UU()()(i, j), U(mu)()(i, j)); | ||||
| 	  } | ||||
| 	} | ||||
|         mult(&phi(),&UU(),&chi()); | ||||
| 	mult(&phi(), &UU(), &chi()); | ||||
|       } | ||||
|  | ||||
|       inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) | ||||
|       { | ||||
| 	SiteScalarGaugeField  ScalarUmu; | ||||
|        | ||||
|       inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds, | ||||
| 			      const GaugeField &Umu) { | ||||
| 	SiteScalarGaugeField ScalarUmu; | ||||
| 	SiteDoubledGaugeField ScalarUds; | ||||
|  | ||||
|         GaugeLinkField U   (Umu._grid); | ||||
| 	GaugeField     Uadj(Umu._grid); | ||||
|         for(int mu=0;mu<Nd;mu++){ | ||||
|   	  U = PeekIndex<LorentzIndex>(Umu,mu); | ||||
| 	  U = adj(Cshift(U,mu,-1)); | ||||
| 	  PokeIndex<LorentzIndex>(Uadj,U,mu); | ||||
| 	} | ||||
|  | ||||
| 	for(int lidx=0;lidx<GaugeGrid->lSites();lidx++){ | ||||
| 	  std::vector<int> lcoor; | ||||
| 	  GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor); | ||||
|  | ||||
| 	  peekLocalSite(ScalarUmu,Umu,lcoor); | ||||
| 	  for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu); | ||||
|  | ||||
| 	  peekLocalSite(ScalarUmu,Uadj,lcoor); | ||||
| 	  for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu); | ||||
|  | ||||
| 	  pokeLocalSite(ScalarUds,Uds,lcoor); | ||||
| 	} | ||||
|  | ||||
|       } | ||||
| 	 | ||||
|       inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ | ||||
| 	assert(0); | ||||
|       }    | ||||
|  | ||||
|       inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ | ||||
| 	GaugeLinkField U(Umu._grid); | ||||
| 	GaugeField Uadj(Umu._grid); | ||||
| 	for (int mu = 0; mu < Nd; mu++) { | ||||
| 	  U = PeekIndex<LorentzIndex>(Umu, mu); | ||||
| 	  U = adj(Cshift(U, mu, -1)); | ||||
| 	  PokeIndex<LorentzIndex>(Uadj, U, mu); | ||||
| 	} | ||||
| 	 | ||||
| 	for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { | ||||
| 	  std::vector<int> lcoor; | ||||
| 	  GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); | ||||
| 	   | ||||
| 	  peekLocalSite(ScalarUmu, Umu, lcoor); | ||||
| 	  for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); | ||||
| 	   | ||||
| 	  peekLocalSite(ScalarUmu, Uadj, lcoor); | ||||
| 	  for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); | ||||
| 	   | ||||
| 	  pokeLocalSite(ScalarUds, Uds, lcoor); | ||||
| 	} | ||||
|       } | ||||
|        | ||||
|       inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, | ||||
| 				FermionField &A, int mu) { | ||||
| 	assert(0); | ||||
|       } | ||||
|        | ||||
|       inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, | ||||
| 				FermionField &Atilde, int mu) { | ||||
| 	assert(0); | ||||
|       } | ||||
|  | ||||
|     }; | ||||
|  | ||||
|  | ||||
|      | ||||
|     //////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Flavour doubled spinors; is Gparity the only? what about C*? | ||||
|     //////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     template<class S,int Nrepresentation> | ||||
|     class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{  | ||||
|     public: | ||||
|  | ||||
|       typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; | ||||
|  | ||||
|       INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|       template<typename vtype> using iImplSpinor             = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >; | ||||
|       template<typename vtype> using iImplHalfSpinor         = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >; | ||||
|       template<typename vtype> using iImplDoubledGaugeField  = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >; | ||||
|      | ||||
|       typedef iImplSpinor    <Simd>           SiteSpinor; | ||||
|       typedef iImplHalfSpinor<Simd>           SiteHalfSpinor; | ||||
|       typedef iImplDoubledGaugeField<Simd>    SiteDoubledGaugeField; | ||||
|     template <class S, int Nrepresentation,class _Coeff_t = RealD> | ||||
|     class GparityWilsonImpl | ||||
|       : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > { | ||||
|     public: | ||||
|       static const int Dimension = Nrepresentation; | ||||
|  | ||||
|       typedef Lattice<SiteSpinor>                 FermionField; | ||||
|       const bool LsVectorised=false; | ||||
|  | ||||
|       typedef _Coeff_t Coeff_t; | ||||
|       typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; | ||||
|        | ||||
|       INHERIT_GIMPL_TYPES(Gimpl); | ||||
|        | ||||
|       template <typename vtype> | ||||
|       using iImplSpinor = | ||||
|       iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>; | ||||
|       template <typename vtype> | ||||
|       using iImplHalfSpinor = | ||||
| 	iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>; | ||||
|       template <typename vtype> | ||||
|       using iImplDoubledGaugeField = | ||||
| 	iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>; | ||||
|        | ||||
|       typedef iImplSpinor<Simd> SiteSpinor; | ||||
|       typedef iImplHalfSpinor<Simd> SiteHalfSpinor; | ||||
|       typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; | ||||
|        | ||||
|       typedef Lattice<SiteSpinor> FermionField; | ||||
|       typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|  | ||||
|       typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; | ||||
|       typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl; | ||||
|        | ||||
|       typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; | ||||
|       typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; | ||||
|  | ||||
|       typedef GparityWilsonImplParams ImplParams; | ||||
|  | ||||
|        | ||||
|       ImplParams Params; | ||||
|  | ||||
|       GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};  | ||||
|        | ||||
|  | ||||
|       GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; | ||||
|  | ||||
|       bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; | ||||
|  | ||||
|       // provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity | ||||
|       inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ | ||||
|  | ||||
|       // provide the multiply by link that is differentiated between Gparity (with | ||||
|       // flavour index) and non-Gparity | ||||
|       inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, | ||||
| 			   const SiteHalfSpinor &chi, int mu, StencilEntry *SE, | ||||
| 			   StencilImpl &St) { | ||||
| 	typedef SiteHalfSpinor vobj; | ||||
| 	typedef typename SiteHalfSpinor::scalar_object sobj; | ||||
|  | ||||
| 	 | ||||
| 	vobj vtmp; | ||||
| 	sobj stmp; | ||||
| 	 | ||||
| 	GridBase *grid = St._grid; | ||||
|        | ||||
| 	 | ||||
| 	const int Nsimd = grid->Nsimd(); | ||||
| 	 | ||||
| 	int direction    = St._directions[mu]; | ||||
| 	int distance     = St._distances[mu]; | ||||
| 	int ptype        = St._permute_type[mu];  | ||||
| 	int sl           = St._grid->_simd_layout[direction]; | ||||
|  | ||||
| 	int direction = St._directions[mu]; | ||||
| 	int distance = St._distances[mu]; | ||||
| 	int ptype = St._permute_type[mu]; | ||||
| 	int sl = St._grid->_simd_layout[direction]; | ||||
| 	 | ||||
| 	// Fixme X.Y.Z.T hardcode in stencil | ||||
| 	int mmu          = mu % Nd; | ||||
|  | ||||
| 	int mmu = mu % Nd; | ||||
| 	 | ||||
| 	// assert our assumptions | ||||
| 	assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code | ||||
| 	assert((sl==1)||(sl==2)); | ||||
| 	assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code | ||||
| 	assert((sl == 1) || (sl == 2)); | ||||
| 	 | ||||
| 	std::vector<int> icoor; | ||||
|        | ||||
| 	 | ||||
| 	if ( SE->_around_the_world && Params.twists[mmu] ) { | ||||
|  | ||||
| 	  if ( sl == 2 ) { | ||||
| @@ -380,7 +415,7 @@ PARALLEL_FOR_LOOP | ||||
| 	  mult(&phi(1),&U(1)(mu),&chi(1)); | ||||
| 	} | ||||
| 	 | ||||
|       } | ||||
|   } | ||||
|  | ||||
|       inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) | ||||
|       { | ||||
| @@ -393,7 +428,7 @@ PARALLEL_FOR_LOOP | ||||
| 	GaugeLinkField Uconj(GaugeGrid); | ||||
| 	 | ||||
| 	Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||
|  | ||||
| 	 | ||||
| 	 | ||||
| 	for(int mu=0;mu<Nd;mu++){ | ||||
| 	   | ||||
| @@ -401,19 +436,19 @@ PARALLEL_FOR_LOOP | ||||
| 	   | ||||
| 	  U     = PeekIndex<LorentzIndex>(Umu,mu); | ||||
| 	  Uconj = conjugate(U); | ||||
|  | ||||
| 	   | ||||
| 	  // This phase could come from a simple bc 1,1,-1,1 .. | ||||
| 	  int neglink = GaugeGrid->GlobalDimensions()[mu]-1; | ||||
| 	  if ( Params.twists[mu] ) {  | ||||
| 	    Uconj = where(coor==neglink,-Uconj,Uconj); | ||||
| 	  } | ||||
|  | ||||
| 	   | ||||
| PARALLEL_FOR_LOOP | ||||
| 	  for(auto ss=U.begin();ss<U.end();ss++){ | ||||
| 	    Uds[ss](0)(mu) = U[ss](); | ||||
| 	    Uds[ss](1)(mu) = Uconj[ss](); | ||||
| 	  } | ||||
| 	   | ||||
| 	  PARALLEL_FOR_LOOP | ||||
| 	    for(auto ss=U.begin();ss<U.end();ss++){ | ||||
| 	      Uds[ss](0)(mu) = U[ss](); | ||||
| 	      Uds[ss](1)(mu) = Uconj[ss](); | ||||
| 	    } | ||||
| 	   | ||||
| 	  U     = adj(Cshift(U    ,mu,-1));      // correct except for spanning the boundary | ||||
| 	  Uconj = adj(Cshift(Uconj,mu,-1)); | ||||
| @@ -423,68 +458,86 @@ PARALLEL_FOR_LOOP | ||||
| 	    Utmp = where(coor==0,Uconj,Utmp); | ||||
| 	  } | ||||
| 	   | ||||
| PARALLEL_FOR_LOOP | ||||
| 	  for(auto ss=U.begin();ss<U.end();ss++){ | ||||
| 	    Uds[ss](0)(mu+4) = Utmp[ss](); | ||||
| 	  } | ||||
| 	  PARALLEL_FOR_LOOP | ||||
| 	    for(auto ss=U.begin();ss<U.end();ss++){ | ||||
| 	      Uds[ss](0)(mu+4) = Utmp[ss](); | ||||
| 	    } | ||||
| 	   | ||||
| 	  Utmp = Uconj; | ||||
| 	  if ( Params.twists[mu] ) {  | ||||
| 	    Utmp = where(coor==0,U,Utmp); | ||||
| 	  } | ||||
| 	   | ||||
| PARALLEL_FOR_LOOP | ||||
| 	  for(auto ss=U.begin();ss<U.end();ss++){ | ||||
| 	    Uds[ss](1)(mu+4) = Utmp[ss](); | ||||
| 	  } | ||||
| 	  PARALLEL_FOR_LOOP | ||||
| 	    for(auto ss=U.begin();ss<U.end();ss++){ | ||||
| 	      Uds[ss](1)(mu+4) = Utmp[ss](); | ||||
| 	    } | ||||
| 	   | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ | ||||
| 	 | ||||
|        | ||||
|        | ||||
|       inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, | ||||
| 				FermionField &A, int mu) { | ||||
| 	// DhopDir provides U or Uconj depending on coor/flavour. | ||||
| 	GaugeLinkField link(mat._grid); | ||||
| 	// use lorentz for flavour as hack. | ||||
| 	auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A));   | ||||
| PARALLEL_FOR_LOOP | ||||
|         for(auto ss=tmp.begin();ss<tmp.end();ss++){ | ||||
| 	  link[ss]() = tmp[ss](0,0) - conjugate(tmp[ss](1,1)) ; | ||||
| 	} | ||||
| 	PokeIndex<LorentzIndex>(mat,link,mu); | ||||
| 	auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A)); | ||||
| 	PARALLEL_FOR_LOOP | ||||
| 	  for (auto ss = tmp.begin(); ss < tmp.end(); ss++) { | ||||
| 	    link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); | ||||
| 	  } | ||||
| 	PokeIndex<LorentzIndex>(mat, link, mu); | ||||
| 	return; | ||||
|       } | ||||
|       inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ | ||||
|  | ||||
| 	int Ls=Btilde._grid->_fdimensions[0]; | ||||
|  | ||||
|        | ||||
|       inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, | ||||
| 				FermionField &Atilde, int mu) { | ||||
| 	int Ls = Btilde._grid->_fdimensions[0]; | ||||
| 	 | ||||
| 	GaugeLinkField tmp(mat._grid); | ||||
| 	tmp = zero; | ||||
| PARALLEL_FOR_LOOP | ||||
| 	for(int ss=0;ss<tmp._grid->oSites();ss++){ | ||||
| 	  for(int s=0;s<Ls;s++){ | ||||
| 	    int sF = s+Ls*ss; | ||||
| 	    auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); | ||||
| 	    tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1)); | ||||
| 	PARALLEL_FOR_LOOP | ||||
| 	  for (int ss = 0; ss < tmp._grid->oSites(); ss++) { | ||||
| 	    for (int s = 0; s < Ls; s++) { | ||||
| 	      int sF = s + Ls * ss; | ||||
| 	      auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])); | ||||
| 	      tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); | ||||
| 	    } | ||||
| 	  } | ||||
| 	} | ||||
| 	PokeIndex<LorentzIndex>(mat,tmp,mu); | ||||
| 	PokeIndex<LorentzIndex>(mat, tmp, mu); | ||||
| 	return; | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec | ||||
|     typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float | ||||
|     typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double | ||||
|     typedef WilsonImpl<vComplex,  FundamentalRepresentation > WilsonImplR;   // Real.. whichever prec | ||||
|     typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF;  // Float | ||||
|     typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD;  // Double | ||||
|  | ||||
|     typedef DomainWallRedBlack5dImpl<vComplex ,Nc> DomainWallRedBlack5dImplR; // Real.. whichever prec | ||||
|     typedef DomainWallRedBlack5dImpl<vComplexF,Nc> DomainWallRedBlack5dImplF; // Float | ||||
|     typedef DomainWallRedBlack5dImpl<vComplexD,Nc> DomainWallRedBlack5dImplD; // Double | ||||
|  | ||||
|     typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec | ||||
|     typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float | ||||
|     typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double | ||||
|     typedef WilsonImpl<vComplex,  FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec | ||||
|     typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float | ||||
|     typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double | ||||
|  | ||||
|   } | ||||
|     typedef WilsonImpl<vComplex,  AdjointRepresentation > WilsonAdjImplR;   // Real.. whichever prec | ||||
|     typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF;  // Float | ||||
|     typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD;  // Double | ||||
|  | ||||
|     typedef WilsonImpl<vComplex,  TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR;   // Real.. whichever prec | ||||
|     typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF;  // Float | ||||
|     typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD;  // Double | ||||
|  | ||||
|     typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec | ||||
|     typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float | ||||
|     typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double | ||||
|      | ||||
|     typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec | ||||
|     typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float | ||||
|     typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double | ||||
|  | ||||
|     typedef GparityWilsonImpl<vComplex, Nc>  GparityWilsonImplR;  // Real.. whichever prec | ||||
|     typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF;  // Float | ||||
|     typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD;  // Double | ||||
| } | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_QCD_MOBIUS_FERMION_H | ||||
| #define  GRID_QCD_MOBIUS_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H | ||||
| #define  GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H | ||||
| #define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H | ||||
| #define  OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H | ||||
| #define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H | ||||
| #define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H | ||||
| #define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H | ||||
| #define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_QCD_SCALED_SHAMIR_FERMION_H | ||||
| #define  GRID_QCD_SCALED_SHAMIR_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H | ||||
| #define  GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H | ||||
|  | ||||
| #include <Grid.h> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   | ||||
| @@ -1,337 +1,315 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/WilsonFermion.cc | ||||
| Source file: ./lib/qcd/action/fermion/WilsonFermion.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
|   const std::vector<int> WilsonFermionStatic::directions   ({0,1,2,3, 0, 1, 2, 3}); | ||||
|   const std::vector<int> WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1}); | ||||
|   int WilsonFermionStatic::HandOptDslash; | ||||
| const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, | ||||
|                                                         3}); | ||||
| const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, | ||||
|                                                            -1, -1}); | ||||
| int WilsonFermionStatic::HandOptDslash; | ||||
|  | ||||
|   ///////////////////////////////// | ||||
|   // Constructor and gauge import | ||||
|   ///////////////////////////////// | ||||
| ///////////////////////////////// | ||||
| // Constructor and gauge import | ||||
| ///////////////////////////////// | ||||
|  | ||||
|   template<class Impl> | ||||
|   WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, | ||||
| 				     GridCartesian         &Fgrid, | ||||
| 				     GridRedBlackCartesian &Hgrid,  | ||||
| 				     RealD _mass,const ImplParams &p) : | ||||
|         Kernels(p), | ||||
|         _grid(&Fgrid), | ||||
| 	_cbgrid(&Hgrid), | ||||
| 	Stencil    (&Fgrid,npoint,Even,directions,displacements), | ||||
| 	StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even | ||||
| 	StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd | ||||
| 	mass(_mass), | ||||
| 	Umu(&Fgrid), | ||||
| 	UmuEven(&Hgrid), | ||||
| 	UmuOdd (&Hgrid)  | ||||
|   { | ||||
|     // Allocate the required comms buffer | ||||
|     ImportGauge(_Umu); | ||||
| template <class Impl> | ||||
| WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, | ||||
|                                    GridRedBlackCartesian &Hgrid, RealD _mass, | ||||
|                                    const ImplParams &p) | ||||
|     : Kernels(p), | ||||
|       _grid(&Fgrid), | ||||
|       _cbgrid(&Hgrid), | ||||
|       Stencil(&Fgrid, npoint, Even, directions, displacements), | ||||
|       StencilEven(&Hgrid, npoint, Even, directions, | ||||
|                   displacements),  // source is Even | ||||
|       StencilOdd(&Hgrid, npoint, Odd, directions, | ||||
|                  displacements),  // source is Odd | ||||
|       mass(_mass), | ||||
|       Lebesgue(_grid), | ||||
|       LebesgueEvenOdd(_cbgrid), | ||||
|       Umu(&Fgrid), | ||||
|       UmuEven(&Hgrid), | ||||
|       UmuOdd(&Hgrid) { | ||||
|   // Allocate the required comms buffer | ||||
|   ImportGauge(_Umu); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) { | ||||
|   GaugeField HUmu(_Umu._grid); | ||||
|   HUmu = _Umu * (-0.5); | ||||
|   Impl::DoubleStore(GaugeGrid(), Umu, HUmu); | ||||
|   pickCheckerboard(Even, UmuEven, Umu); | ||||
|   pickCheckerboard(Odd, UmuOdd, Umu); | ||||
| } | ||||
|  | ||||
| ///////////////////////////// | ||||
| // Implement the interface | ||||
| ///////////////////////////// | ||||
|  | ||||
| template <class Impl> | ||||
| RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) { | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   Dhop(in, out, DaggerNo); | ||||
|   return axpy_norm(out, 4 + mass, in, out); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) { | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   Dhop(in, out, DaggerYes); | ||||
|   return axpy_norm(out, 4 + mass, in, out); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) { | ||||
|   if (in.checkerboard == Odd) { | ||||
|     DhopEO(in, out, DaggerNo); | ||||
|   } else { | ||||
|     DhopOE(in, out, DaggerNo); | ||||
|   } | ||||
|  | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) | ||||
|   { | ||||
|     GaugeField HUmu(_Umu._grid); | ||||
|     HUmu = _Umu*(-0.5); | ||||
|     Impl::DoubleStore(GaugeGrid(),Umu,HUmu); | ||||
|     pickCheckerboard(Even,UmuEven,Umu); | ||||
|     pickCheckerboard(Odd ,UmuOdd,Umu); | ||||
|   } | ||||
|    | ||||
|   ///////////////////////////// | ||||
|   // Implement the interface | ||||
|   ///////////////////////////// | ||||
|        | ||||
|   template<class Impl> | ||||
|   RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)  | ||||
|   { | ||||
|     out.checkerboard=in.checkerboard; | ||||
|     Dhop(in,out,DaggerNo); | ||||
|     return axpy_norm(out,4+mass,in,out); | ||||
| } | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) { | ||||
|   if (in.checkerboard == Odd) { | ||||
|     DhopEO(in, out, DaggerYes); | ||||
|   } else { | ||||
|     DhopOE(in, out, DaggerYes); | ||||
|   } | ||||
| } | ||||
|  | ||||
|   template<class Impl> | ||||
|   RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)  | ||||
|   { | ||||
|     out.checkerboard=in.checkerboard; | ||||
|     Dhop(in,out,DaggerYes); | ||||
|     return axpy_norm(out,4+mass,in,out); | ||||
|   } | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) { | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   typename FermionField::scalar_type scal(4.0 + mass); | ||||
|   out = scal * in; | ||||
| } | ||||
|  | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)  | ||||
|   { | ||||
|     if ( in.checkerboard == Odd ) { | ||||
|       DhopEO(in,out,DaggerNo); | ||||
|     } else { | ||||
|       DhopOE(in,out,DaggerNo); | ||||
|     } | ||||
|   } | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)  | ||||
|   { | ||||
|     if ( in.checkerboard == Odd ) { | ||||
|       DhopEO(in,out,DaggerYes); | ||||
|     } else { | ||||
|       DhopOE(in,out,DaggerYes); | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) { | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   Mooee(in, out); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) { | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   out = (1.0 / (4.0 + mass)) * in; | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, | ||||
|                                       FermionField &out) { | ||||
|   out.checkerboard = in.checkerboard; | ||||
|   MooeeInv(in, out); | ||||
| } | ||||
|  | ||||
| /////////////////////////////////// | ||||
| // Internal | ||||
| /////////////////////////////////// | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U, | ||||
|                                         GaugeField &mat, const FermionField &A, | ||||
|                                         const FermionField &B, int dag) { | ||||
|   assert((dag == DaggerNo) || (dag == DaggerYes)); | ||||
|  | ||||
|   Compressor compressor(dag); | ||||
|  | ||||
|   FermionField Btilde(B._grid); | ||||
|   FermionField Atilde(B._grid); | ||||
|   Atilde = A; | ||||
|  | ||||
|   st.HaloExchange(B, compressor); | ||||
|  | ||||
|   for (int mu = 0; mu < Nd; mu++) { | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     // Flip gamma (1+g)<->(1-g) if dag | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     int gamma = mu; | ||||
|     if (!dag) gamma += Nd; | ||||
|  | ||||
|     //////////////////////// | ||||
|     // Call the single hop | ||||
|     //////////////////////// | ||||
|     PARALLEL_FOR_LOOP | ||||
|     for (int sss = 0; sss < B._grid->oSites(); sss++) { | ||||
|       Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu, | ||||
|                                gamma); | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////////////////// | ||||
|     // spin trace outer product | ||||
|     ////////////////////////////////////////////////// | ||||
|     Impl::InsertForce4D(mat, Btilde, Atilde, mu); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, | ||||
|                                     const FermionField &V, int dag) { | ||||
|   conformable(U._grid, _grid); | ||||
|   conformable(U._grid, V._grid); | ||||
|   conformable(U._grid, mat._grid); | ||||
|  | ||||
|   mat.checkerboard = U.checkerboard; | ||||
|  | ||||
|   DerivInternal(Stencil, Umu, mat, U, V, dag); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, | ||||
|                                       const FermionField &V, int dag) { | ||||
|   conformable(U._grid, _cbgrid); | ||||
|   conformable(U._grid, V._grid); | ||||
|   conformable(U._grid, mat._grid); | ||||
|  | ||||
|   assert(V.checkerboard == Even); | ||||
|   assert(U.checkerboard == Odd); | ||||
|   mat.checkerboard = Odd; | ||||
|  | ||||
|   DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, | ||||
|                                       const FermionField &V, int dag) { | ||||
|   conformable(U._grid, _cbgrid); | ||||
|   conformable(U._grid, V._grid); | ||||
|   conformable(U._grid, mat._grid); | ||||
|  | ||||
|   assert(V.checkerboard == Odd); | ||||
|   assert(U.checkerboard == Even); | ||||
|   mat.checkerboard = Even; | ||||
|  | ||||
|   DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, | ||||
|                                int dag) { | ||||
|   conformable(in._grid, _grid);  // verifies full grid | ||||
|   conformable(in._grid, out._grid); | ||||
|  | ||||
|   out.checkerboard = in.checkerboard; | ||||
|  | ||||
|   DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, | ||||
|                                  int dag) { | ||||
|   conformable(in._grid, _cbgrid);    // verifies half grid | ||||
|   conformable(in._grid, out._grid);  // drops the cb check | ||||
|  | ||||
|   assert(in.checkerboard == Even); | ||||
|   out.checkerboard = Odd; | ||||
|  | ||||
|   DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, | ||||
|                                  int dag) { | ||||
|   conformable(in._grid, _cbgrid);    // verifies half grid | ||||
|   conformable(in._grid, out._grid);  // drops the cb check | ||||
|  | ||||
|   assert(in.checkerboard == Odd); | ||||
|   out.checkerboard = Even; | ||||
|  | ||||
|   DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, | ||||
|                                int dir, int disp) { | ||||
|   DhopDir(in, out, dir, disp); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, | ||||
|                                   int dir, int disp) { | ||||
|   int skip = (disp == 1) ? 0 : 1; | ||||
|   int dirdisp = dir + skip * 4; | ||||
|   int gamma = dir + (1 - skip) * 4; | ||||
|  | ||||
|   DhopDirDisp(in, out, dirdisp, gamma, DaggerNo); | ||||
| }; | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out, | ||||
|                                       int dirdisp, int gamma, int dag) { | ||||
|   Compressor compressor(dag); | ||||
|  | ||||
|   Stencil.HaloExchange(in, compressor); | ||||
|  | ||||
|   PARALLEL_FOR_LOOP | ||||
|   for (int sss = 0; sss < in._grid->oSites(); sss++) { | ||||
|     Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out, | ||||
|                              dirdisp, gamma); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, | ||||
|                                        DoubledGaugeField &U, | ||||
|                                        const FermionField &in, | ||||
|                                        FermionField &out, int dag) { | ||||
|   assert((dag == DaggerNo) || (dag == DaggerYes)); | ||||
|  | ||||
|   Compressor compressor(dag); | ||||
|   st.HaloExchange(in, compressor); | ||||
|  | ||||
|   if (dag == DaggerYes) { | ||||
|     PARALLEL_FOR_LOOP | ||||
|     for (int sss = 0; sss < in._grid->oSites(); sss++) { | ||||
|       Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, | ||||
|                                    out); | ||||
|     } | ||||
|   } else { | ||||
|     PARALLEL_FOR_LOOP | ||||
|     for (int sss = 0; sss < in._grid->oSites(); sss++) { | ||||
|       Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, | ||||
|                                 out); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) { | ||||
|     out.checkerboard = in.checkerboard; | ||||
|     typename FermionField::scalar_type scal(4.0+mass); | ||||
|     out = scal*in; | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) { | ||||
|     out.checkerboard = in.checkerboard; | ||||
|     Mooee(in,out); | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) { | ||||
|     out.checkerboard = in.checkerboard; | ||||
|     out = (1.0/(4.0+mass))*in; | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) { | ||||
|     out.checkerboard = in.checkerboard; | ||||
|     MooeeInv(in,out); | ||||
|   } | ||||
|    | ||||
|   /////////////////////////////////// | ||||
|   // Internal | ||||
|   /////////////////////////////////// | ||||
|  | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::DerivInternal(StencilImpl & st, | ||||
| 					  DoubledGaugeField & U, | ||||
| 					  GaugeField &mat, | ||||
| 					  const FermionField &A, | ||||
| 					  const FermionField &B,int dag) { | ||||
| 	 | ||||
|     assert((dag==DaggerNo) ||(dag==DaggerYes)); | ||||
|      | ||||
|     Compressor compressor(dag); | ||||
|      | ||||
|     FermionField Btilde(B._grid); | ||||
|     FermionField Atilde(B._grid); | ||||
|     Atilde = A; | ||||
|  | ||||
|     st.HaloExchange(B,compressor); | ||||
|      | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|        | ||||
|       //////////////////////////////////////////////////////////////////////// | ||||
|       // Flip gamma (1+g)<->(1-g) if dag | ||||
|       //////////////////////////////////////////////////////////////////////// | ||||
|       int gamma = mu; | ||||
|       if ( !dag ) gamma+= Nd; | ||||
|        | ||||
|       //////////////////////// | ||||
|       // Call the single hop | ||||
|       //////////////////////// | ||||
| PARALLEL_FOR_LOOP | ||||
| 	for(int sss=0;sss<B._grid->oSites();sss++){ | ||||
| 	  Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma); | ||||
| 	} | ||||
|        | ||||
|       ////////////////////////////////////////////////// | ||||
|       // spin trace outer product | ||||
|       ////////////////////////////////////////////////// | ||||
|       Impl::InsertForce4D(mat,Btilde,Atilde,mu); | ||||
|  | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   { | ||||
|     conformable(U._grid,_grid);   | ||||
|     conformable(U._grid,V._grid); | ||||
|     conformable(U._grid,mat._grid); | ||||
|      | ||||
|     mat.checkerboard = U.checkerboard; | ||||
|      | ||||
|     DerivInternal(Stencil,Umu,mat,U,V,dag); | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   { | ||||
|     conformable(U._grid,_cbgrid);   | ||||
|     conformable(U._grid,V._grid); | ||||
|     conformable(U._grid,mat._grid); | ||||
|      | ||||
|     assert(V.checkerboard==Even); | ||||
|     assert(U.checkerboard==Odd); | ||||
|     mat.checkerboard = Odd; | ||||
|      | ||||
|     DerivInternal(StencilEven,UmuOdd,mat,U,V,dag); | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   { | ||||
|     conformable(U._grid,_cbgrid);   | ||||
|     conformable(U._grid,V._grid); | ||||
|     conformable(U._grid,mat._grid); | ||||
| 	 | ||||
|     assert(V.checkerboard==Odd); | ||||
|     assert(U.checkerboard==Even); | ||||
|     mat.checkerboard = Even; | ||||
| 	 | ||||
|     DerivInternal(StencilOdd,UmuEven,mat,U,V,dag); | ||||
|   } | ||||
|    | ||||
|  | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) { | ||||
|     conformable(in._grid,_grid); // verifies full grid | ||||
|     conformable(in._grid,out._grid); | ||||
|      | ||||
|     out.checkerboard = in.checkerboard; | ||||
|      | ||||
|     DhopInternal(Stencil,Umu,in,out,dag); | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) { | ||||
|     conformable(in._grid,_cbgrid);    // verifies half grid | ||||
|     conformable(in._grid,out._grid); // drops the cb check | ||||
|      | ||||
|     assert(in.checkerboard==Even); | ||||
|     out.checkerboard = Odd; | ||||
|      | ||||
|     DhopInternal(StencilEven,UmuOdd,in,out,dag); | ||||
|   } | ||||
|    | ||||
|   template<class Impl> | ||||
|   void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) { | ||||
|     conformable(in._grid,_cbgrid);    // verifies half grid | ||||
|     conformable(in._grid,out._grid); // drops the cb check | ||||
|      | ||||
|     assert(in.checkerboard==Odd); | ||||
|     out.checkerboard = Even; | ||||
|      | ||||
|     DhopInternal(StencilOdd,UmuEven,in,out,dag); | ||||
|   } | ||||
|    | ||||
|   // Directional operator: a plain forwarder to DhopDir with the same | ||||
|   // direction and displacement. | ||||
|   template <class Impl> | ||||
|   void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, | ||||
|                                  int dir, int disp) { | ||||
|     DhopDir(in, out, dir, disp); | ||||
|   } | ||||
|    | ||||
|  | ||||
|   // Single-direction hop, always without dagger.  Translates (dir, disp) into | ||||
|   // the (dirdisp, gamma) pair expected by DhopDirDisp: | ||||
|   //   disp == 1 : dirdisp = dir,     gamma = dir + 4 | ||||
|   //   otherwise : dirdisp = dir + 4, gamma = dir | ||||
|   template <class Impl> | ||||
|   void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, | ||||
|                                     int dir, int disp) { | ||||
|     const bool plusOne = (disp == 1); | ||||
|     const int dirdispIndex = plusOne ? dir : dir + 4; | ||||
|     const int gammaIndex = plusOne ? dir + 4 : dir; | ||||
|  | ||||
|     DhopDirDisp(in, out, dirdispIndex, gammaIndex, DaggerNo); | ||||
|   } | ||||
|    | ||||
|   // Applies the single-direction kernel on every outer site of the input | ||||
|   // grid.  A halo exchange on the full-grid stencil is performed first, using | ||||
|   // a compressor constructed from dag; dirdisp and gamma are handed to the | ||||
|   // kernel untouched. | ||||
|   template <class Impl> | ||||
|   void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, | ||||
|                                         FermionField &out, int dirdisp, | ||||
|                                         int gamma, int dag) { | ||||
|     Compressor compressor(dag); | ||||
|     Stencil.HaloExchange(in, compressor); | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|     for (int site = 0; site < in._grid->oSites(); site++) { | ||||
|       // The same site index is passed for both site arguments of the kernel. | ||||
|       Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, site, site, in, | ||||
|                                out, dirdisp, gamma); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Common entry point for the hopping term on a given stencil / gauge-field | ||||
|   // pair.  Currently a straight forwarder to the comms-then-compute variant. | ||||
|   template <class Impl> | ||||
|   void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, | ||||
|                                          DoubledGaugeField &U, | ||||
|                                          const FermionField &in, | ||||
|                                          FermionField &out, int dag) { | ||||
|     DhopInternalCommsThenCompute(st, U, in, out, dag); | ||||
|   } | ||||
|   // Hopping term with communication completed before any compute starts: | ||||
|   // the halo exchange on st runs first, then one of four site kernels is | ||||
|   // applied to every outer site of the input grid.  The kernel is chosen by | ||||
|   // dag (daggered or not) and by the HandOptDslash switch (hand-optimised or | ||||
|   // generic). | ||||
|   template <class Impl> | ||||
|   void WilsonFermion<Impl>::DhopInternalCommsThenCompute( | ||||
|       StencilImpl &st, DoubledGaugeField &U, const FermionField &in, | ||||
|       FermionField &out, int dag) { | ||||
|     assert((dag == DaggerNo) || (dag == DaggerYes)); | ||||
|  | ||||
|     Compressor compressor(dag); | ||||
|     st.HaloExchange(in, compressor); | ||||
|  | ||||
|     const bool daggered = (dag == DaggerYes); | ||||
|  | ||||
|     if (!daggered) { | ||||
|       // Non-daggered kernels. | ||||
|       if (HandOptDslash) { | ||||
| PARALLEL_FOR_LOOP | ||||
|         for (int site = 0; site < in._grid->oSites(); site++) { | ||||
|           Kernels::DiracOptHandDhopSite(st, U, st.comm_buf, site, site, in, out); | ||||
|         } | ||||
|       } else { | ||||
| PARALLEL_FOR_LOOP | ||||
|         for (int site = 0; site < in._grid->oSites(); site++) { | ||||
|           Kernels::DiracOptDhopSite(st, U, st.comm_buf, site, site, in, out); | ||||
|         } | ||||
|       } | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Daggered kernels. | ||||
|     if (HandOptDslash) { | ||||
| PARALLEL_FOR_LOOP | ||||
|       for (int site = 0; site < in._grid->oSites(); site++) { | ||||
|         Kernels::DiracOptHandDhopSiteDag(st, U, st.comm_buf, site, site, in, out); | ||||
|       } | ||||
|     } else { | ||||
| PARALLEL_FOR_LOOP | ||||
|       for (int site = 0; site < in._grid->oSites(); site++) { | ||||
|         Kernels::DiracOptDhopSiteDag(st, U, st.comm_buf, site, site, in, out); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   | ||||
|   FermOpTemplateInstantiate(WilsonFermion); | ||||
|   GparityFermOpTemplateInstantiate(WilsonFermion); | ||||
|  | ||||
|  | ||||
| }} | ||||
|  | ||||
|  | ||||
|  | ||||
| FermOpTemplateInstantiate(WilsonFermion); | ||||
| AdjointFermOpTemplateInstantiate(WilsonFermion); | ||||
| TwoIndexFermOpTemplateInstantiate(WilsonFermion); | ||||
| GparityFermOpTemplateInstantiate(WilsonFermion); | ||||
| } | ||||
| } | ||||
|   | ||||
| @@ -1,160 +1,155 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/WilsonFermion.h | ||||
| Source file: ./lib/qcd/action/fermion/WilsonFermion.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef  GRID_QCD_WILSON_FERMION_H | ||||
| #define  GRID_QCD_WILSON_FERMION_H | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_QCD_WILSON_FERMION_H | ||||
| #define GRID_QCD_WILSON_FERMION_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   namespace QCD { | ||||
| namespace QCD { | ||||
|  | ||||
|     class WilsonFermionStatic { | ||||
|     public: | ||||
|       static int HandOptDslash; // these are a temporary hack | ||||
|       static int MortonOrder; | ||||
|       static const std::vector<int> directions   ; | ||||
|       static const std::vector<int> displacements; | ||||
|       static const int npoint=8; | ||||
|     }; | ||||
| class WilsonFermionStatic { | ||||
|  public: | ||||
|   static int HandOptDslash;  // these are a temporary hack | ||||
|   static int MortonOrder; | ||||
|   static const std::vector<int> directions; | ||||
|   static const std::vector<int> displacements; | ||||
|   static const int npoint = 8; | ||||
| }; | ||||
|  | ||||
|     template<class Impl> | ||||
|     class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic | ||||
|     { | ||||
|     public: | ||||
|     INHERIT_IMPL_TYPES(Impl); | ||||
|     typedef WilsonKernels<Impl> Kernels; | ||||
| template <class Impl> | ||||
| class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic { | ||||
|  public: | ||||
|   INHERIT_IMPL_TYPES(Impl); | ||||
|   typedef WilsonKernels<Impl> Kernels; | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Implement the abstract base | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       GridBase *GaugeGrid(void)              { return _grid ;} | ||||
|       GridBase *GaugeRedBlackGrid(void)      { return _cbgrid ;} | ||||
|       GridBase *FermionGrid(void)            { return _grid;} | ||||
|       GridBase *FermionRedBlackGrid(void)    { return _cbgrid;} | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Implement the abstract base | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   GridBase *GaugeGrid(void) { return _grid; } | ||||
|   GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } | ||||
|   GridBase *FermionGrid(void) { return _grid; } | ||||
|   GridBase *FermionRedBlackGrid(void) { return _cbgrid; } | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////////// | ||||
|       // override multiply; cut number routines if pass dagger argument | ||||
|       // and also make interface more uniformly consistent | ||||
|       ////////////////////////////////////////////////////////////////// | ||||
|       RealD M(const FermionField &in, FermionField &out); | ||||
|       RealD Mdag(const FermionField &in, FermionField &out); | ||||
|   ////////////////////////////////////////////////////////////////// | ||||
|   // override multiply; cut number routines if pass dagger argument | ||||
|   // and also make interface more uniformly consistent | ||||
|   ////////////////////////////////////////////////////////////////// | ||||
|   RealD M(const FermionField &in, FermionField &out); | ||||
|   RealD Mdag(const FermionField &in, FermionField &out); | ||||
|  | ||||
|       ///////////////////////////////////////////////////////// | ||||
|       // half checkerboard operations | ||||
|       // could remain virtual so we  can derive Clover from Wilson base | ||||
|       ///////////////////////////////////////////////////////// | ||||
|       void Meooe(const FermionField &in, FermionField &out) ; | ||||
|       void MeooeDag(const FermionField &in, FermionField &out) ; | ||||
|   ///////////////////////////////////////////////////////// | ||||
|   // half checkerboard operations | ||||
|   // could remain virtual so we  can derive Clover from Wilson base | ||||
|   ///////////////////////////////////////////////////////// | ||||
|   void Meooe(const FermionField &in, FermionField &out); | ||||
|   void MeooeDag(const FermionField &in, FermionField &out); | ||||
|  | ||||
|       // allow override for twisted mass and clover | ||||
|       virtual void Mooee(const FermionField &in, FermionField &out) ; | ||||
|       virtual void MooeeDag(const FermionField &in, FermionField &out) ; | ||||
|       virtual void MooeeInv(const FermionField &in, FermionField &out) ; | ||||
|       virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; | ||||
|   // allow override for twisted mass and clover | ||||
|   virtual void Mooee(const FermionField &in, FermionField &out); | ||||
|   virtual void MooeeDag(const FermionField &in, FermionField &out); | ||||
|   virtual void MooeeInv(const FermionField &in, FermionField &out); | ||||
|   virtual void MooeeInvDag(const FermionField &in, FermionField &out); | ||||
|  | ||||
|       //////////////////////// | ||||
|       // Derivative interface | ||||
|       //////////////////////// | ||||
|       // Interface calls an internal routine | ||||
|       void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|       void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|       void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|   //////////////////////// | ||||
|   // Derivative interface | ||||
|   //////////////////////// | ||||
|   // Interface calls an internal routine | ||||
|   void DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, | ||||
|                  int dag); | ||||
|   void DhopDerivOE(GaugeField &mat, const FermionField &U, | ||||
|                    const FermionField &V, int dag); | ||||
|   void DhopDerivEO(GaugeField &mat, const FermionField &U, | ||||
|                    const FermionField &V, int dag); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // non-hermitian hopping term; half cb or both | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   void Dhop(const FermionField &in, FermionField &out, int dag); | ||||
|   void DhopOE(const FermionField &in, FermionField &out, int dag); | ||||
|   void DhopEO(const FermionField &in, FermionField &out, int dag); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Multigrid assistance; force term uses too | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   void Mdir(const FermionField &in, FermionField &out, int dir, int disp); | ||||
|   void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); | ||||
|   void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp, | ||||
|                    int gamma, int dag); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Extra methods added by derived | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat, | ||||
|                      const FermionField &A, const FermionField &B, int dag); | ||||
|  | ||||
|   void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
|                     const FermionField &in, FermionField &out, int dag); | ||||
|  | ||||
|   // Constructor | ||||
|   WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, | ||||
|                 GridRedBlackCartesian &Hgrid, RealD _mass, | ||||
|                 const ImplParams &p = ImplParams()); | ||||
|  | ||||
|   // DoubleStore impl dependent | ||||
|   void ImportGauge(const GaugeField &_Umu); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Data members require to support the functionality | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   //    protected: | ||||
|  public: | ||||
|   RealD mass; | ||||
|  | ||||
|   GridBase *_grid; | ||||
|   GridBase *_cbgrid; | ||||
|  | ||||
|   // Defines the stencils for even and odd | ||||
|   StencilImpl Stencil; | ||||
|   StencilImpl StencilEven; | ||||
|   StencilImpl StencilOdd; | ||||
|  | ||||
|   // Copy of the gauge field , with even and odd subsets | ||||
|   DoubledGaugeField Umu; | ||||
|   DoubledGaugeField UmuEven; | ||||
|   DoubledGaugeField UmuOdd; | ||||
|  | ||||
|   LebesgueOrder Lebesgue; | ||||
|   LebesgueOrder LebesgueEvenOdd; | ||||
| }; | ||||
|  | ||||
| typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||
| typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||
|  | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // non-hermitian hopping term; half cb or both | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       void Dhop(const FermionField &in, FermionField &out,int dag) ; | ||||
|       void DhopOE(const FermionField &in, FermionField &out,int dag) ; | ||||
|       void DhopEO(const FermionField &in, FermionField &out,int dag) ; | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Multigrid assistance; force term uses too | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ; | ||||
|       void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); | ||||
|       void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ; | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Extra methods added by derived | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       void DerivInternal(StencilImpl & st, | ||||
| 			 DoubledGaugeField & U, | ||||
| 			 GaugeField &mat, | ||||
| 			 const FermionField &A, | ||||
| 			 const FermionField &B, | ||||
| 			 int dag); | ||||
|  | ||||
|       void DhopInternal(StencilImpl & st,DoubledGaugeField & U, | ||||
| 			const FermionField &in, FermionField &out,int dag) ; | ||||
|  | ||||
|       void DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U, | ||||
| 				    const FermionField &in, FermionField &out,int dag) ; | ||||
|  | ||||
|       // Constructor | ||||
|       WilsonFermion(GaugeField &_Umu, | ||||
| 		    GridCartesian         &Fgrid, | ||||
| 		    GridRedBlackCartesian &Hgrid,  | ||||
| 		    RealD _mass, | ||||
| 		    const ImplParams &p= ImplParams() | ||||
| 		    ) ; | ||||
|  | ||||
|       // DoubleStore impl dependent | ||||
|       void ImportGauge(const GaugeField &_Umu); | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Data members require to support the functionality | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       //    protected: | ||||
|     public: | ||||
|  | ||||
|       RealD                        mass; | ||||
|  | ||||
|       GridBase                     *    _grid;  | ||||
|       GridBase                     *  _cbgrid; | ||||
|  | ||||
|       //Defines the stencils for even and odd | ||||
|       StencilImpl Stencil;  | ||||
|       StencilImpl StencilEven;  | ||||
|       StencilImpl StencilOdd;  | ||||
|  | ||||
|       // Copy of the gauge field , with even and odd subsets | ||||
|       DoubledGaugeField Umu; | ||||
|       DoubledGaugeField UmuEven; | ||||
|       DoubledGaugeField UmuOdd; | ||||
|        | ||||
|     }; | ||||
|  | ||||
|     typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||
|     typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||
|  | ||||
|   } | ||||
| } | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
|  | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| @@ -39,79 +38,15 @@ namespace QCD { | ||||
| // S-direction is INNERMOST and takes no part in the parity. | ||||
| const std::vector<int> WilsonFermion5DStatic::directions   ({1,2,3,4, 1, 2, 3, 4}); | ||||
| const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1}); | ||||
| int WilsonFermion5DStatic::HandOptDslash; | ||||
| int WilsonFermion5DStatic::AsmOptDslash; | ||||
|  | ||||
|   // 5d lattice for DWF. | ||||
| template<class Impl> | ||||
| WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, | ||||
| 				       GridCartesian         &FiveDimGrid, | ||||
| 				       GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||
| 				       GridCartesian         &FourDimGrid, | ||||
| 				       GridRedBlackCartesian &FourDimRedBlackGrid, | ||||
| 				       RealD _M5,const ImplParams &p) : | ||||
|   Kernels(p), | ||||
|   _FiveDimGrid(&FiveDimGrid), | ||||
|   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), | ||||
|   _FourDimGrid(&FourDimGrid), | ||||
|   _FourDimRedBlackGrid(&FourDimRedBlackGrid), | ||||
|   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements), | ||||
|   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even | ||||
|   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd | ||||
|   M5(_M5), | ||||
|   Umu(_FourDimGrid), | ||||
|   UmuEven(_FourDimRedBlackGrid), | ||||
|   UmuOdd (_FourDimRedBlackGrid), | ||||
|   Lebesgue(_FourDimGrid), | ||||
|   LebesgueEvenOdd(_FourDimRedBlackGrid) | ||||
| { | ||||
|   // some assertions | ||||
|   assert(FiveDimGrid._ndimension==5); | ||||
|   assert(FourDimGrid._ndimension==4); | ||||
|   assert(FiveDimRedBlackGrid._ndimension==5); | ||||
|   assert(FourDimRedBlackGrid._ndimension==4); | ||||
|   assert(FiveDimRedBlackGrid._checker_dim==1); | ||||
|  | ||||
|   // Dimension zero of the five-d is the Ls direction | ||||
|   Ls=FiveDimGrid._fdimensions[0]; | ||||
|   assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); | ||||
|   assert(FiveDimRedBlackGrid._processors[0] ==1); | ||||
|   assert(FiveDimRedBlackGrid._simd_layout[0]==1); | ||||
|   assert(FiveDimGrid._processors[0]         ==1); | ||||
|   assert(FiveDimGrid._simd_layout[0]        ==1); | ||||
|  | ||||
|   // Other dimensions must match the decomposition of the four-D fields  | ||||
|   for(int d=0;d<4;d++){ | ||||
|     assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]); | ||||
|     assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); | ||||
|  | ||||
|     assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]); | ||||
|     assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); | ||||
|  | ||||
|     assert(FourDimRedBlackGrid._simd_layout[d]  ==FourDimGrid._simd_layout[d]); | ||||
|     assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); | ||||
|  | ||||
|     assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]); | ||||
|     assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]); | ||||
|     assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]); | ||||
|   } | ||||
|  | ||||
|   // Allocate the required comms buffer | ||||
|   ImportGauge(_Umu); | ||||
|   alltime=0; | ||||
|   commtime=0; | ||||
|   jointime=0; | ||||
|   dslashtime=0; | ||||
|   dslash1time=0; | ||||
| }   | ||||
|  | ||||
| template<class Impl> | ||||
| WilsonFermion5D<Impl>::WilsonFermion5D(int simd, GaugeField &_Umu, | ||||
| 				       GridCartesian         &FiveDimGrid, | ||||
| 				       GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||
| 				       GridCartesian         &FourDimGrid, | ||||
| 				       GridRedBlackCartesian &FourDimRedBlackGrid, | ||||
| 				       RealD _M5,const ImplParams &p) : | ||||
|                GridCartesian         &FiveDimGrid, | ||||
|                GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||
|                GridCartesian         &FourDimGrid, | ||||
|                GridRedBlackCartesian &FourDimRedBlackGrid, | ||||
|                RealD _M5,const ImplParams &p) : | ||||
|   Kernels(p), | ||||
|   _FiveDimGrid        (&FiveDimGrid), | ||||
|   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), | ||||
| @@ -126,6 +61,84 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd, GaugeField &_Umu, | ||||
|   UmuOdd (_FourDimRedBlackGrid), | ||||
|   Lebesgue(_FourDimGrid), | ||||
|   LebesgueEvenOdd(_FourDimRedBlackGrid) | ||||
| { | ||||
|   // Constructor body: sanity-check the four- and five-dimensional grids | ||||
|   // against each other, record Ls, then import the gauge field.  Two grid | ||||
|   // layouts are accepted, selected by Impl::LsVectorised. | ||||
|   // | ||||
|   // The per-dimension SIMD-layout checks in the LsVectorised branch are true | ||||
|   // comparisons (==).  They were previously written with '=', which made the | ||||
|   // assertion always pass and, in builds where assert is active, overwrote | ||||
|   // the caller's grid layout as a side effect. | ||||
|   if (Impl::LsVectorised) {  | ||||
|  | ||||
|     int nsimd = Simd::Nsimd(); | ||||
|      | ||||
|     // some assertions | ||||
|     assert(FiveDimGrid._ndimension==5); | ||||
|     assert(FiveDimRedBlackGrid._ndimension==5); | ||||
|     assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction | ||||
|     assert(FourDimGrid._ndimension==4); | ||||
|  | ||||
|     // Dimension zero of the five-d is the Ls direction; it is not split | ||||
|     // across processors and carries the whole SIMD width. | ||||
|     Ls=FiveDimGrid._fdimensions[0]; | ||||
|     assert(FiveDimGrid._processors[0]         ==1); | ||||
|     assert(FiveDimGrid._simd_layout[0]        ==nsimd); | ||||
|  | ||||
|     assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); | ||||
|     assert(FiveDimRedBlackGrid._processors[0] ==1); | ||||
|     assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); | ||||
|  | ||||
|     // Other dimensions must match the decomposition of the four-D fields  | ||||
|     for(int d=0;d<4;d++){ | ||||
|       assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); | ||||
|       assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); | ||||
|        | ||||
|       // No SIMD decomposition is allowed in the four space-time dimensions. | ||||
|       assert(FourDimGrid._simd_layout[d]==1); | ||||
|       assert(FourDimRedBlackGrid._simd_layout[d]==1); | ||||
|       assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); | ||||
|  | ||||
|       assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]); | ||||
|       assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]); | ||||
|       assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]); | ||||
|     } | ||||
|  | ||||
|   } else { | ||||
|  | ||||
|     // some assertions | ||||
|     assert(FiveDimGrid._ndimension==5); | ||||
|     assert(FourDimGrid._ndimension==4); | ||||
|     assert(FiveDimRedBlackGrid._ndimension==5); | ||||
|     assert(FourDimRedBlackGrid._ndimension==4); | ||||
|     assert(FiveDimRedBlackGrid._checker_dim==1); | ||||
|      | ||||
|     // Dimension zero of the five-d is the Ls direction; here it is neither | ||||
|     // split across processors nor SIMD-vectorised. | ||||
|     Ls=FiveDimGrid._fdimensions[0]; | ||||
|     assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); | ||||
|     assert(FiveDimRedBlackGrid._processors[0] ==1); | ||||
|     assert(FiveDimRedBlackGrid._simd_layout[0]==1); | ||||
|     assert(FiveDimGrid._processors[0]         ==1); | ||||
|     assert(FiveDimGrid._simd_layout[0]        ==1); | ||||
|      | ||||
|     // Other dimensions must match the decomposition of the four-D fields  | ||||
|     for(int d=0;d<4;d++){ | ||||
|       assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]); | ||||
|       assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); | ||||
|        | ||||
|       assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]); | ||||
|       assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); | ||||
|        | ||||
|       assert(FourDimRedBlackGrid._simd_layout[d]  ==FourDimGrid._simd_layout[d]); | ||||
|       assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); | ||||
|        | ||||
|       assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]); | ||||
|       assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]); | ||||
|       assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]); | ||||
|     } | ||||
|   } | ||||
|      | ||||
|   // Allocate the required comms buffer | ||||
|   ImportGauge(_Umu); | ||||
| } | ||||
|   /* | ||||
| template<class Impl> | ||||
| WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu, | ||||
|                GridCartesian         &FiveDimGrid, | ||||
|                GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||
|                GridCartesian         &FourDimGrid, | ||||
|                RealD _M5,const ImplParams &p) : | ||||
| { | ||||
|   int nsimd = Simd::Nsimd(); | ||||
|  | ||||
| @@ -134,7 +147,6 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd, GaugeField &_Umu, | ||||
|   assert(FiveDimRedBlackGrid._ndimension==5); | ||||
|   assert(FiveDimRedBlackGrid._checker_dim==0); // Checkerboard the s-direction | ||||
|   assert(FourDimGrid._ndimension==4); | ||||
|   assert(FourDimRedBlackGrid._ndimension==4); | ||||
|  | ||||
|   // Dimension zero of the five-d is the Ls direction | ||||
|   Ls=FiveDimGrid._fdimensions[0]; | ||||
| @@ -147,15 +159,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd, GaugeField &_Umu, | ||||
|  | ||||
|   // Other dimensions must match the decomposition of the four-D fields  | ||||
|   for(int d=0;d<4;d++){ | ||||
|     assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]); | ||||
|     assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); | ||||
|  | ||||
|     assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]); | ||||
|     assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); | ||||
|  | ||||
|     assert(FourDimGrid._simd_layout[d]=1); | ||||
|     assert(FourDimRedBlackGrid._simd_layout[d]  ==1); | ||||
|     assert(FourDimRedBlackGrid._simd_layout[d]  ==1); | ||||
|     assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); | ||||
|  | ||||
|     assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]); | ||||
| @@ -163,9 +170,76 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd, GaugeField &_Umu, | ||||
|     assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]); | ||||
|   } | ||||
|  | ||||
|   // Allocate the required comms buffer | ||||
|   ImportGauge(_Umu); | ||||
|   { | ||||
|   } | ||||
| }   | ||||
|   */ | ||||
|       | ||||
| // Prints the accumulated Dhop and Deriv counters and timings, the mflops | ||||
| // figures derived from them, and the reports of the three stencils.  Each | ||||
| // section is printed only when the corresponding call counter is positive; | ||||
| // the stencil reports are printed when either counter is positive. | ||||
| template <class Impl> | ||||
| void WilsonFermion5D<Impl>::Report(void) { | ||||
|   // Volume used in the flop estimates: Ls times the default lattice extents. | ||||
|   std::vector<int> dims = GridDefaultLatt(); | ||||
|   RealD volume = Ls; | ||||
|   for (int mu = 0; mu < Nd; mu++) { | ||||
|     volume = volume * dims[mu]; | ||||
|   } | ||||
|   RealD NP = _FourDimGrid->_Nprocessors; | ||||
|  | ||||
|   if (DhopCalls > 0) { | ||||
|     RealD dhopMflops = 1344*volume*DhopCalls/DhopComputeTime; | ||||
|  | ||||
|     std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls     : " | ||||
|               << DhopCalls << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " | ||||
|               << DhopCommTime << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : " | ||||
|               << DhopCommTime / DhopCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : " | ||||
|               << DhopComputeTime << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : " | ||||
|               << DhopComputeTime / DhopCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call                : " | ||||
|               << dhopMflops << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call per node       : " | ||||
|               << dhopMflops / NP << std::endl; | ||||
|   } | ||||
|  | ||||
|   if (DerivCalls > 0) { | ||||
|     RealD derivMflops = 144*volume*DerivCalls/DerivDhopComputeTime; | ||||
|  | ||||
|     std::cout << GridLogMessage << "#### Deriv calls report " << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls    : " | ||||
|               << DerivCalls << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " | ||||
|               << DerivCommTime << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls           : " | ||||
|               << DerivCommTime / DerivCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Total Compute time       : " | ||||
|               << DerivComputeTime << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls        : " | ||||
|               << DerivComputeTime / DerivCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time  : " | ||||
|               << DerivDhopComputeTime << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls   : " | ||||
|               << DerivDhopComputeTime / DerivCalls << " us" << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call                : " | ||||
|               << derivMflops << std::endl; | ||||
|     std::cout << GridLogMessage << "Average mflops/s per call per node       : " | ||||
|               << derivMflops / NP << std::endl; | ||||
|   } | ||||
|  | ||||
|   // Nothing was called: skip the stencil reports. | ||||
|   if (!(DerivCalls > 0) && !(DhopCalls > 0)) { | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   std::cout << GridLogMessage << "WilsonFermion5D Stencil" << std::endl; | ||||
|   Stencil.Report(); | ||||
|   std::cout << GridLogMessage << "WilsonFermion5D StencilEven" << std::endl; | ||||
|   StencilEven.Report(); | ||||
|   std::cout << GridLogMessage << "WilsonFermion5D StencilOdd" << std::endl; | ||||
|   StencilOdd.Report(); | ||||
| } | ||||
|  | ||||
| // Resets every performance counter read by Report(): the Dhop and Deriv | ||||
| // call counts and timers of this operator, and the counters of its three | ||||
| // stencils. | ||||
| template <class Impl> | ||||
| void WilsonFermion5D<Impl>::ZeroCounters(void) { | ||||
|   // Hopping-term statistics. | ||||
|   DhopCalls = 0; | ||||
|   DhopCommTime = 0; | ||||
|   DhopComputeTime = 0; | ||||
|  | ||||
|   // Derivative statistics. | ||||
|   DerivCalls = 0; | ||||
|   DerivCommTime = 0; | ||||
|   DerivComputeTime = 0; | ||||
|   DerivDhopComputeTime = 0; | ||||
|  | ||||
|   // Per-stencil statistics. | ||||
|   Stencil.ZeroCounters(); | ||||
|   StencilEven.ZeroCounters(); | ||||
|   StencilOdd.ZeroCounters(); | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Impl> | ||||
| @@ -208,12 +282,13 @@ PARALLEL_FOR_LOOP | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, | ||||
| 					  DoubledGaugeField & U, | ||||
| 					  GaugeField &mat, | ||||
| 					  const FermionField &A, | ||||
| 					  const FermionField &B, | ||||
| 					  int dag) | ||||
|             DoubledGaugeField & U, | ||||
|             GaugeField &mat, | ||||
|             const FermionField &A, | ||||
|             const FermionField &B, | ||||
|             int dag) | ||||
| { | ||||
|   DerivCalls++; | ||||
|   assert((dag==DaggerNo) ||(dag==DaggerYes)); | ||||
|  | ||||
|   conformable(st._grid,A._grid); | ||||
| @@ -224,51 +299,53 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, | ||||
|   FermionField Btilde(B._grid); | ||||
|   FermionField Atilde(B._grid); | ||||
|  | ||||
|   DerivCommTime-=usecond(); | ||||
|   st.HaloExchange(B,compressor); | ||||
|   DerivCommTime+=usecond(); | ||||
|  | ||||
|   Atilde=A; | ||||
|  | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|        | ||||
|   DerivComputeTime-=usecond(); | ||||
|   for (int mu = 0; mu < Nd; mu++) { | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     // Flip gamma if dag | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     int gamma = mu; | ||||
|     if ( !dag ) gamma+= Nd; | ||||
|     if (!dag) gamma += Nd; | ||||
|  | ||||
|     //////////////////////// | ||||
|     // Call the single hop | ||||
|     //////////////////////// | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|     for(int sss=0;sss<U._grid->oSites();sss++){ | ||||
|       for(int s=0;s<Ls;s++){ | ||||
| 	int sU=sss; | ||||
| 	int sF = s+Ls*sU; | ||||
|     DerivDhopComputeTime -= usecond(); | ||||
|     PARALLEL_FOR_LOOP | ||||
|     for (int sss = 0; sss < U._grid->oSites(); sss++) { | ||||
|       for (int s = 0; s < Ls; s++) { | ||||
|         int sU = sss; | ||||
|         int sF = s + Ls * sU; | ||||
|  | ||||
| 	assert ( sF< B._grid->oSites()); | ||||
| 	assert ( sU< U._grid->oSites()); | ||||
|         assert(sF < B._grid->oSites()); | ||||
|         assert(sU < U._grid->oSites()); | ||||
|  | ||||
| 	Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma); | ||||
|  | ||||
|     //////////////////////////// | ||||
|     // spin trace outer product | ||||
|     //////////////////////////// | ||||
|         Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu, | ||||
|                                  gamma); | ||||
|  | ||||
|         //////////////////////////// | ||||
|         // spin trace outer product | ||||
|         //////////////////////////// | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     Impl::InsertForce5D(mat,Btilde,Atilde,mu); | ||||
|  | ||||
|     DerivDhopComputeTime += usecond(); | ||||
|     Impl::InsertForce5D(mat, Btilde, Atilde, mu); | ||||
|   } | ||||
|   DerivComputeTime += usecond(); | ||||
| } | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat, | ||||
| 					    const FermionField &A, | ||||
| 					    const FermionField &B, | ||||
| 					    int dag) | ||||
|               const FermionField &A, | ||||
|               const FermionField &B, | ||||
|               int dag) | ||||
| { | ||||
|   conformable(A._grid,FermionGrid());   | ||||
|   conformable(A._grid,B._grid); | ||||
| @@ -281,9 +358,9 @@ void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat, | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, | ||||
| 					const FermionField &A, | ||||
| 					const FermionField &B, | ||||
| 					int dag) | ||||
|           const FermionField &A, | ||||
|           const FermionField &B, | ||||
|           int dag) | ||||
| { | ||||
|   conformable(A._grid,FermionRedBlackGrid()); | ||||
|   conformable(GaugeRedBlackGrid(),mat._grid); | ||||
| @@ -297,35 +374,11 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::Report(void) | ||||
| { | ||||
|   std::cout<<GridLogMessage << "******************** WilsonFermion"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Wilson5d      time "<<alltime <<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "HaloBegin     time "<<commtime <<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Dslash        time "<<dslashtime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Dslash1       time "<<dslash1time<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "HaloComplete  time "<<jointime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "******************** Stencil"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil all gather      time "<<Stencil.halogtime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil nosplice gather time "<<Stencil.nosplicetime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil splice   gather time "<<Stencil.splicetime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "********************"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil gather        "<<Stencil.gathertime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil gather simd   "<<Stencil.gathermtime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil merge  simd   "<<Stencil.mergetime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil spin   simd   "<<Stencil.spintime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "********************"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil MB/s          "<<(double)Stencil.comms_bytes/Stencil.commtime<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil comm     time "<<Stencil.commtime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Stencil join     time "<<Stencil.jointime<<" us"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "********************"<<std::endl; | ||||
| } | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, | ||||
| 				  const FermionField &A, | ||||
| 				  const FermionField &B, | ||||
| 				  int dag) | ||||
|           const FermionField &A, | ||||
|           const FermionField &B, | ||||
|           int dag) | ||||
| { | ||||
|   conformable(A._grid,FermionRedBlackGrid()); | ||||
|   conformable(GaugeRedBlackGrid(),mat._grid); | ||||
| @@ -340,92 +393,61 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, | ||||
| 					 DoubledGaugeField & U, | ||||
| 					 const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|     DhopInternalCommsThenCompute(st,lo,U,in,out,dag); | ||||
| } | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopInternalCommsThenCompute(StencilImpl & st, LebesgueOrder &lo, | ||||
| 					 DoubledGaugeField & U, | ||||
| 					 const FermionField &in, FermionField &out,int dag) | ||||
|            DoubledGaugeField & U, | ||||
|            const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|   DhopCalls++; | ||||
|   //  assert((dag==DaggerNo) ||(dag==DaggerYes)); | ||||
|   alltime-=usecond(); | ||||
|   Compressor compressor(dag); | ||||
|  | ||||
|   // Assume balanced KMP_AFFINITY; this is forced in GridThread.h | ||||
|   int LLs = in._grid->_rdimensions[0]; | ||||
|    | ||||
|   commtime -=usecond(); | ||||
|   //  auto handle = st.HaloExchangeBegin(in,compressor); | ||||
|   //  st.HaloExchangeComplete(handle); | ||||
|   DhopCommTime-=usecond(); | ||||
|   st.HaloExchange(in,compressor); | ||||
|   commtime +=usecond(); | ||||
|  | ||||
|   jointime -=usecond(); | ||||
|   jointime +=usecond(); | ||||
|   DhopCommTime+=usecond(); | ||||
|    | ||||
|   DhopComputeTime-=usecond(); | ||||
|   // Dhop takes the 4d grid from U, and makes a 5d index for fermion | ||||
|   // Note loop ordering and data layout. | ||||
|   // Designed to create  | ||||
|   // - per thread reuse in L1 cache for U | ||||
|   // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable. | ||||
|   dslashtime -=usecond(); | ||||
|   if ( dag == DaggerYes ) { | ||||
|     if( this->HandOptDslash ) { | ||||
| PARALLEL_FOR_LOOP | ||||
|       for(int ss=0;ss<U._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<LLs;s++){ | ||||
| 	  int sU=ss; | ||||
| 	  int sF = s+LLs*sU; | ||||
| 	  Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out); | ||||
| 	  } | ||||
|       } | ||||
|     } else {  | ||||
| PARALLEL_FOR_LOOP | ||||
|       for(int ss=0;ss<U._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<LLs;s++){ | ||||
| 	  int sU=ss; | ||||
| 	  int sF = s+LLs*sU; | ||||
| 	  Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out); | ||||
| 	} | ||||
|       } | ||||
|   if (dag == DaggerYes) { | ||||
|     PARALLEL_FOR_LOOP | ||||
|     for (int ss = 0; ss < U._grid->oSites(); ss++) { | ||||
|       int sU = ss; | ||||
|       int sF = LLs * sU; | ||||
|       Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, | ||||
|                                    out); | ||||
|     } | ||||
|   } else { | ||||
|     if( this->AsmOptDslash ) { | ||||
| #ifdef AVX512 | ||||
|   } else if (stat.is_init() ) { | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|       for(int ss=0;ss<U._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<LLs;s++){ | ||||
| 	  int sU=ss; | ||||
| 	  int sF = s+LLs*sU; | ||||
| 	  Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out); | ||||
| 	} | ||||
|       } | ||||
|     } else if( this->HandOptDslash ) { | ||||
| PARALLEL_FOR_LOOP      | ||||
|       for(int ss=0;ss<U._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<LLs;s++){ | ||||
| 	  int sU=ss; | ||||
| 	  int sF = s+LLs*sU; | ||||
| 	  Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out); | ||||
| 	} | ||||
|       } | ||||
|     } else {  | ||||
| PARALLEL_FOR_LOOP | ||||
|       for(int ss=0;ss<U._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<LLs;s++){ | ||||
| 	  int sU=ss; | ||||
| 	  int sF = s+LLs*sU;  | ||||
| 	  Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out); | ||||
| 	} | ||||
|       } | ||||
|     int nthreads; | ||||
|     stat.start(); | ||||
|     #pragma omp parallel | ||||
|     { | ||||
|     #pragma omp master | ||||
|     nthreads = omp_get_num_threads(); | ||||
|     int mythread = omp_get_thread_num(); | ||||
|     stat.enter(mythread); | ||||
|     #pragma omp for nowait | ||||
|    for(int ss=0;ss<U._grid->oSites();ss++) | ||||
|     { | ||||
|        int sU=ss; | ||||
|        int sF=LLs*sU; | ||||
|        Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); | ||||
|      } | ||||
|     stat.exit(mythread); | ||||
|     } | ||||
|     stat.accum(nthreads); | ||||
| #endif | ||||
|   } else { | ||||
|     PARALLEL_FOR_LOOP | ||||
|     for (int ss = 0; ss < U._grid->oSites(); ss++) { | ||||
|       int sU = ss; | ||||
|       int sF = LLs * sU; | ||||
|       Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, | ||||
|                                 out); | ||||
|     } | ||||
|   } | ||||
|   dslashtime +=usecond(); | ||||
|   alltime+=usecond(); | ||||
|   DhopComputeTime+=usecond(); | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -471,9 +493,7 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag | ||||
|  | ||||
| FermOpTemplateInstantiate(WilsonFermion5D); | ||||
| GparityFermOpTemplateInstantiate(WilsonFermion5D); | ||||
| template class WilsonFermion5D<DomainWallRedBlack5dImplF>;		 | ||||
| template class WilsonFermion5D<DomainWallRedBlack5dImplD>; | ||||
|  | ||||
|    | ||||
| }} | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -31,6 +31,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef  GRID_QCD_WILSON_FERMION_5D_H | ||||
| #define  GRID_QCD_WILSON_FERMION_5D_H | ||||
|  | ||||
| #include <Grid/Stat.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   namespace QCD { | ||||
| @@ -49,8 +51,6 @@ namespace Grid { | ||||
|     class WilsonFermion5DStatic {  | ||||
|     public: | ||||
|       // S-direction is INNERMOST and takes no part in the parity. | ||||
|       static int AsmOptDslash; // these are a temporary hack | ||||
|       static int HandOptDslash; // these are a temporary hack | ||||
|       static const std::vector<int> directions; | ||||
|       static const std::vector<int> displacements; | ||||
|       const int npoint = 8; | ||||
| @@ -62,11 +62,19 @@ namespace Grid { | ||||
|     public: | ||||
|      INHERIT_IMPL_TYPES(Impl); | ||||
|      typedef WilsonKernels<Impl> Kernels; | ||||
|      double alltime; | ||||
|      double jointime; | ||||
|      double commtime; | ||||
|      double dslashtime; | ||||
|      double dslash1time; | ||||
|      PmuStat stat; | ||||
|  | ||||
|      void Report(void); | ||||
|      void ZeroCounters(void); | ||||
|      double DhopCalls; | ||||
|      double DhopCommTime; | ||||
|      double DhopComputeTime; | ||||
|  | ||||
|      double DerivCalls; | ||||
|      double DerivCommTime; | ||||
|      double DerivComputeTime; | ||||
|      double DerivDhopComputeTime; | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Implement the abstract base | ||||
|       /////////////////////////////////////////////////////////////// | ||||
| @@ -122,13 +130,6 @@ namespace Grid { | ||||
| 			FermionField &out, | ||||
| 			int dag); | ||||
|  | ||||
|       void DhopInternalCommsThenCompute(StencilImpl & st, | ||||
| 			LebesgueOrder &lo, | ||||
| 			DoubledGaugeField &U, | ||||
| 			const FermionField &in,  | ||||
| 			FermionField &out, | ||||
| 			int dag); | ||||
|  | ||||
|       // Constructors | ||||
|       WilsonFermion5D(GaugeField &_Umu, | ||||
| 		      GridCartesian         &FiveDimGrid, | ||||
| @@ -138,18 +139,18 @@ namespace Grid { | ||||
| 		      double _M5,const ImplParams &p= ImplParams()); | ||||
|  | ||||
|       // Constructors | ||||
|       /* | ||||
|       WilsonFermion5D(int simd,  | ||||
| 		      GaugeField &_Umu, | ||||
| 		      GridCartesian         &FiveDimGrid, | ||||
| 		      GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||
| 		      GridCartesian         &FourDimGrid, | ||||
| 		      GridRedBlackCartesian &FourDimRedBlackGrid, | ||||
| 		      double _M5,const ImplParams &p= ImplParams()); | ||||
|       */ | ||||
|  | ||||
|       // DoubleStore | ||||
|       void ImportGauge(const GaugeField &_Umu); | ||||
|  | ||||
|       void Report(void); | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Data members required to support the functionality | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -1,47 +1,54 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | ||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid.h> | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| template<class Impl>  | ||||
| WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {}; | ||||
| int WilsonKernelsStatic::HandOpt; | ||||
| int WilsonKernelsStatic::AsmOpt; | ||||
|  | ||||
|   // Need controls to do interior, exterior, or both | ||||
| template<class Impl>  | ||||
| void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, | ||||
| 					   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 					   int sF,int sU,const FermionField &in, FermionField &out) | ||||
| { | ||||
|   SiteHalfSpinor  tmp;     | ||||
|   SiteHalfSpinor  chi;     | ||||
| template <class Impl> | ||||
| WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){}; | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Generic implementation; move to different file? | ||||
| //////////////////////////////////////////// | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag( | ||||
|     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
|     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF, | ||||
|     int sU, const FermionField &in, FermionField &out) { | ||||
|   SiteHalfSpinor tmp; | ||||
|   SiteHalfSpinor chi; | ||||
|   SiteHalfSpinor *chi_p; | ||||
|   SiteHalfSpinor Uchi; | ||||
|   SiteSpinor result; | ||||
| @@ -51,176 +58,175 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField | ||||
|   /////////////////////////// | ||||
|   // Xp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Xp,sF); | ||||
|   SE = st.GetEntry(ptype, Xp, sF); | ||||
|  | ||||
|   if (SE->_is_local ) {  | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjXp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjXp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjXp(chi,in._odata[SE->_offset]); | ||||
|       spProjXp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else {  | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|   } else { | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|    | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); | ||||
|   spReconXp(result,Uchi); | ||||
|      | ||||
|  | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st); | ||||
|   spReconXp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Yp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Yp,sF); | ||||
|   SE = st.GetEntry(ptype, Yp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) {  | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjYp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjYp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjYp(chi,in._odata[SE->_offset]); | ||||
|       spProjYp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else {  | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|   } else { | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); | ||||
|   accumReconYp(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st); | ||||
|   accumReconYp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Zp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Zp,sF); | ||||
|   SE = st.GetEntry(ptype, Zp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) {  | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjZp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjZp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjZp(chi,in._odata[SE->_offset]); | ||||
|       spProjZp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else {  | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|   } else { | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); | ||||
|   accumReconZp(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st); | ||||
|   accumReconZp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Tp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Tp,sF); | ||||
|   SE = st.GetEntry(ptype, Tp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjTp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjTp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjTp(chi,in._odata[SE->_offset]); | ||||
|       spProjTp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); | ||||
|   accumReconTp(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st); | ||||
|   accumReconTp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Xm | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Xm,sF); | ||||
|   SE = st.GetEntry(ptype, Xm, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjXm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjXm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjXm(chi,in._odata[SE->_offset]); | ||||
|       spProjXm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); | ||||
|   accumReconXm(result,Uchi); | ||||
|    | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st); | ||||
|   accumReconXm(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Ym | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Ym,sF); | ||||
|   SE = st.GetEntry(ptype, Ym, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjYm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjYm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjYm(chi,in._odata[SE->_offset]); | ||||
|       spProjYm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); | ||||
|   accumReconYm(result,Uchi); | ||||
|    | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st); | ||||
|   accumReconYm(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Zm | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Zm,sF); | ||||
|   SE = st.GetEntry(ptype, Zm, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjZm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjZm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjZm(chi,in._odata[SE->_offset]); | ||||
|       spProjZm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); | ||||
|   accumReconZm(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st); | ||||
|   accumReconZm(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Tm | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Tm,sF); | ||||
|   SE = st.GetEntry(ptype, Tm, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjTm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else {  | ||||
|       spProjTm(chi,in._odata[SE->_offset]); | ||||
|     if (SE->_permute) { | ||||
|       spProjTm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjTm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); | ||||
|   accumReconTm(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st); | ||||
|   accumReconTm(result, Uchi); | ||||
|  | ||||
|   vstream(out._odata[sF],result); | ||||
|   vstream(out._odata[sF], result); | ||||
| }; | ||||
|  | ||||
|  | ||||
|   // Need controls to do interior, exterior, or both | ||||
| template<class Impl>  | ||||
| void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
| 					   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 					   int sF,int sU,const FermionField &in, FermionField &out) | ||||
| { | ||||
|   SiteHalfSpinor  tmp;     | ||||
|   SiteHalfSpinor  chi;     | ||||
|   SiteHalfSpinor *chi_p;     | ||||
| // Need controls to do interior, exterior, or both | ||||
| template <class Impl> | ||||
| void WilsonKernels<Impl>::DiracOptGenericDhopSite( | ||||
|     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
|     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF, | ||||
|     int sU, const FermionField &in, FermionField &out) { | ||||
|   SiteHalfSpinor tmp; | ||||
|   SiteHalfSpinor chi; | ||||
|   SiteHalfSpinor *chi_p; | ||||
|   SiteHalfSpinor Uchi; | ||||
|   SiteSpinor result; | ||||
|   StencilEntry *SE; | ||||
| @@ -229,307 +235,298 @@ void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
|   /////////////////////////// | ||||
|   // Xp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Xm,sF); | ||||
|   SE = st.GetEntry(ptype, Xm, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) {  | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjXp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjXp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjXp(chi,in._odata[SE->_offset]); | ||||
|       spProjXp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else {  | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|   } else { | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|    | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); | ||||
|   spReconXp(result,Uchi); | ||||
|      | ||||
|  | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st); | ||||
|   spReconXp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Yp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Ym,sF); | ||||
|   SE = st.GetEntry(ptype, Ym, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) {  | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjYp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjYp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjYp(chi,in._odata[SE->_offset]); | ||||
|       spProjYp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else {  | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|   } else { | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); | ||||
|   accumReconYp(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st); | ||||
|   accumReconYp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Zp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Zm,sF); | ||||
|   SE = st.GetEntry(ptype, Zm, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) {  | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjZp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjZp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjZp(chi,in._odata[SE->_offset]); | ||||
|       spProjZp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else {  | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|   } else { | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); | ||||
|   accumReconZp(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st); | ||||
|   accumReconZp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Tp | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Tm,sF); | ||||
|   SE = st.GetEntry(ptype, Tm, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjTp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjTp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjTp(chi,in._odata[SE->_offset]); | ||||
|       spProjTp(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); | ||||
|   accumReconTp(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st); | ||||
|   accumReconTp(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Xm | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Xp,sF); | ||||
|   SE = st.GetEntry(ptype, Xp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjXm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjXm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjXm(chi,in._odata[SE->_offset]); | ||||
|       spProjXm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); | ||||
|   accumReconXm(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st); | ||||
|   accumReconXm(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Ym | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Yp,sF); | ||||
|   SE = st.GetEntry(ptype, Yp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjYm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjYm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjYm(chi,in._odata[SE->_offset]); | ||||
|       spProjYm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); | ||||
|   accumReconYm(result,Uchi); | ||||
|    | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st); | ||||
|   accumReconYm(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Zm | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Zp,sF); | ||||
|   SE = st.GetEntry(ptype, Zp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjZm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     if (SE->_permute) { | ||||
|       spProjZm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjZm(chi,in._odata[SE->_offset]); | ||||
|       spProjZm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); | ||||
|   accumReconZm(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st); | ||||
|   accumReconZm(result, Uchi); | ||||
|  | ||||
|   /////////////////////////// | ||||
|   // Tm | ||||
|   /////////////////////////// | ||||
|   SE=st.GetEntry(ptype,Tp,sF); | ||||
|   SE = st.GetEntry(ptype, Tp, sF); | ||||
|  | ||||
|   if ( SE->_is_local ) { | ||||
|   if (SE->_is_local) { | ||||
|     chi_p = &chi; | ||||
|     if ( SE->_permute ) { | ||||
|       spProjTm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else {  | ||||
|       spProjTm(chi,in._odata[SE->_offset]); | ||||
|     if (SE->_permute) { | ||||
|       spProjTm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else { | ||||
|       spProjTm(chi, in._odata[SE->_offset]); | ||||
|     } | ||||
|   } else { | ||||
|     chi_p=&buf[SE->_offset]; | ||||
|     chi_p = &buf[SE->_offset]; | ||||
|   } | ||||
|  | ||||
|   Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); | ||||
|   accumReconTm(result,Uchi); | ||||
|   Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st); | ||||
|   accumReconTm(result, Uchi); | ||||
|  | ||||
|   vstream(out._odata[sF],result); | ||||
|   vstream(out._odata[sF], result); | ||||
| }; | ||||
|  | ||||
| template<class Impl>  | ||||
| void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, | ||||
| 					  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 					  int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma) | ||||
| { | ||||
|   SiteHalfSpinor  tmp;     | ||||
|   SiteHalfSpinor  chi;     | ||||
|   SiteSpinor   result; | ||||
| template <class Impl> | ||||
| void WilsonKernels<Impl>::DiracOptDhopDir( | ||||
|     StencilImpl &st, DoubledGaugeField &U, | ||||
|     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF, | ||||
|     int sU, const FermionField &in, FermionField &out, int dir, int gamma) { | ||||
|   SiteHalfSpinor tmp; | ||||
|   SiteHalfSpinor chi; | ||||
|   SiteSpinor result; | ||||
|   SiteHalfSpinor Uchi; | ||||
|   StencilEntry *SE; | ||||
|   int ptype; | ||||
|  | ||||
|   SE=st.GetEntry(ptype,dir,sF); | ||||
|   SE = st.GetEntry(ptype, dir, sF); | ||||
|  | ||||
|   // Xp | ||||
|   if(gamma==Xp){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjXp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjXp(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Xp) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjXp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjXp(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconXp(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconXp(result, Uchi); | ||||
|   } | ||||
|  | ||||
|   // Yp | ||||
|   if ( gamma==Yp ){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjYp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjYp(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Yp) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjYp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjYp(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconYp(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconYp(result, Uchi); | ||||
|   } | ||||
|    | ||||
|  | ||||
|   // Zp | ||||
|   if ( gamma ==Zp ){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjZp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjZp(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Zp) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjZp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjZp(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconZp(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconZp(result, Uchi); | ||||
|   } | ||||
|    | ||||
|  | ||||
|   // Tp | ||||
|   if ( gamma ==Tp ){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjTp(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjTp(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Tp) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjTp(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjTp(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconTp(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconTp(result, Uchi); | ||||
|   } | ||||
|  | ||||
|   // Xm | ||||
|   if ( gamma==Xm ){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjXm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjXm(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Xm) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjXm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjXm(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconXm(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconXm(result, Uchi); | ||||
|   } | ||||
|  | ||||
|   // Ym | ||||
|   if ( gamma == Ym ){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjYm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjYm(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Ym) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjYm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjYm(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconYm(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconYm(result, Uchi); | ||||
|   } | ||||
|  | ||||
|   // Zm | ||||
|   if ( gamma == Zm ){ | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjZm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjZm(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Zm) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjZm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjZm(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconZm(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconZm(result, Uchi); | ||||
|   } | ||||
|    | ||||
|  | ||||
|   // Tm | ||||
|   if ( gamma==Tm ) { | ||||
|     if (  SE->_is_local && SE->_permute ) { | ||||
|       spProjTm(tmp,in._odata[SE->_offset]); | ||||
|       permute(chi,tmp,ptype); | ||||
|     } else if ( SE->_is_local ) { | ||||
|       spProjTm(chi,in._odata[SE->_offset]); | ||||
|     } else {  | ||||
|       chi=buf[SE->_offset]; | ||||
|   if (gamma == Tm) { | ||||
|     if (SE->_is_local && SE->_permute) { | ||||
|       spProjTm(tmp, in._odata[SE->_offset]); | ||||
|       permute(chi, tmp, ptype); | ||||
|     } else if (SE->_is_local) { | ||||
|       spProjTm(chi, in._odata[SE->_offset]); | ||||
|     } else { | ||||
|       chi = buf[SE->_offset]; | ||||
|     } | ||||
|     Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); | ||||
|     spReconTm(result,Uchi); | ||||
|     Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); | ||||
|     spReconTm(result, Uchi); | ||||
|   } | ||||
|  | ||||
|   vstream(out._odata[sF],result); | ||||
|   vstream(out._odata[sF], result); | ||||
| } | ||||
|  | ||||
| #if ( ! defined(AVX512) ) | ||||
| template<class Impl>  | ||||
| void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
| 					      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 					      int sF,int sU,const FermionField &in, FermionField &out) | ||||
| { | ||||
|   DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 | ||||
| } | ||||
| #endif | ||||
|  | ||||
|   FermOpTemplateInstantiate(WilsonKernels); | ||||
| template class WilsonKernels<DomainWallRedBlack5dImplF>;		 | ||||
| template class WilsonKernels<DomainWallRedBlack5dImplD>; | ||||
| FermOpTemplateInstantiate(WilsonKernels); | ||||
| AdjointFermOpTemplateInstantiate(WilsonKernels); | ||||
| TwoIndexFermOpTemplateInstantiate(WilsonKernels); | ||||
|  | ||||
| }} | ||||
|  | ||||
|   | ||||
| @@ -1,34 +1,35 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/WilsonKernels.h | ||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef  GRID_QCD_DHOP_H | ||||
| #define  GRID_QCD_DHOP_H | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_QCD_DHOP_H | ||||
| #define GRID_QCD_DHOP_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| @@ -38,42 +39,168 @@ namespace Grid { | ||||
|     // Helper routines that implement Wilson stencil for a single site. | ||||
|     // Common to both the WilsonFermion and WilsonFermion5D | ||||
|     //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     template<class Impl> class WilsonKernels : public FermionOperator<Impl> {  | ||||
|     class WilsonKernelsStatic {  | ||||
|     public: | ||||
|  | ||||
|      INHERIT_IMPL_TYPES(Impl); | ||||
|      typedef FermionOperator<Impl> Base; | ||||
|       | ||||
|     public: | ||||
|      void DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
| 			   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 			   int sF,int sU,const FermionField &in, FermionField &out); | ||||
|        | ||||
|      void DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, | ||||
| 			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 			      int sF,int sU,const FermionField &in,FermionField &out); | ||||
|  | ||||
|      void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, | ||||
| 			  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 			  int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma); | ||||
|  | ||||
|      void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
| 			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 			      int sF,int sU,const FermionField &in, FermionField &out); | ||||
|  | ||||
|      int DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
| 			      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 			      int sF,int sU,const FermionField &in, FermionField &out); | ||||
|       | ||||
|      int DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, | ||||
| 				 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 				 int sF,int sU,const FermionField &in, FermionField &out); | ||||
|  | ||||
|      WilsonKernels(const ImplParams &p= ImplParams()); | ||||
|       | ||||
|       // S-direction is INNERMOST and takes no part in the parity. | ||||
|       static int AsmOpt;  // these are a temporary hack | ||||
|       static int HandOpt; // these are a temporary hack | ||||
|     }; | ||||
|  | ||||
|   } | ||||
| } | ||||
|     template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {  | ||||
|     public: | ||||
|  | ||||
|       INHERIT_IMPL_TYPES(Impl); | ||||
|       typedef FermionOperator<Impl> Base; | ||||
|       | ||||
|     public: | ||||
|  | ||||
|       template <bool EnableBool = true> | ||||
|       typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type | ||||
| 	DiracOptDhopSite( | ||||
| 			 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 			 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 			 int sF, int sU, int Ls, int Ns, const FermionField &in, | ||||
| 			 FermionField &out) { | ||||
| #ifdef AVX512 | ||||
| 	if (AsmOpt) { | ||||
| 	  WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, | ||||
| 						   in, out); | ||||
|  | ||||
| 	} else { | ||||
| #else | ||||
| 	  { | ||||
| #endif | ||||
| 	    for (int site = 0; site < Ns; site++) { | ||||
| 	      for (int s = 0; s < Ls; s++) { | ||||
| 		if (HandOpt) | ||||
| 		  WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, | ||||
| 							    in, out); | ||||
| 		else | ||||
| 		  WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, | ||||
| 							       in, out); | ||||
| 		sF++; | ||||
| 	      } | ||||
| 	      sU++; | ||||
| 	    } | ||||
| 	  } | ||||
| 	} | ||||
|  | ||||
| 	template <bool EnableBool = true> | ||||
| 	  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type | ||||
| 	  DiracOptDhopSite( | ||||
| 			   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 			   std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 			   int sF, int sU, int Ls, int Ns, const FermionField &in, | ||||
| 			   FermionField &out) { | ||||
| 	  for (int site = 0; site < Ns; site++) { | ||||
| 	    for (int s = 0; s < Ls; s++) { | ||||
| 	      WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, | ||||
| 							   out); | ||||
| 	      sF++; | ||||
| 	    } | ||||
| 	    sU++; | ||||
| 	  } | ||||
| 	} | ||||
|  | ||||
| 	template <bool EnableBool = true> | ||||
| 	  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool, | ||||
| 				  void>::type | ||||
| 	  DiracOptDhopSiteDag( | ||||
| 			      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 			      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 			      int sF, int sU, int Ls, int Ns, const FermionField &in, | ||||
| 			      FermionField &out) { | ||||
| #ifdef AVX512 | ||||
| 				    if (AsmOpt) { | ||||
| 				      WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls, | ||||
| 										  Ns, in, out); | ||||
| 				    } else { | ||||
| #else | ||||
| 				      { | ||||
| #endif | ||||
| 					for (int site = 0; site < Ns; site++) { | ||||
| 					  for (int s = 0; s < Ls; s++) { | ||||
| 					    if (HandOpt) | ||||
| 					      WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU, | ||||
| 											   in, out); | ||||
| 					    else | ||||
| 					      WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, | ||||
| 											      sU, in, out); | ||||
| 					    sF++; | ||||
| 					  } | ||||
| 					  sU++; | ||||
| 					} | ||||
| 				      } | ||||
| 				    } | ||||
|  | ||||
| 				    template <bool EnableBool = true> | ||||
| 				      typename std::enable_if< | ||||
| 				      (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, | ||||
| 				      void>::type | ||||
| 				      DiracOptDhopSiteDag( | ||||
| 							  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 							  std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 							  int sF, int sU, int Ls, int Ns, const FermionField &in, | ||||
| 							  FermionField &out) { | ||||
| 					for (int site = 0; site < Ns; site++) { | ||||
| 					  for (int s = 0; s < Ls; s++) { | ||||
| 					    WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU, | ||||
| 											    in, out); | ||||
| 					    sF++; | ||||
| 					  } | ||||
| 					  sU++; | ||||
| 					} | ||||
| 				      } | ||||
|  | ||||
| 				    void DiracOptDhopDir( | ||||
| 							 StencilImpl &st, DoubledGaugeField &U, | ||||
| 							 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 							 int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, | ||||
| 							 int gamma); | ||||
|  | ||||
| 	private: | ||||
| 				    // Specialised variants | ||||
| 				    void DiracOptGenericDhopSite( | ||||
| 								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 								 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 								 int sF, int sU, const FermionField &in, FermionField &out); | ||||
|  | ||||
| 				    void DiracOptGenericDhopSiteDag( | ||||
| 								    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 								    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 								    int sF, int sU, const FermionField &in, FermionField &out); | ||||
|  | ||||
| 				    void DiracOptAsmDhopSite( | ||||
| 							     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 							     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 							     int sF, int sU, int Ls, int Ns, const FermionField &in, | ||||
| 							     FermionField &out); | ||||
|  | ||||
| 				    void DiracOptAsmDhopSiteDag( | ||||
| 								StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 								std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 								int sF, int sU, int Ls, int Ns, const FermionField &in, | ||||
| 								FermionField &out); | ||||
|  | ||||
| 				    void DiracOptHandDhopSite( | ||||
| 							      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 							      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 							      int sF, int sU, const FermionField &in, FermionField &out); | ||||
|  | ||||
| 				    void DiracOptHandDhopSiteDag( | ||||
| 								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 								 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, | ||||
| 								 int sF, int sU, const FermionField &in, FermionField &out); | ||||
|  | ||||
| 	public: | ||||
| 				    WilsonKernels(const ImplParams &p = ImplParams()); | ||||
| 				  }; | ||||
|      | ||||
|       } | ||||
|     } | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
|     /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
| @@ -24,239 +26,124 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid.h> | ||||
| #if defined(AVX512)  | ||||
| //#if defined (IMCI) | ||||
|  | ||||
| #include <simd/Intel512wilson.h> | ||||
|  | ||||
| #include <simd/Intel512single.h> | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, | ||||
| 						   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 					       int ss,int sU,const FermionField &in, FermionField &out) | ||||
| { | ||||
|   uint64_t  now; | ||||
|   uint64_t first ; | ||||
|   int offset,local,perm, ptype; | ||||
|   const SiteHalfSpinor *pbuf = & buf[0]; | ||||
|   const SiteSpinor   *plocal = & in._odata[0]; | ||||
|   void *pf; | ||||
|   int osites = in._grid->oSites(); | ||||
|  | ||||
|    | ||||
|   StencilEntry *SE; | ||||
|  | ||||
|   //#define STAMP(i) timers[i] = cyclecount() ;  | ||||
| #define STAMP(i) //timers[i] = cyclecount() ;  | ||||
|  | ||||
|   MASK_REGS; | ||||
|  | ||||
|   first = cyclecount(); | ||||
|  | ||||
|   SE=st.GetEntry(ptype,Xm,ss); | ||||
|  | ||||
|   // Xm | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|  | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Ym,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|    | ||||
|   if ( local ) { | ||||
|     XP_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR3; // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFXM(Xm,pf); | ||||
|   } | ||||
|   XP_RECON; | ||||
|  | ||||
|   // Ym | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|  | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Zm,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|    | ||||
|   if ( local ) { | ||||
|     YP_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR2; // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFYM(Ym,pf); | ||||
|   } | ||||
|   YP_RECON_ACCUM; | ||||
|  | ||||
|   // Zm | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|  | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Tm,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|  | ||||
|   if ( local ) { | ||||
|     ZP_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR1; // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFZM(Zm,pf); | ||||
|   } | ||||
|   ZP_RECON_ACCUM; | ||||
|  | ||||
|   // Tm | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|    | ||||
|   SE=st.GetEntry(ptype,Tp,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|  | ||||
|  | ||||
|   if ( local ) { | ||||
|     TP_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR0; // T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFTM(Tm,pf); | ||||
|   } | ||||
|   TP_RECON_ACCUM; | ||||
|  | ||||
|   // Tp | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|  | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Zp,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|    | ||||
|   if ( local ) { | ||||
|     TM_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR0; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFTP(Tp,pf); | ||||
|   } | ||||
|   TM_RECON_ACCUM; | ||||
|  | ||||
|   // Zp | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|  | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Yp,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|  | ||||
|   if ( local ) { | ||||
|     ZM_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR1; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFZP(Zp,pf); | ||||
|   } | ||||
|   ZM_RECON_ACCUM; | ||||
|  | ||||
|  | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   perm   = SE->_permute; | ||||
|  | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Xp,ss); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|    | ||||
|   if ( local ) { | ||||
|     YM_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR2; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFYP(Yp,pf); | ||||
|   } | ||||
|   YM_RECON_ACCUM; | ||||
|  | ||||
|   // Xp | ||||
|   perm   = SE->_permute; | ||||
|   offset = SE->_offset; | ||||
|   local  = SE->_is_local; | ||||
|   namespace QCD { | ||||
|      | ||||
|   // Prefetch | ||||
|   SE=st.GetEntry(ptype,Xm,(ss+1)%osites); | ||||
|   if (SE->_is_local) pf=(void *)&plocal[SE->_offset]; | ||||
|   else               pf=(void *)&pbuf[SE->_offset]; | ||||
|  | ||||
|   if ( local ) { | ||||
|     XM_PROJMEM(&plocal[offset]); | ||||
|     if ( perm) { | ||||
|       PERMUTE_DIR3; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | ||||
|     /////////////////////////////////////////////////////////// | ||||
|     // Default to no assembler implementation | ||||
|     /////////////////////////////////////////////////////////// | ||||
|     template<class Impl> | ||||
|       void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, | ||||
|                              std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
|                              int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) | ||||
|     { | ||||
|       assert(0); | ||||
|     } | ||||
|     template<class Impl> | ||||
|       void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, | ||||
|                                 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
|                                 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) | ||||
|     { | ||||
|       assert(0); | ||||
|     } | ||||
|   } else {  | ||||
|     LOAD_CHI(&pbuf[offset]); | ||||
|   } | ||||
|   { | ||||
|     MULT_2SPIN_DIR_PFXP(Xp,pf); | ||||
|   } | ||||
|   XM_RECON_ACCUM; | ||||
|  | ||||
|  debug: | ||||
|   SAVE_RESULT(&out._odata[ss]); | ||||
|  | ||||
|  | ||||
| #if defined(AVX512)  | ||||
|      | ||||
|      | ||||
|     /////////////////////////////////////////////////////////// | ||||
|     // If we are AVX512 specialise the single precision routine | ||||
|     /////////////////////////////////////////////////////////// | ||||
|      | ||||
| #include <simd/Intel512wilson.h> | ||||
| #include <simd/Intel512single.h> | ||||
|      | ||||
|     static Vector<vComplexF> signs; | ||||
|      | ||||
|     int setupSigns(void ){ | ||||
|       Vector<vComplexF> bother(2); | ||||
|       signs = bother; | ||||
|       vrsign(signs[0]); | ||||
|       visign(signs[1]); | ||||
|       return 1; | ||||
|     } | ||||
|     static int signInit = setupSigns(); | ||||
|    | ||||
| #define label(A)  ilabel(A) | ||||
| #define ilabel(A) ".globl\n"  #A ":\n"  | ||||
|    | ||||
| #define MAYBEPERM(A,perm) if (perm) { A ; } | ||||
| #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) | ||||
| #define FX(A) WILSONASM_ ##A | ||||
|    | ||||
| #undef KERNEL_DAG | ||||
|     template<> | ||||
|     void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, | ||||
| 							 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) | ||||
| #include <qcd/action/fermion/WilsonKernelsAsmBody.h> | ||||
|        | ||||
| #define KERNEL_DAG | ||||
|     template<> | ||||
|     void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, | ||||
| 							    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) | ||||
| #include <qcd/action/fermion/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| #undef VMOVIDUP | ||||
| #undef VMOVRDUP | ||||
| #undef MAYBEPERM | ||||
| #undef MULT_2SPIN | ||||
| #undef FX  | ||||
| #define FX(A) DWFASM_ ## A | ||||
| #define MAYBEPERM(A,B)  | ||||
| #define VMOVIDUP(A,B,C)                                  VBCASTIDUPf(A,B,C) | ||||
| #define VMOVRDUP(A,B,C)                                  VBCASTRDUPf(A,B,C) | ||||
| #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) | ||||
| 				     | ||||
| #undef KERNEL_DAG | ||||
|     template<> | ||||
|     void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, | ||||
| 								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 								  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) | ||||
| #include <qcd/action/fermion/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| #define KERNEL_DAG | ||||
|     template<> | ||||
|     void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, | ||||
| 								     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | ||||
| 								     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) | ||||
| #include <qcd/action/fermion/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| #endif | ||||
|  | ||||
|  | ||||
| #define INSTANTIATE_ASM(A)\ | ||||
| template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ | ||||
|                                    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\ | ||||
|                                   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ | ||||
| template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ | ||||
|                                    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\ | ||||
|                                   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ | ||||
|  | ||||
|  | ||||
| INSTANTIATE_ASM(WilsonImplF); | ||||
| INSTANTIATE_ASM(WilsonImplD); | ||||
| INSTANTIATE_ASM(ZWilsonImplF); | ||||
| INSTANTIATE_ASM(ZWilsonImplD); | ||||
| INSTANTIATE_ASM(GparityWilsonImplF); | ||||
| INSTANTIATE_ASM(GparityWilsonImplD); | ||||
| INSTANTIATE_ASM(DomainWallVec5dImplF); | ||||
| INSTANTIATE_ASM(DomainWallVec5dImplD); | ||||
| INSTANTIATE_ASM(ZDomainWallVec5dImplF); | ||||
| INSTANTIATE_ASM(ZDomainWallVec5dImplD); | ||||
|   } | ||||
| } | ||||
|  | ||||
|   template class WilsonKernels<WilsonImplF>;		 | ||||
|   template class WilsonKernels<WilsonImplD>;  | ||||
|   template class WilsonKernels<GparityWilsonImplF>; | ||||
|   template class WilsonKernels<GparityWilsonImplD>; | ||||
|   template class WilsonKernels<DomainWallRedBlack5dImplF>; | ||||
|   template class WilsonKernels<DomainWallRedBlack5dImplD>; | ||||
| }} | ||||
| #endif | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user