mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 10:09:34 +01:00 
			
		
		
		
	Compare commits
	
		
			431 Commits
		
	
	
		
			feature/bg
			...
			feature/la
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 9e56c65730 | ||
|  | ef4f2b8c41 | ||
|  | e8b95bd35b | ||
|  | 7e35286860 | ||
|  | 0486ff8e79 | ||
|  | e9cc21900f | ||
|  | 0a8faac271 | ||
|  | abc4de0fd2 | ||
|  | cfe3cd76d1 | ||
|  | 3fa5e3109f | ||
|  | 8b7049f737 | ||
|  | c85024683e | ||
|  | 1300b0b04b | ||
|  | e6d984b484 | ||
|  | 1d18d95d4f | ||
|  | ae39ec85a3 | ||
|  | b96daf53a0 | ||
|  | 46879e1658 | ||
|  | ae4de94798 | ||
|  | 0ab555b4f5 | ||
|  | 8e9be9f84f | ||
|  | d572170170 | ||
|  | 12ccc73cf5 | ||
|  | e7564f8330 | ||
|  | 91199a8ea0 | ||
|  | 0494feec98 | ||
|  | a16b1e134e | ||
|  | 769ad578f5 | ||
|  | eaac0044b5 | ||
|  | 56042f002c | ||
|  | 3bfd1f13e6 | ||
|  | 70ab598c96 | ||
|  | 1d0ca65e28 | ||
|  | 2bc4d0a20e | ||
|  | 092dcd4e04 | ||
|  | 4a8c4ccfba | ||
|  | 9b44189d5a | ||
|  | 7da4856e8e | ||
|  | aaf1e33a77 | ||
|  | 094c3d091a | ||
|  | 4b98e524a0 | ||
|  | 1a1f6d55f9 | ||
|  | 21421656ab | ||
|  | 6f687a67cd | ||
|  | b30754e762 | ||
|  | 1e429a0d57 | ||
|  | d38a4de36c | ||
|  | ef1b7db374 | ||
|  | 53a9aeb965 | ||
|  | e30fa9f4b8 | ||
|  | 58e8d0a10d | ||
|  | 62cf9cf638 | ||
|  | 0fb458879d | ||
|  | 725c513d94 | ||
| d8648307ff | |||
| 064315c00b | |||
|  | 7c6cc85df6 | ||
|  | a6691ef87c | ||
|  | 8e0ced627a | ||
|  | 0de314870d | ||
|  | ffb91e53d2 | ||
|  | f4e8bf2858 | ||
| a74c34315c | |||
|  | 69470ccc10 | ||
|  | b8b5934193 | ||
|  | 75856f2945 | ||
|  | 3c112a7a25 | ||
|  | ab3596d4d3 | ||
|  | a8c10b1933 | ||
|  | 15e801af3f | ||
|  | 0ffc235741 | ||
|  | 8e19c99c7d | ||
|  | a0bc0ad06f | ||
|  | a8fb2835ca | ||
|  | bc862ce3ab | ||
|  | 3267683e22 | ||
|  | f46a67ffb3 | ||
|  | f7b8383ef5 | ||
|  | 10f2872aae | ||
|  | cd73897b8d | ||
|  | 7a8f6af5f8 | ||
|  | 49a5d9bac7 | ||
|  | 2b3fdd4a58 | ||
|  | 34502ec471 | ||
|  | 8a43e88b4f | ||
|  | 238df20370 | ||
|  | 97a32a6145 | ||
|  | 655492a443 | ||
|  | 1cab06f6bd | ||
|  | f8024c262b | ||
|  | 4cc5f01f4a | ||
|  | 9c12c37aaf | ||
|  | 806eaa0530 | ||
|  | 01d0e54594 | ||
|  | 5aafa335fe | ||
|  | 8ba0494485 | ||
|  | d99d98d9fd | ||
|  | 95a017a4ae | ||
|  | 92f92379e6 | ||
|  | 529e78d43f | ||
|  | 4ec746d262 | ||
|  | 66d819c054 | ||
|  | 3f3686f869 | ||
|  | 26bb829f8c | ||
|  | 67cb04fc66 | ||
|  | a40bd68aed | ||
|  | 36495e0fd2 | ||
|  | 93f6c15772 | ||
|  | cb93eeff21 | ||
|  | c7cc7e6101 | ||
|  | c349aa6511 | ||
|  | 3bae0a2d5c | ||
|  | c1c7566089 | ||
|  | 2439999ec8 | ||
|  | 1d96f662e3 | ||
|  | 41d1889941 | ||
|  | 0c3981e0c3 | ||
|  | c727bd4609 | ||
|  | db23749b67 | ||
|  | 751f2b9703 | ||
|  | 697c0603ce | ||
|  | 14bedebb11 | ||
|  | 47b5c07ffb | ||
|  | da86a2bf54 | ||
|  | c1cb60a0b3 | ||
|  | 5ed5b4bfbf | ||
|  | de84aacdfd | ||
|  | 2888003765 | ||
|  | da06bf5b95 | ||
|  | 20999c1370 | ||
|  | 33f0ed1a33 | ||
|  | 50be56433b | ||
|  | 43924007db | ||
|  | 78ef10e60f | ||
| 679ae98b14 | |||
|  | 90f6bc16bb | ||
|  | 9b5b639546 | ||
|  | 945767c6d8 | ||
|  | 422cdf4979 | ||
|  | 38db174f3b | ||
|  | 92e364a35f | ||
| 58299b8ba2 | |||
| 124bf4d829 | |||
| e8e56b3414 | |||
| 89c430136d | |||
| ea9aef7baa | |||
| c9e9e8061d | |||
|  | 453cf2a1c6 | ||
|  | de7bbfa5f9 | ||
| dda8d77c87 | |||
| aa29f4346a | |||
|  | 86116dbed6 | ||
|  | 7bd31e3f7c | ||
|  | 74f451715f | ||
|  | 655be8ed76 | ||
|  | 4063238943 | ||
|  | 3344788fa1 | ||
|  | 99220f6531 | ||
|  | 2a6d093749 | ||
|  | c947947fad | ||
|  | f555b50547 | ||
|  | 738c1a11c2 | ||
|  | f8797e1e3e | ||
|  | fd1eb7de13 | ||
|  | 2ce898efa3 | ||
|  | ab66bac4e6 | ||
|  | 56277a11c8 | ||
|  | 916e9e1d3e | ||
|  | 5b55867a7a | ||
|  | 3accb1ef89 | ||
|  | e3d0e31525 | ||
|  | 5812eb8a8c | ||
|  | 4dd3763294 | ||
|  | c429ace748 | ||
|  | ac58565d0a | ||
|  | 3703b718aa | ||
|  | b722889234 | ||
|  | abba44a837 | ||
|  | f301be94ce | ||
|  | 1d1b225497 | ||
|  | 53a785a3dd | ||
|  | 736bf3c866 | ||
|  | b9bbe5d188 | ||
|  | 3844bcf800 | ||
|  | e1a2319d01 | ||
|  | 180c732b4c | ||
|  | 957a706d0b | ||
|  | d2312e9874 | ||
|  | fc4ab9ccd5 | ||
|  | 4a340aa5ca | ||
|  | 3b7de792d5 | ||
|  | 557c3fa109 | ||
|  | ec18e9f7f6 | ||
|  | a839d5bc55 | ||
|  | de41b84c5c | ||
|  | 8e161152e4 | ||
|  | 3141ebac10 | ||
|  | 7ede696126 | ||
|  | bf516c3b81 | ||
|  | 441a52ee5d | ||
|  | a8db024c92 | ||
|  | a9c22d5f43 | ||
|  | 3ca41458a3 | ||
|  | 9e2d29c644 | ||
|  | 951be75292 | ||
|  | b9113ed310 | ||
|  | 42fb49d3fd | ||
|  | 2a54c9aaab | ||
|  | 0957378679 | ||
|  | 2ed6c76fc5 | ||
|  | d3b9a7fa14 | ||
|  | 75ea306ce9 | ||
|  | 4226c633c4 | ||
|  | 5a4eafbf7e | ||
|  | eb8e26018b | ||
|  | db5ea001a3 | ||
|  | 2846f079e5 | ||
|  | 1d502e4ed6 | ||
|  | 73cdf0fffe | ||
|  | 1c25773319 | ||
|  | c38400b26f | ||
|  | 9c3065b860 | ||
|  | 94eb829d08 | ||
|  | 68392ddb5b | ||
|  | cb6b81ae82 | ||
| 90ec6eda0c | |||
| fe8d625694 | |||
| 53e76b41d2 | |||
| 8ef4300412 | |||
| 98a24ebf31 | |||
|  | b12dc89d26 | ||
|  | d80d802f9d | ||
|  | 3d99b09dba | ||
|  | db5f6d3ae3 | ||
|  | 683550f116 | ||
|  | 5e477ec553 | ||
|  | 55d0329624 | ||
|  | 86aaa35294 | ||
|  | 172d3dc93a | ||
|  | 8c540333d5 | ||
|  | 5592f7b8c1 | ||
|  | 35da4ece0b | ||
|  | 061b15b9e9 | ||
| ff4e54ef80 | |||
|  | 561426f6eb | ||
|  | 83f6fab8fa | ||
|  | 0fade84ab2 | ||
|  | 9dc7ca4c3b | ||
|  | 935d82f5b1 | ||
|  | 9cbcdd65d7 | ||
|  | f18f5ed926 | ||
|  | d1d63a4f2d | ||
|  | 7e5faa0f34 | ||
|  | 6af459cae4 | ||
|  | 1c4bc7ed38 | ||
|  | cd1bd921bd | ||
|  | fff5751b1a | ||
|  | 2c81696fdd | ||
|  | c9dc22efa1 | ||
|  | 0ab04a000f | ||
|  | 93ea5d9468 | ||
|  | 1ec5d32369 | ||
|  | 9fd23faadf | ||
|  | 10e4fa0dc8 | ||
|  | c4aca1dde4 | ||
|  | b9e8ea3aaa | ||
|  | 077aa728b9 | ||
|  | a8d83d886e | ||
|  | 7fd46eeec4 | ||
|  | e0c4eeb3ec | ||
|  | cb9a297a0a | ||
|  | 2b115929dc | ||
|  | 5c6571dab1 | ||
|  | 4c1ea8677e | ||
|  | 120fb59978 | ||
|  | fd56b3ff38 | ||
|  | 0ec6829edc | ||
|  | 18b7845b7b | ||
|  | 3d0fe15374 | ||
|  | 91886068fe | ||
|  | 6d1e9e5f92 | ||
|  | b640230b1e | ||
|  | b3dede4dd3 | ||
|  | 4e34132f4d | ||
|  | c07cb10247 | ||
|  | d7767a2a62 | ||
|  | ec035983fd | ||
|  | 596dcd85b2 | ||
|  | 7270c6a150 | ||
|  | f8b9ad7d50 | ||
|  | 04a1959895 | ||
|  | 93cc270016 | ||
|  | 29b60f7e1a | ||
|  | 902afcfbaf | ||
|  | 97a6b61551 | ||
|  | f011bdb869 | ||
|  | bafb101e4f | ||
|  | 08fdf05528 | ||
|  | 9e72a6b22e | ||
|  | 1c12c5612c | ||
|  | a8193c4bcb | ||
|  | c3d7ec65fa | ||
|  | 8b6a6c8236 | ||
|  | e0571c872b | ||
|  | c67f41887b | ||
|  | 84687ccf1f | ||
|  | 3274561cf8 | ||
| e08fbb3771 | |||
|  | d7464aa0fe | ||
|  | 00d29153f0 | ||
| 2ce989f220 | |||
|  | d7a1dc85be | ||
|  | fc19503673 | ||
|  | beba824136 | ||
|  | 6ebf8b12b6 | ||
|  | e5a7ed4362 | ||
|  | b9f7ea47c3 | ||
|  | 06f7ee202e | ||
|  | 2b2fc6453f | ||
|  | bdd2765461 | ||
|  | 4a45c06dd7 | ||
|  | d6a7d7d1e0 | ||
|  | 1a122a0dd8 | ||
|  | 20e20733e8 | ||
|  | b7cd1a19e3 | ||
|  | f510002a62 | ||
|  | 1e257a1251 | ||
|  | 522f6bf91a | ||
|  | d35d87d2c2 | ||
|  | 74a5cda84b | ||
|  | 5be05d85b8 | ||
|  | 35ac85aea8 | ||
|  | fa237401ff | ||
|  | 97053adcb5 | ||
|  | f8fbe4d7a3 | ||
|  | ef31c012bf | ||
|  | 9e9f621d5d | ||
|  | 651e1a7cbc | ||
|  | c4d3672720 | ||
|  | 16be6d378c | ||
|  | f05d0565aa | ||
|  | 9bf4108d1f | ||
|  | 6929a84c70 | ||
|  | 5c779a789b | ||
|  | e863a948e3 | ||
|  | 977f34dca6 | ||
|  | 90ad956340 | ||
|  | 7996f06335 | ||
|  | 7b40a3e3e5 | ||
|  | f7fbbaaca3 | ||
|  | 17629b8d9e | ||
|  | 0baa20d292 | ||
|  | 4571c918a4 | ||
|  | 5251ea4d30 | ||
|  | 7f456b4173 | ||
|  | c291ef77b5 | ||
|  | 7dd2764bb2 | ||
|  | 244f8fb6dc | ||
|  | 27dfe816fa | ||
|  | af29be2c90 | ||
|  | f96fac0aee | ||
|  | 07f2ebea1b | ||
|  | 851f2ad8ef | ||
|  | 23e0561dd6 | ||
|  | 8ae1a95ec6 | ||
|  | 82b7d4eaf0 | ||
|  | 78774fbdc0 | ||
|  | 924130833e | ||
|  | 0157274762 | ||
|  | 87e8aad5a0 | ||
|  | c6f59c2933 | ||
|  | b7f90aa011 | ||
|  | f22b79da8f | ||
|  | 3855673ebf | ||
|  | 4db82da0db | ||
|  | 0cdc3d2fa5 | ||
|  | 0dfda4bb90 | ||
|  | 1189ebc8b5 | ||
|  | 1bb8578173 | ||
|  | 5214846341 | ||
|  | ce1a115e0b | ||
|  | 0bd296dda4 | ||
|  | af0ccdd8e9 | ||
|  | 2fb92dbc6e | ||
|  | 5c74b6028b | ||
|  | e0be2b6e6c | ||
|  | ef72f322d2 | ||
|  | 7bc2065113 | ||
|  | 2bd4233919 | ||
|  | 143c70e29f | ||
|  | b812d5e39c | ||
|  | 01480da0a8 | ||
|  | 62749d05a6 | ||
|  | 3834feb4b7 | ||
|  | 454302414d | ||
|  | 6f8b771a37 | ||
|  | 4e1ffdd17c | ||
|  | a783282b8b | ||
|  | 19b85d8486 | ||
|  | 1d666771f9 | ||
|  | d50055cd96 | ||
|  | 47c7159177 | ||
|  | f415db583a | ||
|  | f55c16f984 | ||
|  | df67e013ca | ||
|  | 3e990c9d0a | ||
|  | 4b740fc8fd | ||
|  | cccd14b09e | ||
|  | e6acffdfc2 | ||
|  | 392130a537 | ||
|  | deef2673b2 | ||
|  | 977b0a6dd9 | ||
|  | 977d844394 | ||
|  | 590675e2ca | ||
|  | 8c65bdf6d3 | ||
|  | 74f1ed3bc5 | ||
|  | 79270ef510 | ||
|  | e250e6b7bb | ||
|  | 261342c15f | ||
|  | eda4dd622e | ||
|  | c68a2b9637 | ||
|  | 293df6cd20 | ||
|  | 65f61bb3bf | ||
|  | 26b9740d53 | ||
|  | 6eb873dd96 | ||
|  | 11b4c80b27 | ||
|  | c065e454c3 | ||
|  | d9b5fbd374 | ||
|  | cfbc1a26b8 | ||
|  | 257f69f931 | ||
|  | e415260961 | 
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -92,6 +92,7 @@ build*/* | |||||||
| ##################### | ##################### | ||||||
| *.xcodeproj/* | *.xcodeproj/* | ||||||
| build.sh | build.sh | ||||||
|  | .vscode | ||||||
|  |  | ||||||
| # Eigen source # | # Eigen source # | ||||||
| ################ | ################ | ||||||
| @@ -106,6 +107,10 @@ lib/fftw/* | |||||||
| m4/lt* | m4/lt* | ||||||
| m4/libtool.m4 | m4/libtool.m4 | ||||||
|  |  | ||||||
|  | # github pages # | ||||||
|  | ################ | ||||||
|  | gh-pages/ | ||||||
|  |  | ||||||
| # Buck files # | # Buck files # | ||||||
| ############## | ############## | ||||||
| .buck* | .buck* | ||||||
| @@ -117,3 +122,4 @@ make-bin-BUCK.sh | |||||||
| ##################### | ##################### | ||||||
| lib/qcd/spin/gamma-gen/*.h | lib/qcd/spin/gamma-gen/*.h | ||||||
| lib/qcd/spin/gamma-gen/*.cc | lib/qcd/spin/gamma-gen/*.cc | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										26
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -7,9 +7,11 @@ cache: | |||||||
| matrix: | matrix: | ||||||
|   include: |   include: | ||||||
|     - os:        osx |     - os:        osx | ||||||
|       osx_image: xcode7.2 |       osx_image: xcode8.3 | ||||||
|       compiler: clang |       compiler: clang | ||||||
|     - compiler: gcc |     - compiler: gcc | ||||||
|  |       dist: trusty | ||||||
|  |       sudo: required | ||||||
|       addons: |       addons: | ||||||
|         apt: |         apt: | ||||||
|           sources: |           sources: | ||||||
| @@ -24,6 +26,8 @@ matrix: | |||||||
|             - binutils-dev |             - binutils-dev | ||||||
|       env: VERSION=-4.9 |       env: VERSION=-4.9 | ||||||
|     - compiler: gcc |     - compiler: gcc | ||||||
|  |       dist: trusty | ||||||
|  |       sudo: required | ||||||
|       addons: |       addons: | ||||||
|         apt: |         apt: | ||||||
|           sources: |           sources: | ||||||
| @@ -38,6 +42,7 @@ matrix: | |||||||
|             - binutils-dev |             - binutils-dev | ||||||
|       env: VERSION=-5 |       env: VERSION=-5 | ||||||
|     - compiler: clang |     - compiler: clang | ||||||
|  |       dist: trusty | ||||||
|       addons: |       addons: | ||||||
|         apt: |         apt: | ||||||
|           sources: |           sources: | ||||||
| @@ -52,6 +57,7 @@ matrix: | |||||||
|             - binutils-dev |             - binutils-dev | ||||||
|       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz |       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||||
|     - compiler: clang |     - compiler: clang | ||||||
|  |       dist: trusty | ||||||
|       addons: |       addons: | ||||||
|         apt: |         apt: | ||||||
|           sources: |           sources: | ||||||
| @@ -73,13 +79,15 @@ before_install: | |||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi |  | ||||||
|      |      | ||||||
| install: | install: | ||||||
|     - export CC=$CC$VERSION |     - export CC=$CC$VERSION | ||||||
|     - export CXX=$CXX$VERSION |     - export CXX=$CXX$VERSION | ||||||
|     - echo $PATH |     - echo $PATH | ||||||
|  |     - which autoconf | ||||||
|  |     - autoconf  --version | ||||||
|  |     - which automake | ||||||
|  |     - automake  --version | ||||||
|     - which $CC |     - which $CC | ||||||
|     - $CC  --version |     - $CC  --version | ||||||
|     - which $CXX |     - which $CXX | ||||||
| @@ -92,15 +100,15 @@ script: | |||||||
|     - cd build |     - cd build | ||||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4  |     - make -j4  | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 |     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||||
|     - echo make clean |     - echo make clean | ||||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4 |     - make -j4 | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 |     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||||
|  |     - make check | ||||||
|     - echo make clean |     - echo make clean | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi | ||||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi | ||||||
|     - make -j4 |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							| @@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras | |||||||
|  |  | ||||||
| include $(top_srcdir)/doxygen.inc | include $(top_srcdir)/doxygen.inc | ||||||
|  |  | ||||||
| tests: all | bin_SCRIPTS=grid-config | ||||||
| 	$(MAKE) -C tests tests |  | ||||||
|  |  | ||||||
| .PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) |  | ||||||
|  | .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||||
|  |  | ||||||
|  | tests-local: all | ||||||
|  | bench-local: all | ||||||
|  | check-local: all | ||||||
|  |  | ||||||
| AM_CXXFLAGS += -I$(top_builddir)/include | AM_CXXFLAGS += -I$(top_builddir)/include | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I m4 | ACLOCAL_AMFLAGS = -I m4 | ||||||
|   | |||||||
							
								
								
									
										29
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								README.md
									
									
									
									
									
								
							| @@ -22,6 +22,26 @@ Last update Nov 2016. | |||||||
|  |  | ||||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||||
|  |  | ||||||
|  | ### Compilers | ||||||
|  |  | ||||||
|  | Intel ICPC v16.0.3 and later | ||||||
|  |  | ||||||
|  | Clang v3.5 and later (need 3.8 and later for OpenMP) | ||||||
|  |  | ||||||
|  | GCC   v4.9.x (recommended) | ||||||
|  |  | ||||||
|  | GCC   v6.3 and later | ||||||
|  |  | ||||||
|  | ### Important:  | ||||||
|  |  | ||||||
|  | Some versions of GCC appear to have a bug under high optimisation (-O2, -O3). | ||||||
|  |  | ||||||
|  | The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates. | ||||||
|  |  | ||||||
|  | GCC   v5.x | ||||||
|  |  | ||||||
|  | GCC   v6.1, v6.2 | ||||||
|  |  | ||||||
| ### Bug report | ### Bug report | ||||||
|  |  | ||||||
| _To help us track and solve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ | _To help us track and solve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ | ||||||
| @@ -32,7 +52,7 @@ When you file an issue, please go through the following checklist: | |||||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. | 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. | ||||||
| 3. Give the exact `configure` command used. | 3. Give the exact `configure` command used. | ||||||
| 4. Attach `config.log`. | 4. Attach `config.log`. | ||||||
| 5. Attach `config.summary`. | 5. Attach `grid.config.summary`. | ||||||
| 6. Attach the output of `make V=1`. | 6. Attach the output of `make V=1`. | ||||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||||
|  |  | ||||||
| @@ -95,10 +115,10 @@ install Grid. Other options are detailed in the next section, you can also use ` | |||||||
| `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | ||||||
| customise the build. | customise the build. | ||||||
|  |  | ||||||
| Finally, you can build and install Grid: | Finally, you can build, check, and install Grid: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| make; make install | make; make check; make install | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||||
| @@ -121,7 +141,7 @@ If you want to build all the tests at once just use `make tests`. | |||||||
| - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | ||||||
| - `--enable-precision={single|double}`: set the default precision (default: `double`). | - `--enable-precision={single|double}`: set the default precision (default: `double`). | ||||||
| - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication targets is detailed in a section below. | - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication targets is detailed in a section below. | ||||||
| - `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). | - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). | ||||||
| - `--disable-timers`: disable system dependent high-resolution timers. | - `--disable-timers`: disable system dependent high-resolution timers. | ||||||
| - `--enable-chroma`: enable Chroma regression tests. | - `--enable-chroma`: enable Chroma regression tests. | ||||||
| - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | ||||||
| @@ -159,7 +179,6 @@ Alternatively, some CPU codenames can be directly used: | |||||||
|  |  | ||||||
| | `<code>`    | Description                            | | | `<code>`    | Description                            | | ||||||
| | ----------- | -------------------------------------- | | | ----------- | -------------------------------------- | | ||||||
| | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | |  | ||||||
| | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||||
| | `BGQ`       | Blue Gene/Q                            | | | `BGQ`       | Blue Gene/Q                            | | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										68
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										68
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,6 +1,33 @@ | |||||||
| TODO: | TODO: | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
|  | Large item work list: | ||||||
|  | 1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O | ||||||
|  |  | ||||||
|  | 2)- Christoph's local basis expansion Lanczos | ||||||
|  | 3)- BG/Q port and check | ||||||
|  | 4)- Precision conversion and sort out localConvert      <-- partial | ||||||
|  |   - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | ||||||
|  | 5)- Physical propagator interface | ||||||
|  | 6)- Conserved currents | ||||||
|  | 7)- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||||
|  | 8)- HDCR resume | ||||||
|  |  | ||||||
|  | Recent DONE  | ||||||
|  | -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE | ||||||
|  | -- GaugeFix into central location                      <-- DONE | ||||||
|  | -- Scidac and Ildg metadata handling                   <-- DONE | ||||||
|  | -- Binary I/O MPI2 IO                                  <-- DONE | ||||||
|  | -- Binary I/O speed up & x-strips                      <-- DONE | ||||||
|  | -- Cut down the exterior overhead                      <-- DONE | ||||||
|  | -- Interior legs from SHM comms                        <-- DONE | ||||||
|  | -- Half-precision comms                                <-- DONE | ||||||
|  | -- Merge high precision reduction into develop         <-- DONE | ||||||
|  | -- BlockCG, BCGrQ                                      <-- DONE | ||||||
|  | -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE | ||||||
|  |    -- slice* linalg routines for multiRHS, BlockCG     | ||||||
|  |  | ||||||
|  | ----- | ||||||
| * Forces; the UdSdU  term in gauge force term is half of what I think it should | * Forces; the UdSdU  term in gauge force term is half of what I think it should | ||||||
|   be. This is a consequence of taking ONLY the first term in: |   be. This is a consequence of taking ONLY the first term in: | ||||||
|  |  | ||||||
| @@ -21,16 +48,8 @@ TODO: | |||||||
|   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. |   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. | ||||||
|   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. |   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. | ||||||
|  |  | ||||||
|  |  | ||||||
| Policies: |  | ||||||
|  |  | ||||||
| * Link smearing/boundary conds; Policy class based implementation ; framework more in place |  | ||||||
|  |  | ||||||
| * Support different boundary conditions (finite temp, chem. potential ... ) | * Support different boundary conditions (finite temp, chem. potential ... ) | ||||||
|  |  | ||||||
| * Support different fermion representations?  |  | ||||||
|   - contained entirely within the integrator presently |  | ||||||
|  |  | ||||||
| - Sign of force term. | - Sign of force term. | ||||||
|  |  | ||||||
| - Reversibility test. | - Reversibility test. | ||||||
| @@ -41,11 +60,6 @@ Policies: | |||||||
|  |  | ||||||
| - Audit oIndex usage for cb behaviour | - Audit oIndex usage for cb behaviour | ||||||
|  |  | ||||||
| - Rectangle gauge actions. |  | ||||||
|   Iwasaki, |  | ||||||
|   Symanzik, |  | ||||||
|   ... etc... |  | ||||||
|  |  | ||||||
| - Prepare multigrid for HMC. - Alternate setup schemes. | - Prepare multigrid for HMC. - Alternate setup schemes. | ||||||
|  |  | ||||||
| - Support for ILDG --- ugly, not done | - Support for ILDG --- ugly, not done | ||||||
| @@ -55,9 +69,11 @@ Policies: | |||||||
| - FFTnD ? | - FFTnD ? | ||||||
|  |  | ||||||
| - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | ||||||
|  |  | ||||||
| - Gparity force term; Gparity (R)HMC. | - Gparity force term; Gparity (R)HMC. | ||||||
| - Random number state save restore |  | ||||||
| - Mobius implementation clean up to remove #if 0 stale code sequences | - Mobius implementation clean up to remove #if 0 stale code sequences | ||||||
|  |  | ||||||
| - CG -- profile carefully, kernel fusion, whole CG performance measurements. | - CG -- profile carefully, kernel fusion, whole CG performance measurements. | ||||||
|  |  | ||||||
| ================================================================ | ================================================================ | ||||||
| @@ -90,6 +106,7 @@ Insert/Extract | |||||||
| Not sure of status of this -- reverify. Things are working nicely now though. | Not sure of status of this -- reverify. Things are working nicely now though. | ||||||
|  |  | ||||||
| * Make the Tensor types and Complex etc... play more nicely. | * Make the Tensor types and Complex etc... play more nicely. | ||||||
|  |  | ||||||
|   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > |   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > | ||||||
|     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I |     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I | ||||||
|     want to introduce a syntax that does not require this. |     want to introduce a syntax that does not require this. | ||||||
| @@ -112,6 +129,8 @@ Not sure of status of this -- reverify. Things are working nicely now though. | |||||||
| RECENT | RECENT | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
|  |   - Support different fermion representations? -- DONE | ||||||
|  |   - contained entirely within the integrator presently | ||||||
|   - Clean up HMC                                                             -- DONE |   - Clean up HMC                                                             -- DONE | ||||||
|   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE |   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE | ||||||
|   - Simplified the integrators a bit.                                        -- DONE |   - Simplified the integrators a bit.                                        -- DONE | ||||||
| @@ -123,6 +142,26 @@ RECENT | |||||||
|   - Parallel io improvements                                  -- DONE |   - Parallel io improvements                                  -- DONE | ||||||
|   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE |   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE | ||||||
|  |  | ||||||
|  |  | ||||||
|  | DONE: | ||||||
|  | - MultiArray -- MultiRHS done | ||||||
|  | - ConjugateGradientMultiShift -- DONE | ||||||
|  | - MCR                         -- DONE | ||||||
|  | - Remez -- Mike or Boost?     -- DONE | ||||||
|  | - Proto (ET)                  -- DONE | ||||||
|  | - uBlas                       -- DONE ; Eigen | ||||||
|  | - Potentially Useful Boost libraries -- DONE ; Eigen | ||||||
|  | - Aligned allocator; memory pool -- DONE | ||||||
|  | - Multiprecision              -- DONE | ||||||
|  | - Serialization               -- DONE | ||||||
|  | - Regex -- Not needed | ||||||
|  | - Tokenize -- Why? | ||||||
|  |  | ||||||
|  | - Random number state save restore -- DONE | ||||||
|  | - Rectangle gauge actions. -- DONE | ||||||
|  |   Iwasaki, | ||||||
|  |   Symanzik, | ||||||
|  |   ... etc... | ||||||
| Done: Cayley, Partial , ContFrac force terms. | Done: Cayley, Partial , ContFrac force terms. | ||||||
|  |  | ||||||
| DONE | DONE | ||||||
| @@ -207,6 +246,7 @@ Done | |||||||
| FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | ||||||
| ====================================================================================================== | ====================================================================================================== | ||||||
|  |  | ||||||
|  | * Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE | ||||||
| * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | ||||||
|   user pass these? Is this a QCD specific? |   user pass these? Is this a QCD specific? | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
VERSION (9 changes)

| @@ -1,6 +1,5 @@ | |||||||
| Version : 0.6.0 | Version : 0.7.0 | ||||||
|  |  | ||||||
| - AVX512, AVX2, AVX, SSE good | - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | ||||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | - MPI and MPI3 comms optimisations for KNL and OPA finished | ||||||
| - MPI and MPI3 | - Half precision comms | ||||||
| - HiRep, Smearing, Generic gauge group |  | ||||||
|   | |||||||
| @@ -31,6 +31,32 @@ using namespace std; | |||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace Grid::QCD; | using namespace Grid::QCD; | ||||||
|  |  | ||||||
|  | struct time_statistics{ | ||||||
|  |   double mean; | ||||||
|  |   double err; | ||||||
|  |   double min; | ||||||
|  |   double max; | ||||||
|  |  | ||||||
|  |   void statistics(std::vector<double> v){ | ||||||
|  |       double sum = std::accumulate(v.begin(), v.end(), 0.0); | ||||||
|  |       mean = sum / v.size(); | ||||||
|  |  | ||||||
|  |       std::vector<double> diff(v.size()); | ||||||
|  |       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); | ||||||
|  |       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); | ||||||
|  |       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); | ||||||
|  |  | ||||||
|  |       auto result = std::minmax_element(v.begin(), v.end()); | ||||||
|  |       min = *result.first; | ||||||
|  |       max = *result.second; | ||||||
|  | } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | void header(){ | ||||||
|  |   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||||
|  |             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||||
|  | }; | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| @@ -40,15 +66,19 @@ int main (int argc, char ** argv) | |||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   int Nloop=10; |   int Nloop=100; | ||||||
|   int nmu=0; |   int nmu=0; | ||||||
|  |   int maxlat=24; | ||||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; | ||||||
|  |   std::vector<double> t_time(Nloop); | ||||||
|  |   time_statistics timestat; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|   int maxlat=24; |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
|  |  | ||||||
| @@ -58,6 +88,9 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
| @@ -65,8 +98,8 @@ int main (int argc, char ** argv) | |||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  |       double start=usecond(); | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| @@ -102,18 +135,24 @@ int main (int argc, char ** argv) | |||||||
| 	} | 	} | ||||||
| 	Grid.SendToRecvFromComplete(requests); | 	Grid.SendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|       } |  | ||||||
| 	double stop=usecond(); | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
|  |       } | ||||||
|  |  | ||||||
|       double dbytes    = bytes; |       timestat.statistics(t_time); | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |  | ||||||
|  |       double dbytes    = bytes*ppn; | ||||||
|  |       double xbytes    = dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       double time = stop-start; // microseconds |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
| @@ -121,8 +160,7 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|  |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
| @@ -130,6 +168,9 @@ int main (int argc, char ** argv) | |||||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); |       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||||
| @@ -138,8 +179,8 @@ int main (int argc, char ** argv) | |||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  |       double start=usecond(); | ||||||
|      |      | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| @@ -178,27 +219,34 @@ int main (int argc, char ** argv) | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       double stop=usecond(); |       timestat.statistics(t_time); | ||||||
|        |        | ||||||
|       double dbytes    = bytes; |       double dbytes    = bytes*ppn; | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |       double xbytes    = dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       double time = stop-start; |     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|        |        | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }   |   }   | ||||||
|  |  | ||||||
|  |  | ||||||
|   Nloop=10; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
| @@ -209,6 +257,9 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -216,27 +267,33 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double dbytes; | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  | 	double start=usecond(); | ||||||
|  |  | ||||||
|  | 	dbytes=0; | ||||||
|  | 	ncomm=0; | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| 	ncomm=0; |  | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { | 	  if (mpi_layout[mu]>1 ) { | ||||||
| 	   | 	   | ||||||
| 	    ncomm++; | 	    ncomm++; | ||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu][0], | 					      (void *)&xbuf[mu][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| @@ -247,6 +304,7 @@ int main (int argc, char ** argv) | |||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu+4][0], | 					      (void *)&xbuf[mu+4][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| @@ -258,28 +316,34 @@ int main (int argc, char ** argv) | |||||||
| 	} | 	} | ||||||
| 	Grid.StencilSendToRecvFromComplete(requests); | 	Grid.StencilSendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|       } |  | ||||||
| 	double stop=usecond(); | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
| 	 | 	 | ||||||
|       double dbytes    = bytes; |       } | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |       timestat.statistics(t_time); | ||||||
|       double bidibytes = xbytes+rbytes; |  | ||||||
|  |       dbytes=dbytes*ppn; | ||||||
|  |       double xbytes    = dbytes*0.5; | ||||||
|  |       double rbytes    = dbytes*0.5; | ||||||
|  |       double bidibytes = dbytes; | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|       double time = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   Nloop=100; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=32;Ls*=2){ | ||||||
| @@ -290,6 +354,9 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -297,16 +364,18 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |       double dbytes; | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  | 	double start=usecond(); | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  | 	dbytes=0; | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
| @@ -318,6 +387,7 @@ int main (int argc, char ** argv) | |||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu][0], | 					      (void *)&xbuf[mu][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| @@ -330,6 +400,7 @@ int main (int argc, char ** argv) | |||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu+4][0], | 					      (void *)&xbuf[mu+4][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| @@ -342,18 +413,26 @@ int main (int argc, char ** argv) | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
| 	 | 	 | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |  | ||||||
|  |  | ||||||
|       double dbytes    = bytes; |       timestat.statistics(t_time); | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |  | ||||||
|       double bidibytes = xbytes+rbytes; |  | ||||||
|  |  | ||||||
|       double time = stop-start; // microseconds |       dbytes=dbytes*ppn; | ||||||
|  |       double xbytes    = dbytes*0.5; | ||||||
|  |       double rbytes    = dbytes*0.5; | ||||||
|  |       double bidibytes = dbytes; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|   |   | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,28 +1,22 @@ | |||||||
|  /************************************************************************************* |  /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_dwf.cc |     Source file: ./benchmarks/Benchmark_dwf.cc | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| @@ -151,9 +145,7 @@ int main (int argc, char ** argv) | |||||||
|   RealD M5  =1.8; |   RealD M5  =1.8; | ||||||
|  |  | ||||||
|   RealD NP = UGrid->_Nprocessors; |   RealD NP = UGrid->_Nprocessors; | ||||||
|  |   RealD NN = UGrid->NodeCount(); | ||||||
|   std::cout << GridLogMessage << "Creating action operator " << std::endl; |  | ||||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; |   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||||
| @@ -163,16 +155,22 @@ int main (int argc, char ** argv) | |||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|  |  | ||||||
|  |   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|   int ncall =1000; |   int ncall =1000; | ||||||
|   if (1) { |   if (1) { | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     Dw.ZeroCounters(); |     Dw.ZeroCounters(); | ||||||
|     Dw.Dhop(src,result,0); |     Dw.Dhop(src,result,0); | ||||||
|  |     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; | ||||||
|     double t0=usecond(); |     double t0=usecond(); | ||||||
|     for(int i=0;i<ncall;i++){ |     for(int i=0;i<ncall;i++){ | ||||||
|       __SSC_START; |       __SSC_START; | ||||||
| @@ -190,6 +188,7 @@ int main (int argc, char ** argv) | |||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
| @@ -206,6 +205,34 @@ int main (int argc, char ** argv) | |||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |   if (1) { | ||||||
|  |     FGrid->Barrier(); | ||||||
|  |     DwH.ZeroCounters(); | ||||||
|  |     DwH.Dhop(src,result,0); | ||||||
|  |     double t0=usecond(); | ||||||
|  |     for(int i=0;i<ncall;i++){ | ||||||
|  |       __SSC_START; | ||||||
|  |       DwH.Dhop(src,result,0); | ||||||
|  |       __SSC_STOP; | ||||||
|  |     } | ||||||
|  |     double t1=usecond(); | ||||||
|  |     FGrid->Barrier(); | ||||||
|  |      | ||||||
|  |     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||||
|  |     double flops=1344*volume*ncall; | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|  |     err = ref-result;  | ||||||
|  |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
|  |     assert (norm2(err)< 1.0e-3 ); | ||||||
|  |     DwH.Report(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { |   { | ||||||
|  |  | ||||||
| @@ -214,6 +241,10 @@ int main (int argc, char ** argv) | |||||||
|     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -245,6 +276,7 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; |     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||||
|     sDw.Report(); |     sDw.Report(); | ||||||
|     RealD sum=0; |     RealD sum=0; | ||||||
| @@ -277,6 +309,10 @@ int main (int argc, char ** argv) | |||||||
|       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  | ||||||
| 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  | ||||||
| @@ -316,6 +352,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|       sDw.Report(); |       sDw.Report(); | ||||||
|  |  | ||||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); |       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||||
| @@ -394,14 +431,15 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|  |  | ||||||
|   // S-direction is INNERMOST and takes no part in the parity. |   // S-direction is INNERMOST and takes no part in the parity. | ||||||
|   static int Opt;  // these are a temporary hack |  | ||||||
|   static int Comms;  // these are a temporary hack |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; |   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -422,6 +460,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); |   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
| @@ -453,3 +492,4 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -66,7 +66,8 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     Vec tsum; tsum = zero; |     Vec tsum; tsum = zero; | ||||||
|  |  | ||||||
|     GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |     GridParallelRNG          pRNG(&Grid);       | ||||||
|  |     pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101})); | ||||||
|  |  | ||||||
|     std::vector<double> stop(threads); |     std::vector<double> stop(threads); | ||||||
|     Vector<Vec> sum(threads); |     Vector<Vec> sum(threads); | ||||||
|   | |||||||
| @@ -55,8 +55,8 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|   uint64_t lmax=44; |   uint64_t lmax=64; | ||||||
| #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | #define NLOOP (100*lmax*lmax*lmax*lmax/vol) | ||||||
|   for(int lat=4;lat<=lmax;lat+=4){ |   for(int lat=4;lat<=lmax;lat+=4){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
| @@ -65,7 +65,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
| @@ -100,7 +100,7 @@ int main (int argc, char ** argv) | |||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
| @@ -138,7 +138,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
| @@ -173,7 +173,7 @@ int main (int argc, char ** argv) | |||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid); //random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid); //random(pRNG,x); | ||||||
|       LatticeVec y(&Grid); //random(pRNG,y); |       LatticeVec y(&Grid); //random(pRNG,y); | ||||||
|   | |||||||
| @@ -51,7 +51,7 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<int> seeds({1,2,3,4}); |   std::vector<int> seeds({1,2,3,4}); | ||||||
|   GridParallelRNG          pRNG(&Grid); |   GridParallelRNG          pRNG(&Grid); | ||||||
|   pRNG.SeedFixedIntegers(seeds); |   pRNG.SeedFixedIntegers(seeds); | ||||||
|   //  pRNG.SeedRandomDevice(); |   //  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  |   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  | ||||||
|   typename ImprovedStaggeredFermionR::ImplParams params;  |   typename ImprovedStaggeredFermionR::ImplParams params;  | ||||||
|   | |||||||
| @@ -35,8 +35,9 @@ using namespace Grid::QCD; | |||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
|  | #define LMAX (64) | ||||||
|  |  | ||||||
|   int Nloop=1000; |   int Nloop=20; | ||||||
|  |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
| @@ -50,12 +51,12 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid);// random(pRNG,z); |       LatticeColourMatrix z(&Grid);// random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid);// random(pRNG,x); |       LatticeColourMatrix x(&Grid);// random(pRNG,x); | ||||||
| @@ -82,13 +83,13 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
| @@ -113,13 +114,13 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
| @@ -144,13 +145,13 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice(); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); |       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); |       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||||
|   | |||||||
| @@ -69,7 +69,7 @@ int main (int argc, char ** argv) | |||||||
|   std::vector<int> seeds({1,2,3,4}); |   std::vector<int> seeds({1,2,3,4}); | ||||||
|   GridParallelRNG          pRNG(&Grid); |   GridParallelRNG          pRNG(&Grid); | ||||||
|   pRNG.SeedFixedIntegers(seeds); |   pRNG.SeedFixedIntegers(seeds); | ||||||
|   //  pRNG.SeedRandomDevice(); |   //  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||||
|  |  | ||||||
|   LatticeFermion src   (&Grid); random(pRNG,src); |   LatticeFermion src   (&Grid); random(pRNG,src); | ||||||
|   LatticeFermion result(&Grid); result=zero; |   LatticeFermion result(&Grid); result=zero; | ||||||
|   | |||||||
| @@ -1,11 +1,7 @@ | |||||||
| include Make.inc | include Make.inc | ||||||
|  |  | ||||||
| simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o | bench-local: all | ||||||
|  | 	./Benchmark_su3 | ||||||
| EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a | 	./Benchmark_memory_bandwidth | ||||||
|  | 	./Benchmark_wilson | ||||||
| libsimple_su3_test_a_SOURCES = simple_su3_test.cc | 	./Benchmark_dwf --dslash-unroll | ||||||
|  |  | ||||||
| libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc |  | ||||||
|  |  | ||||||
| libsimple_simd_test_a_SOURCES = simple_simd_test.cc |  | ||||||
| @@ -1,6 +1,6 @@ | |||||||
| #!/usr/bin/env bash | #!/usr/bin/env bash | ||||||
|  |  | ||||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | ||||||
|  |  | ||||||
| echo "-- deploying Eigen source..." | echo "-- deploying Eigen source..." | ||||||
| wget ${EIGEN_URL} --no-check-certificate | wget ${EIGEN_URL} --no-check-certificate | ||||||
|   | |||||||
							
								
								
									
										122
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										122
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,16 +1,19 @@ | |||||||
| AC_PREREQ([2.63]) | AC_PREREQ([2.63]) | ||||||
| AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) | AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) | ||||||
| AC_CANONICAL_BUILD | AC_CANONICAL_BUILD | ||||||
| AC_CANONICAL_HOST | AC_CANONICAL_HOST | ||||||
| AC_CANONICAL_TARGET | AC_CANONICAL_TARGET | ||||||
| AM_INIT_AUTOMAKE(subdir-objects) | AM_INIT_AUTOMAKE([subdir-objects 1.13]) | ||||||
|  | AM_EXTRA_RECURSIVE_TARGETS([tests bench]) | ||||||
| AC_CONFIG_MACRO_DIR([m4]) | AC_CONFIG_MACRO_DIR([m4]) | ||||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||||
| AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | ||||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||||
|  |  | ||||||
|  | ################ Get git info | ||||||
|  | #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) | ||||||
|  |  | ||||||
| ############### Checks for programs | ############### Checks for programs | ||||||
| CXXFLAGS="-O3 $CXXFLAGS" |  | ||||||
| AC_PROG_CXX | AC_PROG_CXX | ||||||
| AC_PROG_RANLIB | AC_PROG_RANLIB | ||||||
|  |  | ||||||
| @@ -24,6 +27,9 @@ AX_GXX_VERSION | |||||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||||
|       [version of g++ that will compile the code]) |       [version of g++ that will compile the code]) | ||||||
|  |  | ||||||
|  | CXXFLAGS="-g $CXXFLAGS" | ||||||
|  |  | ||||||
|  |  | ||||||
| ############### Checks for typedefs, structures, and compiler characteristics | ############### Checks for typedefs, structures, and compiler characteristics | ||||||
| AC_TYPE_SIZE_T | AC_TYPE_SIZE_T | ||||||
| AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||||
| @@ -67,6 +73,13 @@ AC_ARG_WITH([fftw], | |||||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] |             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) |             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
|  | ############### LIME | ||||||
|  | AC_ARG_WITH([lime], | ||||||
|  |             [AS_HELP_STRING([--with-lime=prefix], | ||||||
|  |             [try this for a non-standard install prefix of the LIME library])], | ||||||
|  |             [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"] | ||||||
|  |             [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### lapack | ############### lapack | ||||||
| AC_ARG_ENABLE([lapack], | AC_ARG_ENABLE([lapack], | ||||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], |     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], | ||||||
| @@ -83,6 +96,18 @@ case ${ac_LAPACK} in | |||||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; |         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; | ||||||
| esac | esac | ||||||
|  |  | ||||||
|  | ############### FP16 conversions | ||||||
|  | AC_ARG_ENABLE([sfw-fp16], | ||||||
|  |     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], | ||||||
|  |     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) | ||||||
|  | case ${ac_SFW_FP16} in | ||||||
|  |     yes) | ||||||
|  |       AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);; | ||||||
|  |     no);; | ||||||
|  |     *) | ||||||
|  |       AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);; | ||||||
|  | esac | ||||||
|  |  | ||||||
| ############### MKL | ############### MKL | ||||||
| AC_ARG_ENABLE([mkl], | AC_ARG_ENABLE([mkl], | ||||||
|     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], |     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], | ||||||
| @@ -152,6 +177,18 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], | |||||||
|                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] |                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] | ||||||
|                [have_fftw=true]) |                [have_fftw=true]) | ||||||
|  |  | ||||||
|  | AC_SEARCH_LIBS([limeCreateReader], [lime], | ||||||
|  |                [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])] | ||||||
|  |                [have_lime=true], | ||||||
|  | 	       [AC_MSG_WARN(C-LIME library was not found in your system. | ||||||
|  | In order to use ILGG file format please install or provide the correct path to your installation | ||||||
|  | Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) | ||||||
|  |  | ||||||
|  | AC_SEARCH_LIBS([crc32], [z], | ||||||
|  |                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] | ||||||
|  |                [have_zlib=true], | ||||||
|  | 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] |                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||||
|                [have_hdf5=true] |                [have_hdf5=true] | ||||||
| @@ -176,19 +213,26 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|     case ${ac_SIMD} in |     case ${ac_SIMD} in | ||||||
|       SSE4) |       SSE4) | ||||||
|         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) |         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) | ||||||
|  | 	case ${ac_SFW_FP16} in | ||||||
|  | 	  yes) | ||||||
| 	  SIMD_FLAGS='-msse4.2';; | 	  SIMD_FLAGS='-msse4.2';; | ||||||
|  | 	  no) | ||||||
|  | 	  SIMD_FLAGS='-msse4.2 -mf16c';; | ||||||
|  | 	  *) | ||||||
|  |           AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);; | ||||||
|  | 	esac;; | ||||||
|       AVX) |       AVX) | ||||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) |         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx';; |         SIMD_FLAGS='-mavx -mf16c';; | ||||||
|       AVXFMA4) |       AVXFMA4) | ||||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) |         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||||
|         SIMD_FLAGS='-mavx -mfma4';; |         SIMD_FLAGS='-mavx -mfma4 -mf16c';; | ||||||
|       AVXFMA) |       AVXFMA) | ||||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) |         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||||
|         SIMD_FLAGS='-mavx -mfma';; |         SIMD_FLAGS='-mavx -mfma -mf16c';; | ||||||
|       AVX2) |       AVX2) | ||||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) |         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx2 -mfma';; |         SIMD_FLAGS='-mavx2 -mfma -mf16c';; | ||||||
|       AVX512) |       AVX512) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; |         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; | ||||||
| @@ -321,7 +365,7 @@ AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | |||||||
| ############### RNG selection | ############### RNG selection | ||||||
| AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ | AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\ | ||||||
| 	            [Select Random Number Generator to be used])],\ | 	            [Select Random Number Generator to be used])],\ | ||||||
| 	            [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) | 	            [ac_RNG=${enable_rng}],[ac_RNG=sitmo]) | ||||||
|  |  | ||||||
| case ${ac_RNG} in | case ${ac_RNG} in | ||||||
|      ranlux48) |      ranlux48) | ||||||
| @@ -384,32 +428,31 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg]) | |||||||
|  |  | ||||||
| ############### Output | ############### Output | ||||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||||
|  | GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" | ||||||
|  | GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS" | ||||||
|  | GRID_LIBS=$LIBS | ||||||
|  | GRID_SHORT_SHA=`git rev-parse --short HEAD` | ||||||
|  | GRID_SHA=`git rev-parse HEAD` | ||||||
|  | GRID_BRANCH=`git rev-parse --abbrev-ref HEAD` | ||||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||||
| AC_SUBST([AM_CFLAGS]) | AC_SUBST([AM_CFLAGS]) | ||||||
| AC_SUBST([AM_CXXFLAGS]) | AC_SUBST([AM_CXXFLAGS]) | ||||||
| AC_SUBST([AM_LDFLAGS]) | AC_SUBST([AM_LDFLAGS]) | ||||||
| AC_CONFIG_FILES(Makefile) | AC_SUBST([GRID_CXXFLAGS]) | ||||||
| AC_CONFIG_FILES(lib/Makefile) | AC_SUBST([GRID_LDFLAGS]) | ||||||
| AC_CONFIG_FILES(tests/Makefile) | AC_SUBST([GRID_LIBS]) | ||||||
| AC_CONFIG_FILES(tests/IO/Makefile) | AC_SUBST([GRID_SHA]) | ||||||
| AC_CONFIG_FILES(tests/core/Makefile) | AC_SUBST([GRID_BRANCH]) | ||||||
| AC_CONFIG_FILES(tests/debug/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/forces/Makefile) | git_commit=`cd $srcdir && ./scripts/configure.commit` | ||||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/hmc/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/solver/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) |  | ||||||
| AC_CONFIG_FILES(benchmarks/Makefile) |  | ||||||
| AC_CONFIG_FILES(extras/Makefile) |  | ||||||
| AC_CONFIG_FILES(extras/Hadrons/Makefile) |  | ||||||
| AC_OUTPUT |  | ||||||
|  |  | ||||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
| Summary of configuration for $PACKAGE v$VERSION | Summary of configuration for $PACKAGE v$VERSION | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | ----- GIT VERSION ------------------------------------- | ||||||
|  | $git_commit | ||||||
| ----- PLATFORM ---------------------------------------- | ----- PLATFORM ---------------------------------------- | ||||||
| architecture (build)        : $build_cpu | architecture (build)        : $build_cpu | ||||||
| os (build)                  : $build_os | os (build)                  : $build_os | ||||||
| @@ -422,10 +465,12 @@ SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | |||||||
| Threading                   : ${ac_openmp} | Threading                   : ${ac_openmp} | ||||||
| Communications type         : ${comms_type} | Communications type         : ${comms_type} | ||||||
| Default precision           : ${ac_PRECISION} | Default precision           : ${ac_PRECISION} | ||||||
|  | Software FP16 conversion    : ${ac_SFW_FP16} | ||||||
| RNG choice                  : ${ac_RNG} | RNG choice                  : ${ac_RNG} | ||||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||||
| LAPACK                      : ${ac_LAPACK} | LAPACK                      : ${ac_LAPACK} | ||||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi` | ||||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||||
| ----- BUILD FLAGS ------------------------------------- | ----- BUILD FLAGS ------------------------------------- | ||||||
| @@ -435,7 +480,32 @@ LDFLAGS: | |||||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
| LIBS: | LIBS: | ||||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
| -------------------------------------------------------" > config.summary | -------------------------------------------------------" > grid.configure.summary | ||||||
|  |  | ||||||
|  | GRID_SUMMARY="`cat grid.configure.summary`" | ||||||
|  | AM_SUBST_NOTMAKE([GRID_SUMMARY]) | ||||||
|  | AC_SUBST([GRID_SUMMARY]) | ||||||
|  |  | ||||||
|  | AC_CONFIG_FILES([grid-config], [chmod +x grid-config]) | ||||||
|  | AC_CONFIG_FILES(Makefile) | ||||||
|  | AC_CONFIG_FILES(lib/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/IO/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/core/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/debug/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/forces/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/hmc/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/solver/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/smearing/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/testu01/Makefile) | ||||||
|  | AC_CONFIG_FILES(benchmarks/Makefile) | ||||||
|  | AC_CONFIG_FILES(extras/Makefile) | ||||||
|  | AC_CONFIG_FILES(extras/Hadrons/Makefile) | ||||||
|  | AC_OUTPUT | ||||||
|  |  | ||||||
| echo "" | echo "" | ||||||
| cat config.summary | cat grid.configure.summary | ||||||
| echo "" | echo "" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName) | |||||||
| sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | ||||||
|  |  | ||||||
| #define DEFINE_MEMPEAK \ | #define DEFINE_MEMPEAK \ | ||||||
| auto memPeak = [this](const std::vector<unsigned int> &program)\ | GeneticScheduler<unsigned int>::ObjFunc memPeak = \ | ||||||
|  | [this](const std::vector<unsigned int> &program)\ | ||||||
| {\ | {\ | ||||||
|     unsigned int memPeak;\ |     unsigned int memPeak;\ | ||||||
|     bool         msg;\ |     bool         msg;\ | ||||||
|   | |||||||
| @@ -145,6 +145,15 @@ std::string typeName(void) | |||||||
|     return typeName(typeIdPt<T>()); |     return typeName(typeIdPt<T>()); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Default reader/writer types used by the modules: the HDF5 classes when | ||||||
|  | // HAVE_HDF5 is defined, the XML classes otherwise. | ||||||
|  | #if defined(HAVE_HDF5) | ||||||
|  | using CorrReader = Hdf5Reader; | ||||||
|  | using CorrWriter = Hdf5Writer; | ||||||
|  | #else | ||||||
|  | using CorrReader = XmlReader; | ||||||
|  | using CorrWriter = XmlWriter; | ||||||
|  | #endif | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Global_hpp_ | #endif // Hadrons_Global_hpp_ | ||||||
|   | |||||||
| @@ -29,12 +29,20 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MSource/Wall.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||||
| #include <Grid/Hadrons/Modules/Quark.hpp> | #include <Grid/Hadrons/Modules/Quark.hpp> | ||||||
|   | |||||||
| @@ -48,7 +48,8 @@ public: | |||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     unsigned int, Ls, |                                     unsigned int, Ls, | ||||||
|                                     double      , mass, |                                     double      , mass, | ||||||
|                                     double      , M5); |                                     double      , M5, | ||||||
|  |                                     std::string , boundary); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| @@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void) | |||||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " |                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" |                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|  |     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||||
|  |                  << std::endl; | ||||||
|     env().createGrid(par().Ls); |     env().createGrid(par().Ls); | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &g4     = *env().getGrid(); |     auto &g4     = *env().getGrid(); | ||||||
|     auto &grb4   = *env().getRbGrid(); |     auto &grb4   = *env().getRbGrid(); | ||||||
|     auto &g5     = *env().getGrid(par().Ls); |     auto &g5     = *env().getGrid(par().Ls); | ||||||
|     auto &grb5   = *env().getRbGrid(par().Ls); |     auto &grb5   = *env().getRbGrid(par().Ls); | ||||||
|  |     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||||
|  |     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); | ||||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, |     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||||
|                                                 par().mass, par().M5); |                                                 par().mass, par().M5, | ||||||
|  |                                                 implParams); | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -46,7 +46,8 @@ class WilsonPar: Serializable | |||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     double     , mass); |                                     double     , mass, | ||||||
|  |                                     std::string, boundary); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| @@ -112,10 +113,15 @@ void TWilson<FImpl>::execute() | |||||||
| { | { | ||||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass |     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; |                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||||
|  |     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||||
|  |                  << std::endl; | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &grid   = *env().getGrid(); |     auto &grid   = *env().getGrid(); | ||||||
|     auto &gridRb = *env().getRbGrid(); |     auto &gridRb = *env().getRbGrid(); | ||||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); |     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||||
|  |     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); | ||||||
|  |     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, | ||||||
|  |                                             implParams); | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" |                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" | ||||||
|                  << par().q3 << "'" << std::endl; |                  << par().q3 << "'" << std::endl; | ||||||
|      |      | ||||||
|     XmlWriter             writer(par().output); |     CorrWriter             writer(par().output); | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); |     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); | ||||||
|   | |||||||
							
								
								
									
										144
									
								
								extras/Hadrons/Modules/MContraction/DiscLoop.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								extras/Hadrons/Modules/MContraction/DiscLoop.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_DiscLoop_hpp_ | ||||||
|  | #define Hadrons_DiscLoop_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                                DiscLoop                                    * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Input parameters of the DiscLoop module: | ||||||
|  | //   q_loop - name of the propagator object fetched from the environment, | ||||||
|  | //   gamma  - gamma matrix inserted in the trace, | ||||||
|  | //   output - argument handed to the CorrWriter constructor. | ||||||
|  | class DiscLoopPar : Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS( | ||||||
|  |         DiscLoopPar, | ||||||
|  |         std::string,    q_loop, | ||||||
|  |         Gamma::Algebra, gamma, | ||||||
|  |         std::string,    output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing trace(gamma * q_loop), summed per slice along Tp, and | ||||||
|  | // writing the result through a CorrWriter. | ||||||
|  | template <typename FImpl> | ||||||
|  | class TDiscLoop : public Module<DiscLoopPar> | ||||||
|  | { | ||||||
|  |     TYPE_ALIASES(FImpl,); | ||||||
|  |  | ||||||
|  |     // record written by execute(): the inserted gamma and one value per slice | ||||||
|  |     class Result : Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS( | ||||||
|  |             Result, | ||||||
|  |             Gamma::Algebra,       gamma, | ||||||
|  |             std::vector<Complex>, corr); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  | public: | ||||||
|  |     // construction / destruction | ||||||
|  |     TDiscLoop(const std::string name); | ||||||
|  |     virtual ~TDiscLoop() = default; | ||||||
|  |     // names of the objects this module depends on and declares | ||||||
|  |     virtual std::vector<std::string> getInput(); | ||||||
|  |     virtual std::vector<std::string> getOutput(); | ||||||
|  |     // preparation step | ||||||
|  |     virtual void setup(); | ||||||
|  |     // contraction and output | ||||||
|  |     virtual void execute(); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                       TDiscLoop implementation                             * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor: forwards the module name to the Module<DiscLoopPar> base /////// | ||||||
|  | template <typename FImpl> | ||||||
|  | TDiscLoop<FImpl>::TDiscLoop(const std::string name) | ||||||
|  |     : Module<DiscLoopPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies: the single input is the object named by the q_loop parameter // | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TDiscLoop<FImpl>::getInput() | ||||||
|  | { | ||||||
|  |     return {par().q_loop}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // products: a single entry, the module's own name | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TDiscLoop<FImpl>::getOutput() | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup: empty, no preparation is performed by this module /////////////////// | ||||||
|  | template <typename FImpl> | ||||||
|  | void TDiscLoop<FImpl>::setup() | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution: trace(gamma * q_loop), summed per slice along Tp, written as "disc" | ||||||
|  | template <typename FImpl> | ||||||
|  | void TDiscLoop<FImpl>::execute() | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing disconnected loop contraction '" << getName() | ||||||
|  |                  << "' using '" << par().q_loop << "' with " << par().gamma | ||||||
|  |                  << " insertion." << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter            out(par().output); | ||||||
|  |     PropagatorField       &loop = *env().template getObject<PropagatorField>(par().q_loop); | ||||||
|  |     LatticeComplex        traced(env().getGrid()); | ||||||
|  |     Gamma                 insertion(par().gamma); | ||||||
|  |     std::vector<TComplex> slices; | ||||||
|  |     Result                res; | ||||||
|  |  | ||||||
|  |     // trace of gamma times the propagator, then one sum per slice along Tp | ||||||
|  |     traced = trace(insertion*loop); | ||||||
|  |     sliceSum(traced, slices, Tp); | ||||||
|  |  | ||||||
|  |     // copy every slice sum into the result, stripped by TensorRemove | ||||||
|  |     const auto nSlice = slices.size(); | ||||||
|  |     res.gamma = par().gamma; | ||||||
|  |     res.corr.resize(nSlice); | ||||||
|  |     for (unsigned int t = 0; t < nSlice; ++t) | ||||||
|  |     { | ||||||
|  |         res.corr[t] = TensorRemove(slices[t]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     write(out, "disc", res); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_DiscLoop_hpp_ | ||||||
							
								
								
									
										170
									
								
								extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_Gamma3pt_hpp_ | ||||||
|  | #define Hadrons_Gamma3pt_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * 3pt contraction with gamma matrix insertion. | ||||||
|  |  * | ||||||
|  |  * Schematic: | ||||||
|  |  * | ||||||
|  |  *             q2           q3 | ||||||
|  |  *        /----<------*------<----¬ | ||||||
|  |  *       /          gamma          \ | ||||||
|  |  *      /                           \ | ||||||
|  |  *   i *                            * f | ||||||
|  |  *      \                          / | ||||||
|  |  *       \                        / | ||||||
|  |  *        \----------->----------/ | ||||||
|  |  *                   q1 | ||||||
|  |  * | ||||||
|  |  *      trace(g5*q1*adj(q2)*g5*gamma*q3) | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                               Gamma3pt                                     * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Input parameters of the Gamma3pt module: | ||||||
|  | //   q1, q2, q3 - names of the three propagator objects fetched from the environment, | ||||||
|  | //   gamma      - gamma matrix inserted in the trace, | ||||||
|  | //   output     - argument handed to the CorrWriter constructor. | ||||||
|  | class Gamma3ptPar : Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS( | ||||||
|  |         Gamma3ptPar, | ||||||
|  |         std::string,    q1, | ||||||
|  |         std::string,    q2, | ||||||
|  |         std::string,    q3, | ||||||
|  |         Gamma::Algebra, gamma, | ||||||
|  |         std::string,    output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing trace(g5*q1*adj(q2)*(g5*gamma)*q3), summed per slice along | ||||||
|  | // Tp, and writing the result through a CorrWriter. | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | class TGamma3pt : public Module<Gamma3ptPar> | ||||||
|  | { | ||||||
|  |     TYPE_ALIASES(FImpl1, 1); | ||||||
|  |     TYPE_ALIASES(FImpl2, 2); | ||||||
|  |     TYPE_ALIASES(FImpl3, 3); | ||||||
|  |  | ||||||
|  |     // record written by execute(): the inserted gamma and one value per slice | ||||||
|  |     class Result : Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS( | ||||||
|  |             Result, | ||||||
|  |             Gamma::Algebra,       gamma, | ||||||
|  |             std::vector<Complex>, corr); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  | public: | ||||||
|  |     // construction / destruction | ||||||
|  |     TGamma3pt(const std::string name); | ||||||
|  |     virtual ~TGamma3pt() = default; | ||||||
|  |     // names of the objects this module depends on and declares | ||||||
|  |     virtual std::vector<std::string> getInput(); | ||||||
|  |     virtual std::vector<std::string> getOutput(); | ||||||
|  |     // preparation step | ||||||
|  |     virtual void setup(); | ||||||
|  |     // contraction and output | ||||||
|  |     virtual void execute(); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                       TGamma3pt implementation                             * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor: forwards the module name to the Module<Gamma3ptPar> base /////// | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name) | ||||||
|  |     : Module<Gamma3ptPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies: the objects named by the q1, q2 and q3 parameters, in that order | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput() | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // products: a single entry, the module's own name | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput() | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup: empty, no preparation is performed by this module /////////////////// | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup() | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution: trace(g5*q1*adj(q2)*(g5*gamma)*q3), summed per slice along Tp, | ||||||
|  | // written as "gamma3pt" | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute() | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing 3pt contractions '" << getName() << "' using" | ||||||
|  |                  << " quarks '" << par().q1 << "', '" << par().q2 << "' and '" | ||||||
|  |                  << par().q3 << "', with " << par().gamma << " insertion." | ||||||
|  |                  << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter            out(par().output); | ||||||
|  |     PropagatorField1      &prop1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|  |     PropagatorField2      &prop2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|  |     PropagatorField3      &prop3 = *env().template getObject<PropagatorField3>(par().q3); | ||||||
|  |     LatticeComplex        traced(env().getGrid()); | ||||||
|  |     Gamma                 gamma5(Gamma::Algebra::Gamma5); | ||||||
|  |     Gamma                 insertion(par().gamma); | ||||||
|  |     std::vector<TComplex> slices; | ||||||
|  |     Result                res; | ||||||
|  |  | ||||||
|  |     // the factor order is kept exactly as written: these products do not commute | ||||||
|  |     traced = trace(gamma5*prop1*adj(prop2)*(gamma5*insertion)*prop3); | ||||||
|  |     sliceSum(traced, slices, Tp); | ||||||
|  |  | ||||||
|  |     // copy every slice sum into the result, stripped by TensorRemove | ||||||
|  |     const auto nSlice = slices.size(); | ||||||
|  |     res.gamma = par().gamma; | ||||||
|  |     res.corr.resize(nSlice); | ||||||
|  |     for (unsigned int t = 0; t < nSlice; ++t) | ||||||
|  |     { | ||||||
|  |         res.corr[t] = TensorRemove(slices[t]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     write(out, "gamma3pt", res); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_Gamma3pt_hpp_ | ||||||
| @@ -6,8 +6,10 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |         Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -36,20 +38,39 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   | ||||||
|  |  Meson contractions | ||||||
|  |  ----------------------------- | ||||||
|  |   | ||||||
|  |  * options: | ||||||
|  |  - q1: input propagator 1 (string) | ||||||
|  |  - q2: input propagator 2 (string) | ||||||
|  |  - gammas: gamma products to insert at sink & source, pairs of gamma matrices  | ||||||
|  |            (space-separated strings) in angled brackets (i.e. <g_sink g_src>), | ||||||
|  |            in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>"). | ||||||
|  |  | ||||||
|  |            Special values: "all" - perform all possible contractions. | ||||||
|  |  - mom: momentum insertion, space-separated float sequence (e.g. ".1 .2 1. 0."), | ||||||
|  |         given as multiples of (2*pi) / L. | ||||||
|  | */ | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                                TMeson                                       * |  *                                TMeson                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair; | ||||||
|  |  | ||||||
| class MesonPar: Serializable | class MesonPar: Serializable | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, | ||||||
|                                     std::string, q1, |                                     std::string, q1, | ||||||
|                                     std::string, q2, |                                     std::string, q2, | ||||||
|                                     std::string,    output, |                                     std::string, gammas, | ||||||
|                                     Gamma::Algebra, gammaSource, |                                     std::string, mom, | ||||||
|                                     Gamma::Algebra, gammaSink); |                                     std::string, output); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| @@ -61,7 +82,10 @@ public: | |||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||||
|  |                                         Gamma::Algebra, gamma_snk, | ||||||
|  |                                         Gamma::Algebra, gamma_src, | ||||||
|  |                                         std::vector<Complex>, corr); | ||||||
|     }; |     }; | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
| @@ -71,6 +95,7 @@ public: | |||||||
|     // dependencies/products |     // dependencies/products | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     virtual void parseGammaString(std::vector<GammaPair> &gammaList); | ||||||
|     // execution |     // execution | ||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
| @@ -103,6 +128,31 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | |||||||
|     return output; |     return output; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Fills gammaList with the (sink, source) gamma pairs selected by the | ||||||
|  | // "gammas" parameter. Any previous content of gammaList is discarded. | ||||||
|  | template <typename FImpl1, typename FImpl2> | ||||||
|  | void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) | ||||||
|  | { | ||||||
|  |     gammaList.clear(); | ||||||
|  |     if (par().gammas != "all") | ||||||
|  |     { | ||||||
|  |         // Explicit list: decode the "<g_sink g_src>..." sequence. | ||||||
|  |         gammaList = strToVec<GammaPair>(par().gammas); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     // "all": every ordered pair of odd-valued enumerators below Gamma::nGamma | ||||||
|  |     // (sink index in the outer loop, source index in the inner loop). | ||||||
|  |     for (unsigned int snk = 1; snk < Gamma::nGamma; snk += 2) | ||||||
|  |     { | ||||||
|  |         for (unsigned int src = 1; src < Gamma::nGamma; src += 2) | ||||||
|  |         { | ||||||
|  |             gammaList.push_back(GammaPair(static_cast<Gamma::Algebra>(snk), | ||||||
|  |                                           static_cast<Gamma::Algebra>(src))); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| void TMeson<FImpl1, FImpl2>::execute(void) | void TMeson<FImpl1, FImpl2>::execute(void) | ||||||
| @@ -111,21 +161,44 @@ void TMeson<FImpl1, FImpl2>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" |                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     XmlWriter             writer(par().output); |     CorrWriter              writer(par().output); | ||||||
|     PropagatorField1       &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1       &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2       &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2       &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     LatticeComplex         c(env().getGrid()); |     LatticeComplex         c(env().getGrid()); | ||||||
|     Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink); |  | ||||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); |     Gamma                  g5(Gamma::Algebra::Gamma5); | ||||||
|  |     std::vector<GammaPair> gammaList; | ||||||
|     std::vector<TComplex>  buf; |     std::vector<TComplex>  buf; | ||||||
|     Result                result; |     std::vector<Result>    result; | ||||||
|  |     std::vector<Real>      p; | ||||||
|  |  | ||||||
|     c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5); |     p  = strToVec<Real>(par().mom); | ||||||
|  |     LatticeComplex         ph(env().getGrid()), coor(env().getGrid()); | ||||||
|  |     Complex                i(0.0,1.0); | ||||||
|  |     ph = zero; | ||||||
|  |     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||||
|  |     { | ||||||
|  |         LatticeCoordinate(coor, mu); | ||||||
|  |         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||||
|  |     } | ||||||
|  |     ph = exp((Real)(2*M_PI)*i*ph); | ||||||
|  |      | ||||||
|  |     parseGammaString(gammaList); | ||||||
|  |  | ||||||
|  |     result.resize(gammaList.size()); | ||||||
|  |     for (unsigned int i = 0; i < result.size(); ++i) | ||||||
|  |     { | ||||||
|  |         Gamma gSnk(gammaList[i].first); | ||||||
|  |         Gamma gSrc(gammaList[i].second); | ||||||
|  |         c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph; | ||||||
|         sliceSum(c, buf, Tp); |         sliceSum(c, buf, Tp); | ||||||
|     result.corr.resize(buf.size()); |  | ||||||
|  |         result[i].gamma_snk = gammaList[i].first; | ||||||
|  |         result[i].gamma_src = gammaList[i].second; | ||||||
|  |         result[i].corr.resize(buf.size()); | ||||||
|         for (unsigned int t = 0; t < buf.size(); ++t) |         for (unsigned int t = 0; t < buf.size(); ++t) | ||||||
|         { |         { | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |             result[i].corr[t] = TensorRemove(buf[t]); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										114
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_WeakHamiltonian_hpp_ | ||||||
|  | #define Hadrons_WeakHamiltonian_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakHamiltonian                                    * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | /******************************************************************************* | ||||||
|  |  * Utilities for contractions involving the Weak Hamiltonian. | ||||||
|  |  ******************************************************************************/ | ||||||
|  | //// Sum and store correlator. | ||||||
|  | // Reduces `exp` into `buf` with sliceSum along Tp, then stores the label `n` | ||||||
|  | // and the per-slice values (through TensorRemove) in `res`. | ||||||
|  | // The expansion is one braced block, so it remains a single statement when | ||||||
|  | // used under an unbraced if/for; call sites may omit the trailing ';'. | ||||||
|  | // `buf` and `res` are evaluated more than once: pass plain lvalues. | ||||||
|  | #define MAKE_DIAG(exp, buf, res, n)\ | ||||||
|  | {\ | ||||||
|  |     sliceSum(exp, buf, Tp);\ | ||||||
|  |     (res).name = (n);\ | ||||||
|  |     (res).corr.resize((buf).size());\ | ||||||
|  |     for (unsigned int t = 0; t < (buf).size(); ++t)\ | ||||||
|  |     {\ | ||||||
|  |         (res).corr[t] = TensorRemove((buf)[t]);\ | ||||||
|  |     }\ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | //// Contraction of mu index: use 'mu' variable in exp. | ||||||
|  | // Sets `buf` to zero and accumulates `exp` for mu = 0 .. ndim-1. | ||||||
|  | // `zero` and `ndim` must be visible where the macro is expanded. | ||||||
|  | // The expansion is one braced block, so it remains a single statement when | ||||||
|  | // used under an unbraced if/for; call sites may omit the trailing ';'. | ||||||
|  | #define SUM_MU(buf,exp)\ | ||||||
|  | {\ | ||||||
|  |     (buf) = zero;\ | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu)\ | ||||||
|  |     {\ | ||||||
|  |         (buf) += (exp);\ | ||||||
|  |     }\ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Two index labels plus their count. | ||||||
|  | // NOTE(review): presumably indexes a V/A table such as VA_label {"V", "A"} - confirm. | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |   i_V,   // = 0 | ||||||
|  |   i_A,   // = 1 | ||||||
|  |   n_i    // = 2, number of entries | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Serialisable parameter set shared by the weak Hamiltonian modules. | ||||||
|  | // All five members are strings: q1..q4 are the names of the four input | ||||||
|  | // objects the modules list as dependencies, and output is the string handed | ||||||
|  | // to the result writer. | ||||||
|  | class WeakHamiltonianPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar, | ||||||
|  |                                     std::string, q1, | ||||||
|  |                                     std::string, q2, | ||||||
|  |                                     std::string, q3, | ||||||
|  |                                     std::string, q4, | ||||||
|  |                                     std::string, output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Declares the module class T<modname>, derived from | ||||||
|  | // Module<WeakHamiltonianPar>, with a nested serialisable Result (name, corr), | ||||||
|  | // the constructor/getInput/getOutput/setup/execute declarations and a | ||||||
|  | // VA_label member initialised to {"V", "A"}; then registers it in the | ||||||
|  | // MContraction namespace through MODULE_REGISTER_NS. | ||||||
|  | // The member functions are only declared here and must be defined elsewhere. | ||||||
|  | #define MAKE_WEAK_MODULE(modname)\ | ||||||
|  | class T##modname: public Module<WeakHamiltonianPar>\ | ||||||
|  | {\ | ||||||
|  | public:\ | ||||||
|  |     TYPE_ALIASES(FIMPL,)\ | ||||||
|  |     class Result: Serializable\ | ||||||
|  |     {\ | ||||||
|  |     public:\ | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\ | ||||||
|  |                                         std::string, name,\ | ||||||
|  |                                         std::vector<Complex>, corr);\ | ||||||
|  |     };\ | ||||||
|  | public:\ | ||||||
|  |     /* constructor */ \ | ||||||
|  |     T##modname(const std::string name);\ | ||||||
|  |     /* destructor */ \ | ||||||
|  |     virtual ~T##modname(void) = default;\ | ||||||
|  |     /* dependency relation */ \ | ||||||
|  |     virtual std::vector<std::string> getInput(void);\ | ||||||
|  |     virtual std::vector<std::string> getOutput(void);\ | ||||||
|  |     /* setup */ \ | ||||||
|  |     virtual void setup(void);\ | ||||||
|  |     /* execution */ \ | ||||||
|  |     virtual void execute(void);\ | ||||||
|  |     std::vector<std::string> VA_label = {"V", "A"};\ | ||||||
|  | };\ | ||||||
|  | MODULE_REGISTER_NS(modname, T##modname, MContraction); | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_WeakHamiltonian_hpp_ | ||||||
							
								
								
									
										137
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MContraction; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Weak Hamiltonian current-current contractions, Eye-type. | ||||||
|  |  *  | ||||||
|  |  * These contractions are generated by the Q1 and Q2 operators in the physical | ||||||
|  |  * basis (see e.g. Fig 3 of arXiv:1507.03094). | ||||||
|  |  *  | ||||||
|  |  * Schematics:        q4                 |                   | ||||||
|  |  *                  /-<-¬                |                              | ||||||
|  |  *                 /     \               |             q2           q3 | ||||||
|  |  *                 \     /               |        /----<------*------<----¬                         | ||||||
|  |  *            q2    \   /    q3          |       /          /-*-¬          \ | ||||||
|  |  *       /-----<-----* *-----<----¬      |      /          /     \          \ | ||||||
|  |  *    i *            H_W           * f   |   i *           \     /  q4      * f | ||||||
|  |  *       \                        /      |      \           \->-/          /    | ||||||
|  |  *        \                      /       |       \                        /        | ||||||
|  |  *         \---------->---------/        |        \----------->----------/         | ||||||
|  |  *                   q1                  |                   q1                   | ||||||
|  |  *                                       | | ||||||
|  |  *                Saucer (S)             |                  Eye (E) | ||||||
|  |  *  | ||||||
|  |  * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) | ||||||
|  |  * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                  TWeakHamiltonianEye implementation                        * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Builds the module by forwarding the instance name to the | ||||||
|  | // Module<WeakHamiltonianPar> base class; the body does no further work. | ||||||
|  | TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name) | ||||||
|  | : Module<WeakHamiltonianPar>(name) | ||||||
|  | {} | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Returns the names of the four objects this module depends on | ||||||
|  | // (parameters q1, q2, q3 and q4, in that order). | ||||||
|  | std::vector<std::string> TWeakHamiltonianEye::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3, par().q4}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Returns the names of the objects this module produces: a single entry, | ||||||
|  | // the module's own name. | ||||||
|  | std::vector<std::string> TWeakHamiltonianEye::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Setup hook of the module interface. The body is empty: this module | ||||||
|  | // performs no work at setup time. | ||||||
|  | void TWeakHamiltonianEye::setup(void) | ||||||
|  | { | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Computes the Saucer (S) and Eye (E) contractions from the four input | ||||||
|  | // objects and writes both correlators as a two-entry vector under "HW_Eye". | ||||||
|  | // S traces the product of the body and loop sub-expressions per mu; | ||||||
|  | // E multiplies their separate traces. Both are summed over mu and then | ||||||
|  | // reduced with sliceSum along Tp (see SUM_MU / MAKE_DIAG). | ||||||
|  | void TWeakHamiltonianEye::execute(void) | ||||||
|  | { | ||||||
|  |     // The quote after q2 is now closed so every name is printed as 'name'. | ||||||
|  |     LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"  | ||||||
|  |                  << getName() << "' using quarks '" << par().q1 << "', '"  | ||||||
|  |                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4  | ||||||
|  |                  << "'." << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter             writer(par().output); | ||||||
|  |     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||||
|  |     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||||
|  |     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||||
|  |     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||||
|  |     // g5 is referenced by name inside MAKE_SE_BODY. | ||||||
|  |     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||||
|  |     LatticeComplex        expbuf(env().getGrid()); | ||||||
|  |     std::vector<TComplex> corrbuf; | ||||||
|  |     std::vector<Result>   result(n_eye_diag); | ||||||
|  |     // ndim is referenced by name inside SUM_MU. | ||||||
|  |     unsigned int ndim   = env().getNd(); | ||||||
|  |  | ||||||
|  |     // tmp1/tmp2 only serve as prototypes to size the per-mu work vectors. | ||||||
|  |     PropagatorField              tmp1(env().getGrid()); | ||||||
|  |     LatticeComplex               tmp2(env().getGrid()); | ||||||
|  |     std::vector<PropagatorField> S_body(ndim, tmp1); | ||||||
|  |     std::vector<PropagatorField> S_loop(ndim, tmp1); | ||||||
|  |     std::vector<LatticeComplex>  E_body(ndim, tmp2); | ||||||
|  |     std::vector<LatticeComplex>  E_loop(ndim, tmp2); | ||||||
|  |  | ||||||
|  |     // Setup for S-type contractions. | ||||||
|  |     // Unsigned index: avoids a signed/unsigned comparison against ndim. | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); | ||||||
|  |         S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform S-type contractions.     | ||||||
|  |     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") | ||||||
|  |  | ||||||
|  |     // Recycle sub-expressions for E-type contractions. | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         E_body[mu] = trace(S_body[mu]); | ||||||
|  |         E_loop[mu] = trace(S_loop[mu]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform E-type contractions. | ||||||
|  |     SUM_MU(expbuf, E_body[mu]*E_loop[mu]) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E") | ||||||
|  |  | ||||||
|  |     write(writer, "HW_Eye", result); | ||||||
|  | } | ||||||
							
								
								
									
										58
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_WeakHamiltonianEye_hpp_ | ||||||
|  | #define Hadrons_WeakHamiltonianEye_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakHamiltonianEye                                 * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Positions of the two diagrams in the result vector, plus the vector length. | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |     S_diag,      // = 0 | ||||||
|  |     E_diag,      // = 1 | ||||||
|  |     n_eye_diag   // = 2, number of diagrams | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Saucer and Eye subdiagram contractions. | ||||||
|  | // Both macros expand to a single parenthesised expression; `g5` must be | ||||||
|  | // visible where MAKE_SE_BODY is expanded. Arguments are parenthesised so that | ||||||
|  | // compound expressions passed as arguments keep their own grouping. | ||||||
|  | #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) ((Q_3)*g5*(Q_1)*adj(Q_2)*g5*(gamma)) | ||||||
|  | #define MAKE_SE_LOOP(Q_loop, gamma) ((Q_loop)*(gamma)) | ||||||
|  |  | ||||||
|  | MAKE_WEAK_MODULE(WeakHamiltonianEye) | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_WeakHamiltonianEye_hpp_ | ||||||
							
								
								
									
										139
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,139 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MContraction; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Weak Hamiltonian current-current contractions, Non-Eye-type. | ||||||
|  |  *  | ||||||
|  |  * These contractions are generated by the Q1 and Q2 operators in the physical | ||||||
|  |  * basis (see e.g. Fig 3 of arXiv:1507.03094). | ||||||
|  |  *  | ||||||
|  |  * Schematic:      | ||||||
|  |  *            q2             q3          |           q2              q3 | ||||||
|  |  *          /--<--¬       /--<--¬        |        /--<--¬         /--<--¬        | ||||||
|  |  *         /       \     /       \       |       /       \       /       \       | ||||||
|  |  *        /         \   /         \      |      /         \     /         \      | ||||||
|  |  *       /           \ /           \     |     /           \   /           \     | ||||||
|  |  *    i *             * H_W         *  f |  i *             * * H_W         * f  | ||||||
|  |  *      \             *             |    |     \           /   \           / | ||||||
|  |  *       \           / \           /     |      \         /     \         /     | ||||||
|  |  *        \         /   \         /      |       \       /       \       /   | ||||||
|  |  *         \       /     \       /       |        \-->--/         \-->--/       | ||||||
|  |  *          \-->--/       \-->--/        |          q1               q4  | ||||||
|  |  *            q1             q4          | | ||||||
|  |  *                Connected (C)          |                 Wing (W) | ||||||
|  |  * | ||||||
|  |  * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu]) | ||||||
|  |  * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu]) | ||||||
|  |  *  | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                  TWeakHamiltonianNonEye implementation                     * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Build the module; the instance name is handed straight to the Module base. | ||||||
|  | TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name) | ||||||
|  |     : Module<WeakHamiltonianPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Names of the objects this module reads: the four propagators q1..q4 taken | ||||||
|  | // from the module parameters, in that order. | ||||||
|  | std::vector<std::string> TWeakHamiltonianNonEye::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3, par().q4}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared product is an object carrying the module's own name. | ||||||
|  | std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Intentionally empty: this module registers nothing before execution. | ||||||
|  | void TWeakHamiltonianNonEye::setup(void) | ||||||
|  | {} | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | /* | ||||||
|  |  * Compute the Connected (C) and Wing (W) non-eye contractions from the four | ||||||
|  |  * input propagators and hand both results to a CorrWriter opened on | ||||||
|  |  * par().output under the tag "HW_NonEye". | ||||||
|  |  * | ||||||
|  |  * For every direction mu the sub-diagrams q1*adj(q2)*g5*gL[mu] and | ||||||
|  |  * q3*adj(q4)*g5*gL[mu] are built once and reused by both diagrams: | ||||||
|  |  *   C: trace(sub_i[mu]*sub_f[mu]) | ||||||
|  |  *   W: trace(sub_i[mu])*trace(sub_f[mu]) | ||||||
|  |  * | ||||||
|  |  * SUM_MU and MAKE_DIAG are macros defined outside this file; they are | ||||||
|  |  * presumably responsible for the sum over mu and for filling/labelling the | ||||||
|  |  * Result entry. Local names (ndim, mu, g5, expbuf, corrbuf) are kept as they | ||||||
|  |  * are because those macros may refer to them. | ||||||
|  |  */ | ||||||
|  | void TWeakHamiltonianNonEye::execute(void) | ||||||
|  | { | ||||||
|  |     // Every quark name is wrapped in a matching pair of single quotes. | ||||||
|  |     LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"  | ||||||
|  |                  << getName() << "' using quarks '" << par().q1 << "', '"  | ||||||
|  |                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4  | ||||||
|  |                  << "'." << std::endl; | ||||||
|  |      | ||||||
|  |     CorrWriter             writer(par().output); | ||||||
|  |     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||||
|  |     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||||
|  |     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||||
|  |     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||||
|  |     // g5 is picked up by name inside MAKE_CW_SUBDIAG. | ||||||
|  |     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||||
|  |     LatticeComplex        expbuf(env().getGrid()); | ||||||
|  |     std::vector<TComplex> corrbuf; | ||||||
|  |     std::vector<Result>   result(n_noneye_diag);  | ||||||
|  |     unsigned int ndim   = env().getNd(); | ||||||
|  |  | ||||||
|  |     // tmp1/tmp2 only serve as prototypes to size the per-direction vectors. | ||||||
|  |     PropagatorField              tmp1(env().getGrid()); | ||||||
|  |     LatticeComplex               tmp2(env().getGrid()); | ||||||
|  |     std::vector<PropagatorField> C_i_side_loop(ndim, tmp1); | ||||||
|  |     std::vector<PropagatorField> C_f_side_loop(ndim, tmp1); | ||||||
|  |     std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2); | ||||||
|  |     std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2); | ||||||
|  |  | ||||||
|  |     // Setup for C-type contractions (unsigned index, same type as ndim). | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu])); | ||||||
|  |         C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu])); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform C-type contractions.     | ||||||
|  |     SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu])) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C") | ||||||
|  |  | ||||||
|  |     // Recycle sub-expressions for W-type contractions. | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         W_i_side_loop[mu] = trace(C_i_side_loop[mu]); | ||||||
|  |         W_f_side_loop[mu] = trace(C_f_side_loop[mu]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform W-type contractions. | ||||||
|  |     SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu]) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W") | ||||||
|  |  | ||||||
|  |     write(writer, "HW_NonEye", result); | ||||||
|  | } | ||||||
| @@ -0,0 +1,57 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_WeakHamiltonianNonEye_hpp_ | ||||||
|  | #define Hadrons_WeakHamiltonianNonEye_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakHamiltonianNonEye                              * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Slots of the result vector filled by the non-eye module, plus their count. | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |     W_diag,        // = 0 | ||||||
|  |     C_diag,        // = 1 | ||||||
|  |     n_noneye_diag  // = 2, number of diagrams | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Wing and Connected subdiagram contractions: Q_1*adj(Q_2)*g5*gamma. | ||||||
|  | // Expects a variable named g5 in the scope where the macro is expanded. | ||||||
|  | // Arguments are parenthesised so compound expressions expand safely. | ||||||
|  | #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) | ||||||
|  |  | ||||||
|  | MAKE_WEAK_MODULE(WeakHamiltonianNonEye) | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_WeakHamiltonianNonEye_hpp_ | ||||||
							
								
								
									
										135
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MContraction; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Weak Hamiltonian + current contractions, disconnected topology for neutral  | ||||||
|  |  * mesons. | ||||||
|  |  *  | ||||||
|  |  * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak | ||||||
|  |  * Hamiltonian in the physical basis and an additional current J (see e.g.  | ||||||
|  |  * Fig 11 of arXiv:1507.03094). | ||||||
|  |  *  | ||||||
|  |  * Schematic: | ||||||
|  |  *                         | ||||||
|  |  *           q2          q4             q3 | ||||||
|  |  *       /--<--¬     /---<--¬       /---<--¬ | ||||||
|  |  *     /         \ /         \     /        \ | ||||||
|  |  *  i *           * H_W      |  J *          * f | ||||||
|  |  *     \         / \         /     \        / | ||||||
|  |  *      \--->---/   \-------/       \------/ | ||||||
|  |  *          q1  | ||||||
|  |  *  | ||||||
|  |  * options | ||||||
|  |  * - q1: input propagator 1 (string) | ||||||
|  |  * - q2: input propagator 2 (string) | ||||||
|  |  * - q3: input propagator 3 (string), assumed to be sequential propagator  | ||||||
|  |  * - q4: input propagator 4 (string), assumed to be a loop | ||||||
|  |  *  | ||||||
|  |  * type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5) | ||||||
|  |  * type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5) | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /******************************************************************************* | ||||||
|  |  *                  TWeakNeutral4ptDisc implementation                         * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Build the module; the instance name is handed straight to the Module base. | ||||||
|  | TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name) | ||||||
|  |     : Module<WeakHamiltonianPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Names of the objects this module reads: the four propagators q1..q4 taken | ||||||
|  | // from the module parameters, in that order. | ||||||
|  | std::vector<std::string> TWeakNeutral4ptDisc::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3, par().q4}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared product is an object carrying the module's own name. | ||||||
|  | std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Intentionally empty: this module registers nothing before execution. | ||||||
|  | void TWeakNeutral4ptDisc::setup(void) | ||||||
|  | {} | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | /* | ||||||
|  |  * Compute the two disconnected neutral-meson contractions from the four | ||||||
|  |  * input propagators and hand both results to a CorrWriter opened on | ||||||
|  |  * par().output under the tag "HW_disc0". | ||||||
|  |  * | ||||||
|  |  * Per direction mu: meson[mu] = q1*adj(q2)*g5*gL[mu], loop[mu] = q4*gL[mu]. | ||||||
|  |  * Both diagrams are multiplied by the same current factor curr built from q3. | ||||||
|  |  * | ||||||
|  |  * SUM_MU and MAKE_DIAG are macros defined outside this file; they are | ||||||
|  |  * presumably responsible for the sum over mu and for filling/labelling the | ||||||
|  |  * Result entry. Local names (ndim, mu, g5, expbuf, corrbuf) are kept as they | ||||||
|  |  * are because those macros may refer to them. | ||||||
|  |  */ | ||||||
|  | void TWeakNeutral4ptDisc::execute(void) | ||||||
|  | { | ||||||
|  |     // Every quark name is wrapped in a matching pair of single quotes. | ||||||
|  |     LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"  | ||||||
|  |                  << getName() << "' using quarks '" << par().q1 << "', '"  | ||||||
|  |                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4  | ||||||
|  |                  << "'." << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter             writer(par().output); | ||||||
|  |     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||||
|  |     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||||
|  |     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||||
|  |     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||||
|  |     // g5 is picked up by name inside MAKE_DISC_MESON. | ||||||
|  |     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||||
|  |     LatticeComplex        expbuf(env().getGrid()); | ||||||
|  |     std::vector<TComplex> corrbuf; | ||||||
|  |     std::vector<Result>   result(n_neut_disc_diag); | ||||||
|  |     unsigned int ndim   = env().getNd(); | ||||||
|  |  | ||||||
|  |     // tmp only serves as a prototype to size the per-direction vectors. | ||||||
|  |     PropagatorField              tmp(env().getGrid()); | ||||||
|  |     std::vector<PropagatorField> meson(ndim, tmp); | ||||||
|  |     std::vector<PropagatorField> loop(ndim, tmp); | ||||||
|  |     LatticeComplex               curr(env().getGrid()); | ||||||
|  |  | ||||||
|  |     // Sub-diagrams shared by both contraction types (unsigned index, same | ||||||
|  |     // type as ndim). | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu])); | ||||||
|  |         loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu])); | ||||||
|  |     } | ||||||
|  |     // NOTE(review): the header comment of this file writes the current factor | ||||||
|  |     // as trace(q3*g5), whereas GammaL(Gamma5) is passed here - confirm which | ||||||
|  |     // one is intended. | ||||||
|  |     curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5)); | ||||||
|  |  | ||||||
|  |     // NOTE(review): the header comment of this file lists type 1 as the | ||||||
|  |     // product of separate traces and type 2 as the single trace, which is the | ||||||
|  |     // opposite of the labelling used below - confirm which one is intended. | ||||||
|  |  | ||||||
|  |     // Perform type 1 contractions.     | ||||||
|  |     SUM_MU(expbuf, trace(meson[mu]*loop[mu])) | ||||||
|  |     expbuf *= curr; | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1") | ||||||
|  |  | ||||||
|  |     // Perform type 2 contractions. | ||||||
|  |     SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu])) | ||||||
|  |     expbuf *= curr; | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2") | ||||||
|  |  | ||||||
|  |     write(writer, "HW_disc0", result); | ||||||
|  | } | ||||||
							
								
								
									
										59
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_WeakNeutral4ptDisc_hpp_ | ||||||
|  | #define Hadrons_WeakNeutral4ptDisc_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakNeutral4ptDisc                                 * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Slots of the result vector filled by the neutral disconnected module, | ||||||
|  | // plus their count. | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |     neut_disc_1_diag,  // = 0 | ||||||
|  |     neut_disc_2_diag,  // = 1 | ||||||
|  |     n_neut_disc_diag   // = 2, number of diagrams | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Neutral 4pt disconnected subdiagram contractions. | ||||||
|  | // MAKE_DISC_MESON expects a variable named g5 in the scope where it is | ||||||
|  | // expanded. Arguments are parenthesised so compound expressions expand safely. | ||||||
|  | #define MAKE_DISC_MESON(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) | ||||||
|  | #define MAKE_DISC_LOOP(Q_LOOP, gamma) ((Q_LOOP)*(gamma)) | ||||||
|  | #define MAKE_DISC_CURR(Q_c, gamma) (trace((Q_c)*(gamma))) | ||||||
|  |  | ||||||
|  | MAKE_WEAK_MODULE(WeakNeutral4ptDisc) | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_WeakNeutral4ptDisc_hpp_ | ||||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | |||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| void TLoad::execute(void) | void TLoad::execute(void) | ||||||
| { | { | ||||||
|     NerscField  header; |     FieldMetaData  header; | ||||||
|     std::string fileName = par().file + "." |     std::string fileName = par().file + "." | ||||||
|                            + std::to_string(env().getTrajectory()); |                            + std::to_string(env().getTrajectory()); | ||||||
|      |      | ||||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | |||||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); |     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||||
|     NerscIO::readConfiguration(U, header, fileName); |     NerscIO::readConfiguration(U, header, fileName); | ||||||
|     LOG(Message) << "NERSC header:" << std::endl; |     LOG(Message) << "NERSC header:" << std::endl; | ||||||
|     dump_nersc_header(header, LOG(Message)); |     dump_meta_data(header, LOG(Message)); | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										132
									
								
								extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,132 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_NoiseLoop_hpp_ | ||||||
|  | #define Hadrons_NoiseLoop_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   | ||||||
|  |  Noise loop propagator | ||||||
|  |  ----------------------------- | ||||||
|  |  * loop_x = q_x * adj(eta_x) | ||||||
|  |   | ||||||
|  |  * options: | ||||||
|  |  - q = Result of inversion on noise source. | ||||||
|  |  - eta = noise source. | ||||||
|  |  | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         NoiseLoop                                          * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MLoop) | ||||||
|  |  | ||||||
|  | // Serialisable parameter set of the NoiseLoop module. | ||||||
|  | //   q   : name of the propagator object read by execute() | ||||||
|  | //   eta : name of the noise-source object read by execute() | ||||||
|  | class NoiseLoopPar : Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar, std::string, q, std::string, eta); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module producing loop = q*adj(eta) for a fermion implementation FImpl. | ||||||
|  | template <typename FImpl> | ||||||
|  | class TNoiseLoop : public Module<NoiseLoopPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     TYPE_ALIASES(FImpl,); | ||||||
|  |  | ||||||
|  |     // construction / destruction | ||||||
|  |     TNoiseLoop(const std::string name); | ||||||
|  |     virtual ~TNoiseLoop(void) = default; | ||||||
|  |  | ||||||
|  |     // names of the objects consumed and produced | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |  | ||||||
|  |     // registration step, then the actual computation | ||||||
|  |     virtual void setup(void); | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                 TNoiseLoop implementation                                  * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Build the module; the instance name is handed straight to the Module base. | ||||||
|  | template <typename FImpl> | ||||||
|  | TNoiseLoop<FImpl>::TNoiseLoop(const std::string name) | ||||||
|  |     : Module<NoiseLoopPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Names of the objects this module reads: q followed by eta. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TNoiseLoop<FImpl>::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q, par().eta}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared product is an object carrying the module's own name. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Register a PropagatorField lattice under the module's name. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TNoiseLoop<FImpl>::setup(void) | ||||||
|  | { | ||||||
|  |     const std::string loopName = getName(); | ||||||
|  |  | ||||||
|  |     env().template registerLattice<PropagatorField>(loopName); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Create the output lattice named after the module and fill it with | ||||||
|  | // q*adj(eta), where q and eta are looked up by the names in the parameters. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TNoiseLoop<FImpl>::execute(void) | ||||||
|  | { | ||||||
|  |     PropagatorField &result   = *env().template createLattice<PropagatorField>(getName()); | ||||||
|  |     PropagatorField &solution = *env().template getObject<PropagatorField>(par().q); | ||||||
|  |     PropagatorField &noise    = *env().template getObject<PropagatorField>(par().eta); | ||||||
|  |  | ||||||
|  |     result = solution*adj(noise); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_NoiseLoop_hpp_ | ||||||
| @@ -6,6 +6,7 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
| @@ -149,9 +150,9 @@ void TSeqGamma<FImpl>::execute(void) | |||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||||
|     { |     { | ||||||
|         LatticeCoordinate(coor, mu); |         LatticeCoordinate(coor, mu); | ||||||
|         ph = ph + p[mu]*coor; |         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||||
|     } |     } | ||||||
|     ph = exp(i*ph); |     ph = exp((Real)(2*M_PI)*i*ph); | ||||||
|     LatticeCoordinate(t, Tp); |     LatticeCoordinate(t, Tp); | ||||||
|     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); |     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										147
									
								
								extras/Hadrons/Modules/MSource/Wall.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								extras/Hadrons/Modules/MSource/Wall.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MSource/Wall.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_WallSource_hpp_ | ||||||
|  | #define Hadrons_WallSource_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   | ||||||
|  |  Wall source | ||||||
|  |  ----------------------------- | ||||||
|  |  * src_x = delta(x_3 - tW) * exp(2*pi*i * sum_mu mom[mu]*x_mu/L_mu) | ||||||
|  |   | ||||||
|  |  * options: | ||||||
|  |  - tW: source timeslice (integer) | ||||||
|  |  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.") | ||||||
|  |   | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         Wall                                               * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MSource) | ||||||
|  |  | ||||||
|  | // Serialisable parameter set of the Wall source module. | ||||||
|  | //   tW  : timeslice on which the source is non-zero | ||||||
|  | //   mom : momentum components as a string, parsed in execute() | ||||||
|  | class WallPar : Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar, unsigned int, tW, std::string, mom); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module producing a wall source with a momentum phase for a fermion | ||||||
|  | // implementation FImpl. | ||||||
|  | template <typename FImpl> | ||||||
|  | class TWall : public Module<WallPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     TYPE_ALIASES(FImpl,); | ||||||
|  |  | ||||||
|  |     // construction / destruction | ||||||
|  |     TWall(const std::string name); | ||||||
|  |     virtual ~TWall(void) = default; | ||||||
|  |  | ||||||
|  |     // names of the objects consumed and produced | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |  | ||||||
|  |     // registration step, then the actual computation | ||||||
|  |     virtual void setup(void); | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                 TWall implementation                                       * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Build the module; the instance name is handed straight to the Module base. | ||||||
|  | template <typename FImpl> | ||||||
|  | TWall<FImpl>::TWall(const std::string name) | ||||||
|  |     : Module<WallPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // A wall source reads no other object: the input list is empty. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TWall<FImpl>::getInput(void) | ||||||
|  | { | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared product is an object carrying the module's own name. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TWall<FImpl>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Register a PropagatorField lattice under the module's name. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TWall<FImpl>::setup(void) | ||||||
|  | { | ||||||
|  |     const std::string srcName = getName(); | ||||||
|  |  | ||||||
|  |     env().template registerLattice<PropagatorField>(srcName); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Creates the output lattice and fills it with the wall source: | ||||||
|  | //   phase = exp(2*pi*i * sum_mu mom[mu]*x[mu]/L[mu]),  L[mu] = _fdimensions[mu] | ||||||
|  | // The result is 1*phase where the Tp coordinate equals par().tW and 0 elsewhere. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TWall<FImpl>::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Generating wall source at t = " << par().tW  | ||||||
|  |                  << " with momentum " << par().mom << std::endl; | ||||||
|  |      | ||||||
|  |     PropagatorField &wall = *env().template createLattice<PropagatorField>(getName()); | ||||||
|  |     auto            *grid = env().getGrid(); | ||||||
|  |      | ||||||
|  |     Lattice<iScalar<vInteger>> tCoor(grid); | ||||||
|  |     LatticeComplex             phase(grid), xCoor(grid); | ||||||
|  |     std::vector<Real>          mom = strToVec<Real>(par().mom); | ||||||
|  |     Complex                    imagUnit(0.0,1.0); | ||||||
|  |      | ||||||
|  |     // accumulate sum_mu mom[mu]*x[mu]/L[mu] | ||||||
|  |     phase = zero; | ||||||
|  |     for(unsigned int mu = 0; mu < Nd; mu++) | ||||||
|  |     { | ||||||
|  |         const double invL = 1./(grid->_fdimensions[mu]); | ||||||
|  |          | ||||||
|  |         LatticeCoordinate(xCoor, mu); | ||||||
|  |         phase = phase + mom[mu]*xCoor*invL; | ||||||
|  |     } | ||||||
|  |     phase = exp((Real)(2*M_PI)*imagUnit*phase); | ||||||
|  |      | ||||||
|  |     // keep the phase only on the requested time slice | ||||||
|  |     LatticeCoordinate(tCoor, Tp); | ||||||
|  |     wall = 1.; | ||||||
|  |     wall = where((tCoor == par().tW), wall*phase, 0.*wall); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_WallSource_hpp_ | ||||||
| @@ -173,7 +173,7 @@ void TQuark<FImpl>::execute(void) | |||||||
|                 *env().template getObject<PropagatorField>(getName()); |                 *env().template getObject<PropagatorField>(getName()); | ||||||
|              |              | ||||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); |             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||||
|             axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); |             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); | ||||||
|             ExtractSlice(tmp, sol, 0, 0); |             ExtractSlice(tmp, sol, 0, 0); | ||||||
|             FermToProp(p4d, tmp, s, c); |             FermToProp(p4d, tmp, s, c); | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -1,4 +1,7 @@ | |||||||
| modules_cc =\ | modules_cc =\ | ||||||
|  |   Modules/MContraction/WeakHamiltonianEye.cc \ | ||||||
|  |   Modules/MContraction/WeakHamiltonianNonEye.cc \ | ||||||
|  |   Modules/MContraction/WeakNeutral4ptDisc.cc \ | ||||||
|   Modules/MGauge/Load.cc \ |   Modules/MGauge/Load.cc \ | ||||||
|   Modules/MGauge/Random.cc \ |   Modules/MGauge/Random.cc \ | ||||||
|   Modules/MGauge/Unit.cc |   Modules/MGauge/Unit.cc | ||||||
| @@ -7,13 +10,21 @@ modules_hpp =\ | |||||||
|   Modules/MAction/DWF.hpp \ |   Modules/MAction/DWF.hpp \ | ||||||
|   Modules/MAction/Wilson.hpp \ |   Modules/MAction/Wilson.hpp \ | ||||||
|   Modules/MContraction/Baryon.hpp \ |   Modules/MContraction/Baryon.hpp \ | ||||||
|  |   Modules/MContraction/DiscLoop.hpp \ | ||||||
|  |   Modules/MContraction/Gamma3pt.hpp \ | ||||||
|   Modules/MContraction/Meson.hpp \ |   Modules/MContraction/Meson.hpp \ | ||||||
|  |   Modules/MContraction/WeakHamiltonian.hpp \ | ||||||
|  |   Modules/MContraction/WeakHamiltonianEye.hpp \ | ||||||
|  |   Modules/MContraction/WeakHamiltonianNonEye.hpp \ | ||||||
|  |   Modules/MContraction/WeakNeutral4ptDisc.hpp \ | ||||||
|   Modules/MGauge/Load.hpp \ |   Modules/MGauge/Load.hpp \ | ||||||
|   Modules/MGauge/Random.hpp \ |   Modules/MGauge/Random.hpp \ | ||||||
|   Modules/MGauge/Unit.hpp \ |   Modules/MGauge/Unit.hpp \ | ||||||
|  |   Modules/MLoop/NoiseLoop.hpp \ | ||||||
|   Modules/MSolver/RBPrecCG.hpp \ |   Modules/MSolver/RBPrecCG.hpp \ | ||||||
|   Modules/MSource/Point.hpp \ |   Modules/MSource/Point.hpp \ | ||||||
|   Modules/MSource/SeqGamma.hpp \ |   Modules/MSource/SeqGamma.hpp \ | ||||||
|  |   Modules/MSource/Wall.hpp \ | ||||||
|   Modules/MSource/Z2.hpp \ |   Modules/MSource/Z2.hpp \ | ||||||
|   Modules/Quark.hpp |   Modules/Quark.hpp | ||||||
|  |  | ||||||
|   | |||||||
| @@ -21,3 +21,16 @@ problem. The test case works with icpc and with clang++, but fails consistently | |||||||
| current variants. | current variants. | ||||||
|  |  | ||||||
| Peter | Peter | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ************ | ||||||
|  |  | ||||||
|  | Second GCC bug reported, see Issue 100. | ||||||
|  |  | ||||||
|  | https://wandbox.org/permlink/tzssJza6R9XnqANw | ||||||
|  | https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652 | ||||||
|  |  | ||||||
|  | Travis builds now fail under gcc-5 for Test_simd, since I added more comprehensive testing to the | ||||||
|  | CI test suite. This is exposing the limits of the Travis runtime cap and its weak cores. | ||||||
|  |  | ||||||
|  | Travis uses 5.4.1 for g++-5. | ||||||
|   | |||||||
							
								
								
									
										86
									
								
								grid-config.in
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										86
									
								
								grid-config.in
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,86 @@ | |||||||
|  | #! /bin/sh | ||||||
|  |  | ||||||
|  | prefix=@prefix@ | ||||||
|  | exec_prefix=@exec_prefix@ | ||||||
|  | includedir=@includedir@ | ||||||
|  |  | ||||||
|  | # Print the option summary on stdout and terminate the script. | ||||||
|  | #   $1  exit status to use. Defaults to 1 when omitted, so a call without | ||||||
|  | #       an argument never reports success (a bare `exit` would reuse the | ||||||
|  | #       status of the preceding `cat`, i.e. 0). | ||||||
|  | usage() | ||||||
|  | { | ||||||
|  |   cat <<EOF | ||||||
|  | Usage: grid-config [OPTION] | ||||||
|  |  | ||||||
|  | Known values for OPTION are: | ||||||
|  |  | ||||||
|  |   --prefix     show Grid installation prefix | ||||||
|  |   --cxxflags   print pre-processor and compiler flags | ||||||
|  |   --ldflags    print library linking flags | ||||||
|  |   --libs       print library linking information | ||||||
|  |   --summary    print full build summary | ||||||
|  |   --help       display this help and exit | ||||||
|  |   --version    output version information | ||||||
|  |   --git        print git revision | ||||||
|  |  | ||||||
|  | EOF | ||||||
|  |    | ||||||
|  |   exit "${1:-1}" | ||||||
|  | } | ||||||
|  |  | ||||||
|  | if test $# -eq 0; then | ||||||
|  |   usage 1 | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | cflags=false | ||||||
|  | libs=false | ||||||
|  |  | ||||||
|  | # Process the command-line options one at a time; several may be combined. | ||||||
|  | # --version, --git and --help terminate the script immediately; the other | ||||||
|  | # known options print their value and processing continues. | ||||||
|  | while test $# -gt 0; do | ||||||
|  |   case "$1" in | ||||||
|  |     --prefix) | ||||||
|  |       echo "$prefix" | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --version) | ||||||
|  |       echo @VERSION@ | ||||||
|  |       exit 0 | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --git) | ||||||
|  |       echo "@GRID_BRANCH@ @GRID_SHA@" | ||||||
|  |       exit 0 | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --help) | ||||||
|  |       usage 0 | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --cxxflags) | ||||||
|  |       echo @GRID_CXXFLAGS@ | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --ldflags) | ||||||
|  |       echo @GRID_LDFLAGS@ | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --libs) | ||||||
|  |       echo @GRID_LIBS@ | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     --summary) | ||||||
|  |       echo "" | ||||||
|  |       echo "@GRID_SUMMARY@" | ||||||
|  |       echo "" | ||||||
|  |     ;; | ||||||
|  |      | ||||||
|  |     *) | ||||||
|  |       # Unknown option: help text goes to stderr and the script fails with | ||||||
|  |       # status 1, so callers capturing stdout do not mistake it for flags. | ||||||
|  |       usage 1 1>&2 | ||||||
|  |     ;; | ||||||
|  |   esac | ||||||
|  |   shift | ||||||
|  | done | ||||||
|  |  | ||||||
|  | exit 0 | ||||||
							
								
								
									
										37
									
								
								lib/DisableWarnings.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								lib/DisableWarnings.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/DisableWarnings.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef DISABLE_WARNINGS_H | ||||||
|  | #define DISABLE_WARNINGS_H | ||||||
|  |  | ||||||
|  |  // Disables an Intel-compiler-specific warning (triggered in json.hpp) | ||||||
|  | #pragma warning disable 488   | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/GridQCDcore.h> | #include <Grid/GridQCDcore.h> | ||||||
| #include <Grid/qcd/action/Action.h> | #include <Grid/qcd/action/Action.h> | ||||||
|  | #include <Grid/qcd/utils/GaugeFix.h> | ||||||
| #include <Grid/qcd/smearing/Smearing.h> | #include <Grid/qcd/smearing/Smearing.h> | ||||||
|  | #include <Grid/parallelIO/MetaData.h> | ||||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,28 +38,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_BASE_H | #ifndef GRID_BASE_H | ||||||
| #define GRID_BASE_H | #define GRID_BASE_H | ||||||
|  |  | ||||||
| /////////////////// | #include <Grid/GridStd.h> | ||||||
| // Std C++ dependencies |  | ||||||
| /////////////////// |  | ||||||
| #include <cassert> |  | ||||||
| #include <complex> |  | ||||||
| #include <vector> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <random> |  | ||||||
| #include <functional> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <signal.h> |  | ||||||
| #include <ctime> |  | ||||||
| #include <sys/time.h> |  | ||||||
| #include <chrono> |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Grid headers |  | ||||||
| /////////////////// |  | ||||||
| #include "Config.h" |  | ||||||
|  |  | ||||||
| #include <Grid/perfmon/Timer.h> | #include <Grid/perfmon/Timer.h> | ||||||
| #include <Grid/perfmon/PerfCount.h> | #include <Grid/perfmon/PerfCount.h> | ||||||
|   | |||||||
							
								
								
									
										29
									
								
								lib/GridStd.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								lib/GridStd.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | |||||||
|  | #ifndef GRID_STD_H | ||||||
|  | #define GRID_STD_H | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Std C++ dependencies | ||||||
|  | /////////////////// | ||||||
|  | #include <cassert> | ||||||
|  | #include <complex> | ||||||
|  | #include <vector> | ||||||
|  | #include <string> | ||||||
|  | #include <iostream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <random> | ||||||
|  | #include <functional> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <signal.h> | ||||||
|  | #include <ctime> | ||||||
|  | #include <sys/time.h> | ||||||
|  | #include <chrono> | ||||||
|  | #include <zlib.h> | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Grid config | ||||||
|  | /////////////////// | ||||||
|  | #include "Config.h" | ||||||
|  |  | ||||||
|  | #endif /* GRID_STD_H */ | ||||||
							
								
								
									
										9
									
								
								lib/Grid_Eigen_Dense.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								lib/Grid_Eigen_Dense.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | #pragma once | ||||||
|  | #if defined __GNUC__ | ||||||
|  | #pragma GCC diagnostic push | ||||||
|  | #pragma GCC diagnostic ignored "-Wdeprecated-declarations" | ||||||
|  | #endif | ||||||
|  | #include <Grid/Eigen/Dense> | ||||||
|  | #if defined __GNUC__ | ||||||
|  | #pragma GCC diagnostic pop | ||||||
|  | #endif | ||||||
| @@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
|  |  | ||||||
| // Lanczos support | // Lanczos support | ||||||
| #include <Grid/algorithms/iterative/MatrixUtils.h> | //#include <Grid/algorithms/iterative/MatrixUtils.h> | ||||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | #include <Grid/algorithms/CoarsenedMatrix.h> | ||||||
| #include <Grid/algorithms/FFT.h> | #include <Grid/algorithms/FFT.h> | ||||||
|   | |||||||
| @@ -425,7 +425,7 @@ namespace Grid { | |||||||
| 	A[p]=zero; | 	A[p]=zero; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       GridParallelRNG  RNG(Grid()); RNG.SeedRandomDevice(); |       GridParallelRNG  RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34})); | ||||||
|       Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val); |       Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val); | ||||||
|  |  | ||||||
|       Complex one(1.0); |       Complex one(1.0); | ||||||
|   | |||||||
| @@ -197,8 +197,9 @@ namespace Grid { | |||||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |  | ||||||
|       GridBase *grid=in._grid; |       GridBase *grid=in._grid; | ||||||
| //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; |  | ||||||
| //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; |       // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||||
|  |       //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||||
|  |  | ||||||
|       int vol=grid->gSites(); |       int vol=grid->gSites(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,7 +16,7 @@ | |||||||
| #define INCLUDED_ALG_REMEZ_H | #define INCLUDED_ALG_REMEZ_H | ||||||
|  |  | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <Config.h> | #include <Grid/GridStd.h> | ||||||
|  |  | ||||||
| #ifdef HAVE_LIBGMP | #ifdef HAVE_LIBGMP | ||||||
| #include "bigfloat.h" | #include "bigfloat.h" | ||||||
|   | |||||||
							
								
								
									
										593
									
								
								lib/algorithms/iterative/BlockConjugateGradient.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										593
									
								
								lib/algorithms/iterative/BlockConjugateGradient.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,593 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H | ||||||
|  | #define GRID_BLOCK_CONJUGATE_GRADIENT_H | ||||||
|  |  | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; | ||||||
|  |  | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Block conjugate gradient. Dimension zero should be the block direction | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | template <class Field> | ||||||
|  | class BlockConjugateGradient : public OperatorFunction<Field> { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   typedef typename Field::scalar_type scomplex; | ||||||
|  |  | ||||||
|  |   int blockDim ; | ||||||
|  |   int Nblock; | ||||||
|  |  | ||||||
|  |   BlockCGtype CGtype; | ||||||
|  |   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||||
|  |                            // Defaults true. | ||||||
|  |   RealD Tolerance; | ||||||
|  |   Integer MaxIterations; | ||||||
|  |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|  |    | ||||||
|  |   // Store the solver settings. | ||||||
|  |   //   cgtype         : which implementation operator() dispatches to | ||||||
|  |   //   _Orthog        : grid dimension used as the block direction (stored in blockDim) | ||||||
|  |   //   tol            : convergence tolerance | ||||||
|  |   //   maxit          : maximum number of iterations | ||||||
|  |   //   err_on_no_conv : assert if the solver fails to converge (default true) | ||||||
|  |   // The initialiser list follows the member declaration order, which is the order in | ||||||
|  |   // which members are really initialised. Nblock and IterationsToComplete, which are | ||||||
|  |   // filled in by the solvers, start at zero instead of being left indeterminate. | ||||||
|  |   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||||
|  |     : blockDim(_Orthog), Nblock(0), CGtype(cgtype), ErrorOnNoConverge(err_on_no_conv), | ||||||
|  |       Tolerance(tol), MaxIterations(maxit), IterationsToComplete(0) | ||||||
|  |   {} | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Thin QR factorisation (google it) | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Thin QR factorisation R = Q C of a block vector, built from a Cholesky decomposition. | ||||||
|  | // | ||||||
|  | //   R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} | ||||||
|  | // | ||||||
|  | //   m_rr = R^dag R = L L^dag          (Hermitian; L from Eigen's LLT routine) | ||||||
|  | //   C    = L^dag,   Cinv = C^{-1},   Q = R Cinv | ||||||
|  | // | ||||||
|  | // With this choice Q^dag Q = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}. | ||||||
|  | // | ||||||
|  | // Outputs: m_rr, C, Cinv, Q.   Input: R.   The block direction is blockDim. | ||||||
|  | void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||||
|  | 		 Eigen::MatrixXcd &C, | ||||||
|  | 		 Eigen::MatrixXcd &Cinv, | ||||||
|  | 		 Field & Q, | ||||||
|  | 		 const Field & R) | ||||||
|  | { | ||||||
|  |   // Matrix of slice inner products of R with itself along the block direction. | ||||||
|  |   sliceInnerProductMatrix(m_rr,R,R,blockDim); | ||||||
|  |  | ||||||
|  |   // Cholesky factor from Eigen (an LDLT variant exists that is documented as more stable). | ||||||
|  |   const Eigen::MatrixXcd lower = m_rr.llt().matrixL(); | ||||||
|  |  | ||||||
|  |   C    = lower.adjoint(); | ||||||
|  |   Cinv = C.inverse(); | ||||||
|  |  | ||||||
|  |   // Q_j = R_i Cinv(i,j): R multiplied on the right by Cinv. | ||||||
|  |   sliceMulMatrix(Q,Cinv,R,blockDim); | ||||||
|  | } | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Call one of several implementations | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Solver entry point: forwards (Linop, Src, Psi) to the implementation selected by CGtype. | ||||||
|  | // A CGtype value matching none of the enumerators trips assert(0). | ||||||
|  | void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
|  | { | ||||||
|  |   switch (CGtype) { | ||||||
|  |   case BlockCGrQ: | ||||||
|  |     BlockCGrQsolve(Linop,Src,Psi); | ||||||
|  |     break; | ||||||
|  |   case BlockCG: | ||||||
|  |     BlockCGsolve(Linop,Src,Psi); | ||||||
|  |     break; | ||||||
|  |   case CGmultiRHS: | ||||||
|  |     CGmultiRHSsolve(Linop,Src,Psi); | ||||||
|  |     break; | ||||||
|  |   default: | ||||||
|  |     assert(0); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////////////////// | ||||||
|  | // BlockCGrQ implementation: | ||||||
|  | //-------------------------- | ||||||
|  | // X is guess/Solution | ||||||
|  | // B is RHS | ||||||
|  | // Solve A X_i = B_i    ;        i refers to Nblock index | ||||||
|  | //////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Block CG with thin-QR factorisation of the residual (BlockCGrQ). | ||||||
|  | //   Linop : operator, applied through Linop.HermOp | ||||||
|  | //   B     : right-hand sides, one per slice along blockDim | ||||||
|  | //   X     : initial guess on entry, updated solution on return | ||||||
|  | // Convergence: max_b m_rr(b,b)/|B_b|^2 < Tolerance^2, where m_rr = C^dag C. | ||||||
|  | // IterationsToComplete is set on exit; if MaxIterations is exhausted the routine | ||||||
|  | // asserts when ErrorOnNoConverge is set. | ||||||
|  | void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  | ||||||
|  | { | ||||||
|  |   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||||
|  |   Nblock = B._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
|  |  | ||||||
|  |   X.checkerboard = B.checkerboard; | ||||||
|  |   conformable(X, B); | ||||||
|  |  | ||||||
|  |   Field tmp(B); | ||||||
|  |   Field Q(B); | ||||||
|  |   Field D(B); | ||||||
|  |   Field Z(B); | ||||||
|  |   Field AD(B); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   // Initial residual computation & set up | ||||||
|  |   std::vector<RealD> residuals(Nblock); | ||||||
|  |   std::vector<RealD> ssq(Nblock); | ||||||
|  |  | ||||||
|  |   // Per-block source norms: used to normalise the residuals and, at the same | ||||||
|  |   // time, to check the source for NaNs (no second slice norm of B is needed). | ||||||
|  |   sliceNorm(ssq,B,Orthog); | ||||||
|  |   RealD sssum=0; | ||||||
|  |   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(ssq[b])==0); } | ||||||
|  |  | ||||||
|  |   // NaN check on the initial guess | ||||||
|  |   sliceNorm(residuals,X,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   /************************************************************************ | ||||||
|  |    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) | ||||||
|  |    ************************************************************************ | ||||||
|  |    * Dimensions: | ||||||
|  |    * | ||||||
|  |    *   X,B==(Nferm x Nblock) | ||||||
|  |    *   A==(Nferm x Nferm) | ||||||
|  |    *   | ||||||
|  |    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site | ||||||
|  |    *  | ||||||
|  |    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||||
|  |    * for k:  | ||||||
|  |    *   Z  = AD | ||||||
|  |    *   M  = [D^dag Z]^{-1} | ||||||
|  |    *   X  = X + D MC | ||||||
|  |    *   QS = Q - ZM | ||||||
|  |    *   D  = Q + D S^dag | ||||||
|  |    *   C  = S C | ||||||
|  |    */ | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   // Initial block: initial search dir is guess | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; | ||||||
|  |  | ||||||
|  |   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||||
|  |  | ||||||
|  |   Linop.HermOp(X, AD); | ||||||
|  |   tmp = B - AD;   | ||||||
|  |   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||||
|  |   D=Q; | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   // Timers | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   GridStopWatch sliceInnerTimer; | ||||||
|  |   GridStopWatch sliceMaddTimer; | ||||||
|  |   GridStopWatch QRTimer; | ||||||
|  |   GridStopWatch MatrixTimer; | ||||||
|  |   GridStopWatch SolverTimer; | ||||||
|  |   SolverTimer.Start(); | ||||||
|  |  | ||||||
|  |   int k; | ||||||
|  |   for (k = 1; k <= MaxIterations; k++) { | ||||||
|  |  | ||||||
|  |     //3. Z  = AD | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     Linop.HermOp(D, Z);       | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|  |     //4. M  = [D^dag Z]^{-1} | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     m_M       = m_DZ.inverse(); | ||||||
|  |  | ||||||
|  |     //5. X  = X + D MC | ||||||
|  |     m_tmp     = m_M * m_C; | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     //6. QS = Q - ZM | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |     QRTimer.Start(); | ||||||
|  |     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); | ||||||
|  |     QRTimer.Stop(); | ||||||
|  |      | ||||||
|  |     //7. D  = Q + D S^dag | ||||||
|  |     m_tmp = m_S.adjoint(); | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     //8. C  = S C | ||||||
|  |     m_C = m_S*m_C; | ||||||
|  |      | ||||||
|  |     /********************* | ||||||
|  |      * convergence monitor | ||||||
|  |      ********************* | ||||||
|  |      */ | ||||||
|  |     m_rr = m_C.adjoint() * m_C; | ||||||
|  |  | ||||||
|  |     RealD max_resid=0; | ||||||
|  |     RealD rrsum=0; | ||||||
|  |     RealD rr; | ||||||
|  |  | ||||||
|  |     // Largest per-block squared residual, relative to that block's source norm | ||||||
|  |     for(int b=0;b<Nblock;b++) { | ||||||
|  |       rrsum+=real(m_rr(b,b)); | ||||||
|  |       rr = real(m_rr(b,b))/ssq[b]; | ||||||
|  |       if ( rr > max_resid ) max_resid = rr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||||
|  | 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; | ||||||
|  |  | ||||||
|  |     if ( max_resid < Tolerance*Tolerance ) {  | ||||||
|  |  | ||||||
|  |       SolverTimer.Stop(); | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; | ||||||
|  |  | ||||||
|  |       for(int b=0;b<Nblock;b++){ | ||||||
|  | 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||||
|  | 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|  |       // Recompute the residual from scratch as a cross-check of the recursion | ||||||
|  |       Linop.HermOp(X, AD); | ||||||
|  |       AD = AD-B; | ||||||
|  |       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; | ||||||
|  | 	     | ||||||
|  |       IterationsToComplete = k; | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   // Iteration cap reached without meeting the tolerance | ||||||
|  |   SolverTimer.Stop(); | ||||||
|  |   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; | ||||||
|  |  | ||||||
|  |   if (ErrorOnNoConverge) assert(0); | ||||||
|  |   IterationsToComplete = k; | ||||||
|  | } | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
|  | { | ||||||
|  |   // Block CG solve of Linop.HermOp applied to Psi equal to Src, for all slices at once. | ||||||
|  |   // Psi is the initial guess on entry and is updated in place. | ||||||
|  |   // The slice (right-hand-side) index runs along lattice dimension blockDim. | ||||||
|  |   const int Orthog = blockDim; | ||||||
|  |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
|  |  | ||||||
|  |   Psi.checkerboard = Src.checkerboard; | ||||||
|  |   conformable(Psi, Src); | ||||||
|  |  | ||||||
|  |   // Search direction, operator applied to it, and residual | ||||||
|  |   Field P(Src); | ||||||
|  |   Field AP(Src); | ||||||
|  |   Field R(Src); | ||||||
|  |  | ||||||
|  |   // Nblock x Nblock coefficient matrices of the block recurrence | ||||||
|  |   Eigen::MatrixXcd PAP    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd PAPinv = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd RR     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd RRinv  = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd Alpha  = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd Beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   // Per-slice norms of the source; their sum normalises the iteration log | ||||||
|  |   std::vector<RealD> residuals(Nblock); | ||||||
|  |   std::vector<RealD> ssq(Nblock); | ||||||
|  |  | ||||||
|  |   sliceNorm(ssq,Src,Orthog); | ||||||
|  |   RealD sssum=0; | ||||||
|  |   for(int blk=0; blk<Nblock; ++blk){ | ||||||
|  |     sssum += ssq[blk]; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Assert that no slice norm of the source or of the guess is NaN | ||||||
|  |   sliceNorm(residuals,Src,Orthog); | ||||||
|  |   for(int blk=0; blk<Nblock; ++blk){ | ||||||
|  |     assert(std::isnan(residuals[blk])==0); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   sliceNorm(residuals,Psi,Orthog); | ||||||
|  |   for(int blk=0; blk<Nblock; ++blk){ | ||||||
|  |     assert(std::isnan(residuals[blk])==0); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Apply the operator to the initial guess | ||||||
|  |   Linop.HermOp(Psi, AP); | ||||||
|  |  | ||||||
|  |   /************************************************************************ | ||||||
|  |    * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) | ||||||
|  |    ************************************************************************ | ||||||
|  |    * Recurrence with unit preconditioner M = 1: | ||||||
|  |    *   R     = B - A X | ||||||
|  |    *   P     = M R | ||||||
|  |    *   alpha = PAP^{-1} RMR | ||||||
|  |    *   beta  = RMR^{-1}_old RMR_new | ||||||
|  |    *   X     = X + P alpha | ||||||
|  |    *   R_new = R_old - AP alpha | ||||||
|  |    *   P     = M R_new + P beta | ||||||
|  |    */ | ||||||
|  |  | ||||||
|  |   // The residual of the guess seeds both R and the first search direction | ||||||
|  |   R = Src - AP; | ||||||
|  |   P = R; | ||||||
|  |   sliceInnerProductMatrix(RR,R,R,Orthog); | ||||||
|  |  | ||||||
|  |   GridStopWatch sliceInnerTimer; | ||||||
|  |   GridStopWatch sliceMaddTimer; | ||||||
|  |   GridStopWatch MatrixTimer; | ||||||
|  |   GridStopWatch SolverTimer; | ||||||
|  |   SolverTimer.Start(); | ||||||
|  |  | ||||||
|  |   int k; | ||||||
|  |   for (k = 1; k <= MaxIterations; k++) { | ||||||
|  |  | ||||||
|  |     // Trace of the residual inner-product matrix, for the iteration log only | ||||||
|  |     RealD rrsum=0; | ||||||
|  |     for(int blk=0; blk<Nblock; ++blk){ | ||||||
|  |       rrsum += real(RR(blk,blk)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||||
|  |               <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||||
|  |  | ||||||
|  |     // AP = A P | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     Linop.HermOp(P, AP); | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // alpha = PAP^{-1} RR | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductMatrix(PAP,P,AP,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     PAPinv = PAP.inverse(); | ||||||
|  |     Alpha  = PAPinv * RR; | ||||||
|  |  | ||||||
|  |     // Psi gains P alpha; R loses AP alpha (the trailing -1.0 scale) | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(Psi,Alpha, P,Psi,Orthog); | ||||||
|  |     sliceMaddMatrix(R  ,Alpha,AP,  R,Orthog,-1.0); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // beta = RR_old^{-1} RR_new; the old matrix must be inverted before RR is refreshed | ||||||
|  |     RRinv = RR.inverse(); | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductMatrix(RR,R,R,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     Beta = RRinv * RR; | ||||||
|  |  | ||||||
|  |     // New search direction is assembled in AP (used as scratch) and copied back to P | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(AP,Beta,P,R,Orthog); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |     P= AP; | ||||||
|  |  | ||||||
|  |     /********************* | ||||||
|  |      * convergence monitor: worst slice ratio of the RR diagonal to the source norm | ||||||
|  |      ********************* | ||||||
|  |      */ | ||||||
|  |     RealD max_resid=0; | ||||||
|  |     for(int blk=0; blk<Nblock; ++blk){ | ||||||
|  |       const RealD ratio = real(RR(blk,blk))/ssq[blk]; | ||||||
|  |       max_resid = ( ratio > max_resid ) ? ratio : max_resid; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Not yet within tolerance: go round again | ||||||
|  |     if ( !(max_resid < Tolerance*Tolerance) ) continue; | ||||||
|  |  | ||||||
|  |     SolverTimer.Stop(); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||||
|  |     for(int blk=0; blk<Nblock; ++blk){ | ||||||
|  |       std::cout << GridLogMessage<< "\t\tblock "<<blk<<" computed resid " | ||||||
|  |                 << std::sqrt(real(RR(blk,blk))/ssq[blk])<<std::endl; | ||||||
|  |     } | ||||||
|  |     std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|  |     // Recompute the residual from scratch as a cross-check on the recurrence | ||||||
|  |     Linop.HermOp(Psi, AP); | ||||||
|  |     AP = AP-Src; | ||||||
|  |     std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|  |     std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||||
|  |     std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||||
|  |  | ||||||
|  |     IterationsToComplete = k; | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |   std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl; | ||||||
|  |  | ||||||
|  |   if (ErrorOnNoConverge) assert(0); | ||||||
|  |   IterationsToComplete = k; | ||||||
|  | } | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||||
|  | // Use this for spread out across nodes | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Runs one conjugate gradient recurrence per slice of dimension blockDim, sharing the | ||||||
|  | // operator applications between all slices. Psi holds the initial guess on entry and | ||||||
|  | // is updated in place. The solve stops once the largest ratio of slice residual norm | ||||||
|  | // to slice source norm (both as returned by sliceNorm) drops below Tolerance*Tolerance; | ||||||
|  | // the iteration count is left in IterationsToComplete. | ||||||
|  | void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
|  | { | ||||||
|  |   int Orthog = blockDim; // First dimension is block dim | ||||||
|  |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
|  |  | ||||||
|  |   Psi.checkerboard = Src.checkerboard; | ||||||
|  |   conformable(Psi, Src); | ||||||
|  |  | ||||||
|  |   Field P(Src); | ||||||
|  |   Field AP(Src); | ||||||
|  |   Field R(Src); | ||||||
|  |  | ||||||
|  |   // Per-slice scalars of the recurrence | ||||||
|  |   std::vector<ComplexD> v_pAp(Nblock); | ||||||
|  |   std::vector<RealD> v_rr (Nblock); | ||||||
|  |   std::vector<RealD> v_rr_inv(Nblock); | ||||||
|  |   std::vector<RealD> v_alpha(Nblock); | ||||||
|  |   std::vector<RealD> v_beta(Nblock); | ||||||
|  |  | ||||||
|  |   // Initial residual computation & set up | ||||||
|  |   std::vector<RealD> residuals(Nblock); | ||||||
|  |   std::vector<RealD> ssq(Nblock); | ||||||
|  |  | ||||||
|  |   sliceNorm(ssq,Src,Orthog); | ||||||
|  |   RealD sssum=0; | ||||||
|  |   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||||
|  |  | ||||||
|  |   // ssq already holds the slice norms of Src, so the NaN check reuses it | ||||||
|  |   // instead of reducing over Src a second time. | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(ssq[b])==0); } | ||||||
|  |  | ||||||
|  |   sliceNorm(residuals,Psi,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   // Residual of the initial guess seeds R and the first search direction | ||||||
|  |   Linop.HermOp(Psi, AP); | ||||||
|  |  | ||||||
|  |   R = Src - AP; | ||||||
|  |   P = R; | ||||||
|  |   sliceNorm(v_rr,R,Orthog); | ||||||
|  |  | ||||||
|  |   GridStopWatch sliceInnerTimer; | ||||||
|  |   GridStopWatch sliceMaddTimer; | ||||||
|  |   GridStopWatch sliceNormTimer; | ||||||
|  |   GridStopWatch MatrixTimer; | ||||||
|  |   GridStopWatch SolverTimer; | ||||||
|  |  | ||||||
|  |   SolverTimer.Start(); | ||||||
|  |   int k; | ||||||
|  |   for (k = 1; k <= MaxIterations; k++) { | ||||||
|  |  | ||||||
|  |     RealD rrsum=0; | ||||||
|  |     for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||||
|  |               <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||||
|  |  | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     Linop.HermOp(P, AP); | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // Alpha | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       const RealD pAp = real(v_pAp[b]); | ||||||
|  |       // A slice whose search direction has vanished (for example a zero source with a | ||||||
|  |       // zero guess) has pAp == 0. Dividing would write NaN or Inf into Psi for that | ||||||
|  |       // slice, so the slice is frozen with alpha = 0 instead. | ||||||
|  |       v_alpha[b] = (pAp != 0.0) ? v_rr[b]/pAp : 0.0; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Psi, R update | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi | ||||||
|  |     sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // Beta: the old residual norm must be captured before v_rr is refreshed | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       // An exactly converged slice has v_rr == 0; use 0 rather than 1/0 so that | ||||||
|  |       // beta stays finite and the slice keeps a zero search direction. | ||||||
|  |       v_rr_inv[b] = (v_rr[b] != 0.0) ? 1.0/v_rr[b] : 0.0; | ||||||
|  |     } | ||||||
|  |     sliceNormTimer.Start(); | ||||||
|  |     sliceNorm(v_rr,R,Orthog); | ||||||
|  |     sliceNormTimer.Stop(); | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       v_beta[b] = v_rr_inv[b] *v_rr[b]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Search update | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddVector(P,v_beta,P,R,Orthog); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     /********************* | ||||||
|  |      * convergence monitor | ||||||
|  |      ********************* | ||||||
|  |      * A slice with zero source norm yields a NaN ratio, which never compares | ||||||
|  |      * greater than max_resid and therefore does not hold up convergence. | ||||||
|  |      */ | ||||||
|  |     RealD max_resid=0; | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       RealD rr = v_rr[b]/ssq[b]; | ||||||
|  |       if ( rr > max_resid ) max_resid = rr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if ( max_resid < Tolerance*Tolerance ) {  | ||||||
|  |  | ||||||
|  |       SolverTimer.Stop(); | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||||
|  |       for(int b=0;b<Nblock;b++){ | ||||||
|  |         std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|  |       // Recompute the residual from scratch as a cross-check on the recurrence | ||||||
|  |       Linop.HermOp(Psi, AP); | ||||||
|  |       AP = AP-Src; | ||||||
|  |       std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       IterationsToComplete = k; | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl; | ||||||
|  |  | ||||||
|  |   if (ErrorOnNoConverge) assert(0); | ||||||
|  |   IterationsToComplete = k; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
| @@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     cp = a; |     cp = a; | ||||||
|     ssq = norm2(src); |     ssq = norm2(src); | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; | ||||||
|               << "ConjugateGradient: guess " << guess << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   src " << ssq << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:    mp " << d << std::endl; | ||||||
|               << "ConjugateGradient:   src " << ssq << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   mmp " << b << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||||
|               << "ConjugateGradient:    mp " << d << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:     p " << a << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:   mmp " << b << std::endl; |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:  cp,r " << cp << std::endl; |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:     p " << a << std::endl; |  | ||||||
|  |  | ||||||
|     RealD rsq = Tolerance * Tolerance * ssq; |     RealD rsq = Tolerance * Tolerance * ssq; | ||||||
|  |  | ||||||
| @@ -99,8 +93,7 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq |               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; | ||||||
|               << std::endl; |  | ||||||
|  |  | ||||||
|     GridStopWatch LinalgTimer; |     GridStopWatch LinalgTimer; | ||||||
|     GridStopWatch MatrixTimer; |     GridStopWatch MatrixTimer; | ||||||
| @@ -130,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|       p = p * b + r; |       p = p * b + r; | ||||||
|  |  | ||||||
|       LinalgTimer.Stop(); |       LinalgTimer.Stop(); | ||||||
|  |  | ||||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k |       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||||
|                 << " residual " << cp << " target " << rsq << std::endl; |                 << " residual " << cp << " target " << rsq << std::endl; | ||||||
|  |       std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; | ||||||
|  |       std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; | ||||||
|  |  | ||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if (cp <= rsq) { |       if (cp <= rsq) { | ||||||
| @@ -139,32 +135,33 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|         Linop.HermOpAndNorm(psi, mmp, d, qq); |         Linop.HermOpAndNorm(psi, mmp, d, qq); | ||||||
|         p = mmp - src; |         p = mmp - src; | ||||||
|  |  | ||||||
|         RealD mmpnorm = sqrt(norm2(mmp)); |  | ||||||
|         RealD psinorm = sqrt(norm2(psi)); |  | ||||||
|         RealD srcnorm = sqrt(norm2(src)); |         RealD srcnorm = sqrt(norm2(src)); | ||||||
|         RealD resnorm = sqrt(norm2(p)); |         RealD resnorm = sqrt(norm2(p)); | ||||||
|         RealD true_residual = resnorm / srcnorm; |         RealD true_residual = resnorm / srcnorm; | ||||||
|  |  | ||||||
|         std::cout << GridLogMessage |         std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; | ||||||
|                   << "ConjugateGradient: Converged on iteration " << k << std::endl; |         std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; | ||||||
|         std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq) | 	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; | ||||||
|                   << " true residual " << true_residual << " target " | 	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; | ||||||
|                   << Tolerance << std::endl; |  | ||||||
|         std::cout << GridLogMessage << "Time elapsed: Iterations " |         std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||||
|                   << SolverTimer.Elapsed() << " Matrix  " | 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||||
|                   << MatrixTimer.Elapsed() << " Linalg " | 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||||
|                   << LinalgTimer.Elapsed(); | 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||||
|         std::cout << std::endl; |  | ||||||
|  |  | ||||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); |         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||||
|  |  | ||||||
| 	IterationsToComplete = k;	 | 	IterationsToComplete = k;	 | ||||||
|  |  | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" |     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||||
|               << std::endl; |               << std::endl; | ||||||
|  |  | ||||||
|     if (ErrorOnNoConverge) assert(0); |     if (ErrorOnNoConverge) assert(0); | ||||||
|     IterationsToComplete = k; |     IterationsToComplete = k; | ||||||
|  |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,137 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/DenseMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_DENSE_MATRIX_H |  | ||||||
| #define GRID_DENSE_MATRIX_H |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     // Matrix utilities |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| // A dense vector is a std::vector; a dense matrix is a vector of row vectors, |  | ||||||
| // indexed as mat[row][col]. |  | ||||||
| template<class T> using DenseVector = std::vector<T>; |  | ||||||
| template<class T> using DenseMatrix = DenseVector<DenseVector<T> >; |  | ||||||
|  |  | ||||||
| /** Store the number of elements of vec in N **/ |  | ||||||
| template<class T> void Size(DenseVector<T> & vec, int &N) |  | ||||||
| { |  | ||||||
|   N= vec.size(); |  | ||||||
| } |  | ||||||
| /** Store the row count in N and the column count in M. |  | ||||||
|     The column count is read from row 0 only; an empty matrix reports M = 0 |  | ||||||
|     rather than indexing a row that does not exist. **/ |  | ||||||
| template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M) |  | ||||||
| { |  | ||||||
|   N= mat.size(); |  | ||||||
|   M= mat.empty() ? 0 : static_cast<int>(mat[0].size()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Store the dimension of a square matrix in N; asserts that rows == columns **/ |  | ||||||
| template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N) |  | ||||||
| { |  | ||||||
|   int M; Size(mat,N,M); |  | ||||||
|   assert(N==M); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Resize a vector to N elements **/ |  | ||||||
| template<class T> void Resize(DenseVector<T > & mat, int N) { |  | ||||||
|   mat.resize(N); |  | ||||||
| } |  | ||||||
| /** Resize a matrix to N rows of M columns each **/ |  | ||||||
| template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) { |  | ||||||
|   mat.resize(N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     mat[i].resize(M); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| /** Set every element of the matrix to val. |  | ||||||
|     val is taken by const reference so that const values can be passed. **/ |  | ||||||
| template<class T> void Fill(DenseMatrix<T> & mat, const T&val) { |  | ||||||
|   int N,M; |  | ||||||
|   Size(mat,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     mat[i][j] = val; |  | ||||||
|   }} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Transpose of a matrix: an N x M input gives an M x N result **/ |  | ||||||
| template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){ |  | ||||||
|   int N,M; |  | ||||||
|   Size(mat,N,M); |  | ||||||
|   DenseMatrix<T> C; Resize(C,M,N); |  | ||||||
|   for(int i=0;i<M;i++){ |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     C[i][j] = mat[j][i]; |  | ||||||
|   }} |  | ||||||
|   return C; |  | ||||||
| } |  | ||||||
| /** Set a square DenseMatrix to the unit matrix **/ |  | ||||||
| template<class T> void Unity(DenseMatrix<T> &A){ |  | ||||||
|   int N;  SizeSquare(A,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     for(int j=0;j<N;j++){ |  | ||||||
|       if ( i==j ) A[i][j] = 1; |  | ||||||
|       else        A[i][j] = 0; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Add c * I to a square matrix, i.e. add c to every diagonal element **/ |  | ||||||
| template<class T> |  | ||||||
| void PlusUnit(DenseMatrix<T> & A,T c){ |  | ||||||
|   int dim;  SizeSquare(A,dim); |  | ||||||
|   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Return the Hermitian conjugate of a square matrix **/ |  | ||||||
| template<class T> |  | ||||||
| DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){ |  | ||||||
|  |  | ||||||
|   int dim; SizeSquare(mat,dim); |  | ||||||
|  |  | ||||||
|   DenseMatrix<T> C; Resize(C,dim,dim); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     for(int j=0;j<dim;j++){ |  | ||||||
|       C[i][j] = conj(mat[j][i]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return C; |  | ||||||
| } |  | ||||||
| /** Copy out the submatrix of rows [row_st,row_end) and columns [col_st,col_end). |  | ||||||
|     The result need not be square, and the index ranges are not bounds-checked. **/ |  | ||||||
| template <class T> |  | ||||||
| DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end) |  | ||||||
| { |  | ||||||
|   DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st); |  | ||||||
|  |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|   for(int j = col_st; j<col_end; j++){ |  | ||||||
|     H[i-row_st][j-col_st]=A[i][j]; |  | ||||||
|   }} |  | ||||||
|   return H; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #include "Householder.h" |  | ||||||
| #include "Francis.h" |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| @@ -1,81 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/EigenSort.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_EIGENSORT_H |  | ||||||
| #define GRID_EIGENSORT_H |  | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     // Eigen sorter to begin with |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| // Partially sorts eigenvalues, and optionally the matching eigenvectors, so that |  | ||||||
| // the N largest values come first in descending order. |  | ||||||
| template<class Field> |  | ||||||
| class SortEigen { |  | ||||||
|  private: |  | ||||||
|  |  | ||||||
| //hacking for testing for now |  | ||||||
|  private: |  | ||||||
|   // Despite the "less" in their names both comparators order by DESCENDING value. |  | ||||||
|   static bool less_lmd(RealD left,RealD right){ |  | ||||||
|     return left > right; |  | ||||||
|   } |  | ||||||
|   // Arguments are const references: a comparator handed to the standard sorting |  | ||||||
|   // algorithms must accept const values and must not modify them. |  | ||||||
|   static bool less_pair(const std::pair<RealD,Field const*>& left, |  | ||||||
|                         const std::pair<RealD,Field const*>& right){ |  | ||||||
|     return left.first > (right.first); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|   // Reorder lmd and evec together so that entries 0..N-1 hold the N largest |  | ||||||
|   // eigenvalues in descending order with their eigenvectors. Entries of lmd and |  | ||||||
|   // evec from index N onwards are left untouched. |  | ||||||
|   void push(DenseVector<RealD>& lmd, |  | ||||||
|             DenseVector<Field>& evec,int N) { |  | ||||||
|     const int Nlmd = static_cast<int>(lmd.size()); |  | ||||||
|     // begin()+N must stay inside lmd, and every eigenvalue needs an eigenvector |  | ||||||
|     assert(N >= 0); |  | ||||||
|     assert(N <= Nlmd); |  | ||||||
|     assert(static_cast<int>(evec.size()) >= Nlmd); |  | ||||||
|     if ( Nlmd == 0 ) return; // nothing to sort, and evec[0] may not exist |  | ||||||
|  |  | ||||||
|     // Snapshot of the eigenvectors: evec is overwritten in place below, so the |  | ||||||
|     // sorted pointers must refer to copies. |  | ||||||
|     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); |  | ||||||
|     for(int i=0;i<Nlmd;i++) cpy[i] = evec[i]; |  | ||||||
|  |  | ||||||
|     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size()); |  | ||||||
|     for(int i=0;i<Nlmd;++i) |  | ||||||
|       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); |  | ||||||
|  |  | ||||||
|     std::partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); |  | ||||||
|  |  | ||||||
|     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); |  | ||||||
|     for(int i=0;i<N;++i){ |  | ||||||
|       lmd[i]=it->first; |  | ||||||
|       evec[i]=*(it->second); |  | ||||||
|       ++it; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   // Eigenvalue-only variant: the N largest values of lmd are moved to the front |  | ||||||
|   // in descending order. |  | ||||||
|   void push(DenseVector<RealD>& lmd,int N) { |  | ||||||
|     assert(N >= 0); |  | ||||||
|     assert(N <= static_cast<int>(lmd.size())); |  | ||||||
|     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); |  | ||||||
|   } |  | ||||||
|   // True when the magnitude of lmd exceeds the magnitude of thrs |  | ||||||
|   bool saturated(RealD lmd, RealD thrs) { |  | ||||||
|     return fabs(lmd) > fabs(thrs); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,525 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/Francis.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef FRANCIS_H |  | ||||||
| #define FRANCIS_H |  | ||||||
|  |  | ||||||
| #include <cstdlib> |  | ||||||
| #include <string> |  | ||||||
| #include <cmath> |  | ||||||
| #include <iostream> |  | ||||||
| #include <sstream> |  | ||||||
| #include <stdexcept> |  | ||||||
| #include <fstream> |  | ||||||
| #include <complex> |  | ||||||
| #include <algorithm> |  | ||||||
|  |  | ||||||
| //#include <timer.h> |  | ||||||
| //#include <lapacke.h> |  | ||||||
| //#include <Eigen/Dense> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
| template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); |  | ||||||
| template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. |  | ||||||
| H = |  | ||||||
|       x  x  x  x  x  x  x  x  x |  | ||||||
|       x  x  x  x  x  x  x  x  x |  | ||||||
|       0  x  x  x  x  x  x  x  x |  | ||||||
|       0  0  x  x  x  x  x  x  x |  | ||||||
|       0  0  0  x  x  x  x  x  x |  | ||||||
|       0  0  0  0  x  x  x  x  x |  | ||||||
|       0  0  0  0  0  x  x  x  x |  | ||||||
|       0  0  0  0  0  0  x  x  x |  | ||||||
|       0  0  0  0  0  0  0  x  x |  | ||||||
| Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. |  | ||||||
| **/ |  | ||||||
| template <class T> |  | ||||||
| int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) |  | ||||||
| { |  | ||||||
|   DenseMatrix<T> H = Hin;  |  | ||||||
|  |  | ||||||
|   int N ; SizeSquare(H,N); |  | ||||||
|   int M = N; |  | ||||||
|  |  | ||||||
|   Fill(evals,0); |  | ||||||
|   Fill(evecs,0); |  | ||||||
|  |  | ||||||
|   T s,t,x=0,y=0,z=0; |  | ||||||
|   T u,d; |  | ||||||
|   T apd,amd,bc; |  | ||||||
|   DenseVector<T> p(N,0); |  | ||||||
|   T nrm = Norm(H);    ///DenseMatrix Norm |  | ||||||
|   int n, m; |  | ||||||
|   int e = 0; |  | ||||||
|   int it = 0; |  | ||||||
|   int tot_it = 0; |  | ||||||
|   int l = 0; |  | ||||||
|   int r = 0; |  | ||||||
|   DenseMatrix<T> P; Resize(P,N,N); Unity(P); |  | ||||||
|   DenseVector<int> trows(N,0); |  | ||||||
|  |  | ||||||
|   /// Check if the matrix is really hessenberg, if not abort |  | ||||||
|   RealD sth = 0; |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     for(int i=j+2;i<N;i++){ |  | ||||||
|       sth = abs(H[i][j]); |  | ||||||
|       if(sth > small){ |  | ||||||
| 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; |  | ||||||
| 	exit(1); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   do{ |  | ||||||
|     std::cout << "Francis QR Step N = " << N << std::endl; |  | ||||||
|     /** Check for convergence |  | ||||||
|       x  x  x  x  x |  | ||||||
|       0  x  x  x  x |  | ||||||
|       0  0  x  x  x |  | ||||||
|       0  0  x  x  x |  | ||||||
|       0  0  0  0  x |  | ||||||
|       for this matrix l = 4 |  | ||||||
|      **/ |  | ||||||
|     do{ |  | ||||||
|       l = Chop_subdiag(H,nrm,e,small); |  | ||||||
|       r = 0;    ///May have converged on more than one eval |  | ||||||
|       ///Single eval |  | ||||||
|       if(l == N-1){ |  | ||||||
|         evals[e] = H[l][l]; |  | ||||||
|         N--; e++; r++; it = 0; |  | ||||||
|       } |  | ||||||
|       ///RealD eval |  | ||||||
|       if(l == N-2){ |  | ||||||
|         trows[l+1] = 1;    ///Needed for UTSolve |  | ||||||
|         apd = H[l][l] + H[l+1][l+1]; |  | ||||||
|         amd = H[l][l] - H[l+1][l+1]; |  | ||||||
|         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; |  | ||||||
|         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); |  | ||||||
|         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); |  | ||||||
|         N-=2; e+=2; r++; it = 0; |  | ||||||
|       } |  | ||||||
|     } while(r>0); |  | ||||||
|  |  | ||||||
|     if(N ==0) break; |  | ||||||
|  |  | ||||||
|     DenseVector<T > ck; Resize(ck,3); |  | ||||||
|     DenseVector<T> v;   Resize(v,3); |  | ||||||
|  |  | ||||||
|     for(int m = N-3; m >= l; m--){ |  | ||||||
|       ///Starting vector essentially random shift. |  | ||||||
|       if(it%10 == 0 && N >= 3 && it > 0){ |  | ||||||
|         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); |  | ||||||
|         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); |  | ||||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; |  | ||||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); |  | ||||||
|         z = H[m+1][m]*H[m+2][m+1]; |  | ||||||
|       } |  | ||||||
|       ///Starting vector implicit Q theorem |  | ||||||
|       else{ |  | ||||||
|         s = (H[N-2][N-2] + H[N-1][N-1]); |  | ||||||
|         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); |  | ||||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; |  | ||||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); |  | ||||||
|         z = H[m+1][m]*H[m+2][m+1]; |  | ||||||
|       } |  | ||||||
|       ck[0] = x; ck[1] = y; ck[2] = z; |  | ||||||
|  |  | ||||||
|       if(m == l) break; |  | ||||||
|  |  | ||||||
|       /** Some stupid thing from numerical recipies, seems to work**/ |  | ||||||
|       // PAB.. for heaven's sake quote page, purpose, evidence it works. |  | ||||||
|       //       what sort of comment is that!?!?!? |  | ||||||
|       u=abs(H[m][m-1])*(abs(y)+abs(z)); |  | ||||||
|       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); |  | ||||||
|       if ((T)abs(u+d) == (T)abs(d) ){ |  | ||||||
| 	l = m; break; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       //if (u < small){l = m; break;} |  | ||||||
|     } |  | ||||||
|     if(it > 100000){ |  | ||||||
|      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; |  | ||||||
|      std::cout << "got " << e << " evals " << l << " " << N << std::endl; |  | ||||||
|       exit(1); |  | ||||||
|     } |  | ||||||
|     normalize(ck);    ///Normalization cancels in PHP anyway |  | ||||||
|     T beta; |  | ||||||
|     Householder_vector<T >(ck, 0, 2, v, beta); |  | ||||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,0); |  | ||||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,1); |  | ||||||
|     ///Accumulate eigenvector |  | ||||||
|     Householder_mult<T >(P,v,beta,0,l,l+2,1); |  | ||||||
|     int sw = 0;      ///Are we on the last row? |  | ||||||
|     for(int k=l;k<N-2;k++){ |  | ||||||
|       x = H[k+1][k]; |  | ||||||
|       y = H[k+2][k]; |  | ||||||
|       z = (T)0.0; |  | ||||||
|       if(k+3 <= N-1){ |  | ||||||
| 	z = H[k+3][k]; |  | ||||||
|       } else{ |  | ||||||
| 	sw = 1;  |  | ||||||
| 	v[2] = (T)0.0; |  | ||||||
|       } |  | ||||||
|       ck[0] = x; ck[1] = y; ck[2] = z; |  | ||||||
|       normalize(ck); |  | ||||||
|       Householder_vector<T >(ck, 0, 2-sw, v, beta); |  | ||||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); |  | ||||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); |  | ||||||
|       ///Accumulate eigenvector |  | ||||||
|       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); |  | ||||||
|     } |  | ||||||
|     it++; |  | ||||||
|     tot_it++; |  | ||||||
|   }while(N > 1); |  | ||||||
|   N = evals.size(); |  | ||||||
|   ///Annoying - UT solves in reverse order; |  | ||||||
|   DenseVector<T> tmp; Resize(tmp,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     tmp[i] = evals[N-i-1]; |  | ||||||
|   }  |  | ||||||
|   evals = tmp; |  | ||||||
|   UTeigenvectors(H, trows, evals, evecs); |  | ||||||
|   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} |  | ||||||
|   return tot_it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) |  | ||||||
| { |  | ||||||
|   /** |  | ||||||
|   Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. |  | ||||||
|   H = |  | ||||||
|   x  x  0  0  0  0 |  | ||||||
|   x  x  x  0  0  0 |  | ||||||
|   0  x  x  x  0  0 |  | ||||||
|   0  0  x  x  x  0 |  | ||||||
|   0  0  0  x  x  x |  | ||||||
|   0  0  0  0  x  x |  | ||||||
|   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.  **/ |  | ||||||
|   return my_Wilkinson(Hin, evals, evecs, small, small); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) |  | ||||||
| { |  | ||||||
|   int N; SizeSquare(Hin,N); |  | ||||||
|   int M = N; |  | ||||||
|  |  | ||||||
|   ///I don't want to modify the input but matricies must be passed by reference |  | ||||||
|   //Scale a matrix by its "norm" |  | ||||||
|   //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm); |  | ||||||
|   DenseMatrix<T> H;  H = Hin; |  | ||||||
|    |  | ||||||
|   RealD Hnorm = abs(Norm(Hin)); |  | ||||||
|   H = H * (1.0 / Hnorm); |  | ||||||
|  |  | ||||||
|   // TODO use openmp and memset |  | ||||||
|   Fill(evals,0); |  | ||||||
|   Fill(evecs,0); |  | ||||||
|  |  | ||||||
|   T s, t, x = 0, y = 0, z = 0; |  | ||||||
|   T u, d; |  | ||||||
|   T apd, amd, bc; |  | ||||||
|   DenseVector<T> p; Resize(p,N); Fill(p,0); |  | ||||||
|  |  | ||||||
|   T nrm = Norm(H);    ///DenseMatrix Norm |  | ||||||
|   int n, m; |  | ||||||
|   int e = 0; |  | ||||||
|   int it = 0; |  | ||||||
|   int tot_it = 0; |  | ||||||
|   int l = 0; |  | ||||||
|   int r = 0; |  | ||||||
|   DenseMatrix<T> P; Resize(P,N,N); |  | ||||||
|   Unity(P); |  | ||||||
|   DenseVector<int> trows(N, 0); |  | ||||||
|   /// Check if the matrix is really symm tridiag |  | ||||||
|   RealD sth = 0; |  | ||||||
|   for(int j = 0; j < N; ++j) |  | ||||||
|   { |  | ||||||
|     for(int i = j + 2; i < N; ++i) |  | ||||||
|     { |  | ||||||
|       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) |  | ||||||
|       { |  | ||||||
| 	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; |  | ||||||
| 	std::cout << "Warning tridiagonalize and call again" << std::endl; |  | ||||||
|         // exit(1); // see what is going on |  | ||||||
|         //return; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   do{ |  | ||||||
|     do{ |  | ||||||
|       //Jasper |  | ||||||
|       //Check if the subdiagonal term is small enough (<small) |  | ||||||
|       //if true then it is converged. |  | ||||||
|       //check start from H.dim - e - 1 |  | ||||||
|       //How to deal with more than 2 are converged? |  | ||||||
|       //What if Chop_symm_subdiag return something int the middle? |  | ||||||
|       //-------------- |  | ||||||
|       l = Chop_symm_subdiag(H,nrm, e, small); |  | ||||||
|       r = 0;    ///May have converged on more than one eval |  | ||||||
|       //Jasper |  | ||||||
|       //In this case |  | ||||||
|       // x  x  0  0  0  0 |  | ||||||
|       // x  x  x  0  0  0 |  | ||||||
|       // 0  x  x  x  0  0 |  | ||||||
|       // 0  0  x  x  x  0 |  | ||||||
|       // 0  0  0  x  x  0 |  | ||||||
|       // 0  0  0  0  0  x  <- l |  | ||||||
|       //-------------- |  | ||||||
|       ///Single eval |  | ||||||
|       if(l == N - 1) |  | ||||||
|       { |  | ||||||
|         evals[e] = H[l][l]; |  | ||||||
|         N--; |  | ||||||
|         e++; |  | ||||||
|         r++; |  | ||||||
|         it = 0; |  | ||||||
|       } |  | ||||||
|       //Jasper |  | ||||||
|       // x  x  0  0  0  0 |  | ||||||
|       // x  x  x  0  0  0 |  | ||||||
|       // 0  x  x  x  0  0 |  | ||||||
|       // 0  0  x  x  0  0 |  | ||||||
|       // 0  0  0  0  x  x  <- l |  | ||||||
|       // 0  0  0  0  x  x |  | ||||||
|       //-------------- |  | ||||||
|       ///RealD eval |  | ||||||
|       if(l == N - 2) |  | ||||||
|       { |  | ||||||
|         trows[l + 1] = 1;    ///Needed for UTSolve |  | ||||||
|         apd = H[l][l] + H[l + 1][ l + 1]; |  | ||||||
|         amd = H[l][l] - H[l + 1][l + 1]; |  | ||||||
|         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; |  | ||||||
|         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); |  | ||||||
|         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); |  | ||||||
|         N -= 2; |  | ||||||
|         e += 2; |  | ||||||
|         r++; |  | ||||||
|         it = 0; |  | ||||||
|       } |  | ||||||
|     }while(r > 0); |  | ||||||
|     //Jasper |  | ||||||
|     //Already converged |  | ||||||
|     //-------------- |  | ||||||
|     if(N == 0) break; |  | ||||||
|  |  | ||||||
|     DenseVector<T> ck,v; Resize(ck,2); Resize(v,2); |  | ||||||
|  |  | ||||||
|     for(int m = N - 3; m >= l; m--) |  | ||||||
|     { |  | ||||||
|       ///Starting vector essentially random shift. |  | ||||||
|       if(it%10 == 0 && N >= 3 && it > 0) |  | ||||||
|       { |  | ||||||
|         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); |  | ||||||
|         x = H[m][m] - t; |  | ||||||
|         z = H[m + 1][m]; |  | ||||||
|       } else { |  | ||||||
|       ///Starting vector implicit Q theorem |  | ||||||
|         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; |  | ||||||
|         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]  |  | ||||||
| 	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); |  | ||||||
|         x = H[m][m] - t; |  | ||||||
|         z = H[m + 1][m]; |  | ||||||
|       } |  | ||||||
|       //Jasper |  | ||||||
|       //why it is here???? |  | ||||||
|       //----------------------- |  | ||||||
|       if(m == l) |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); |  | ||||||
|       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); |  | ||||||
|       if ((T)abs(u + d) == (T)abs(d)) |  | ||||||
|       { |  | ||||||
|         l = m; |  | ||||||
|         break; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     //Jasper |  | ||||||
|     if(it > 1000000) |  | ||||||
|     { |  | ||||||
|       std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; |  | ||||||
|       std::cout << "got " << e << " evals " << l << " " << N << std::endl; |  | ||||||
|       exit(1); |  | ||||||
|     } |  | ||||||
|     // |  | ||||||
|     T s, c; |  | ||||||
|     Givens_calc<T>(x, z, c, s); |  | ||||||
|     Givens_mult<T>(H, l, l + 1, c, -s, 0); |  | ||||||
|     Givens_mult<T>(H, l, l + 1, c,  s, 1); |  | ||||||
|     Givens_mult<T>(P, l, l + 1, c,  s, 1); |  | ||||||
|     // |  | ||||||
|     for(int k = l; k < N - 2; ++k) |  | ||||||
|     { |  | ||||||
|       x = H.A[k + 1][k]; |  | ||||||
|       z = H.A[k + 2][k]; |  | ||||||
|       Givens_calc<T>(x, z, c, s); |  | ||||||
|       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); |  | ||||||
|       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); |  | ||||||
|       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); |  | ||||||
|     } |  | ||||||
|     it++; |  | ||||||
|     tot_it++; |  | ||||||
|   }while(N > 1); |  | ||||||
|  |  | ||||||
|   N = evals.size(); |  | ||||||
|   ///Annoying - UT solves in reverse order; |  | ||||||
|   DenseVector<T> tmp(N); |  | ||||||
|   for(int i = 0; i < N; ++i) |  | ||||||
|     tmp[i] = evals[N-i-1]; |  | ||||||
|   evals = tmp; |  | ||||||
|   // |  | ||||||
|   UTeigenvectors(H, trows, evals, evecs); |  | ||||||
|   //UTSymmEigenvectors(H, trows, evals, evecs); |  | ||||||
|   for(int i = 0; i < evals.size(); ++i) |  | ||||||
|   { |  | ||||||
|     evecs[i] = P * evecs[i]; |  | ||||||
|     normalize(evecs[i]); |  | ||||||
|     evals[i] = evals[i] * Hnorm; |  | ||||||
|   } |  | ||||||
|   // // FIXME this is to test |  | ||||||
|   // Hin.write("evecs3", evecs); |  | ||||||
|   // Hin.write("evals3", evals); |  | ||||||
|   // // check rsd |  | ||||||
|   // for(int i = 0; i < M; i++) { |  | ||||||
|   //   vector<T> Aevec = Hin * evecs[i]; |  | ||||||
|   //   RealD norm2(0.); |  | ||||||
|   //   for(int j = 0; j < M; j++) { |  | ||||||
|   //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); |  | ||||||
|   //   } |  | ||||||
|   // } |  | ||||||
|   return tot_it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|   turn a matrix A = |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   into |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   0  x  x  x  x |  | ||||||
|   0  0  x  x  x |  | ||||||
|   0  0  0  x  x |  | ||||||
|   with householder rotations |  | ||||||
|   Slow. |  | ||||||
|   */ |  | ||||||
|   int N ; SizeSquare(A,N); |  | ||||||
|   DenseVector<T > p; Resize(p,N); Fill(p,0); |  | ||||||
|  |  | ||||||
|   for(int k=start;k<N-2;k++){ |  | ||||||
|     //cerr << "hess" << k << std::endl; |  | ||||||
|     DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); |  | ||||||
|     for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column |  | ||||||
|     normalize(ck);    ///Normalization cancels in PHP anyway |  | ||||||
|     T beta; |  | ||||||
|     Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector |  | ||||||
|     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA |  | ||||||
|     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H |  | ||||||
|     ///Accumulate eigenvector |  | ||||||
|     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H |  | ||||||
|   } |  | ||||||
|   /*for(int l=0;l<N-2;l++){ |  | ||||||
|     for(int k=l+2;k<N;k++){ |  | ||||||
|     A(0,k,l); |  | ||||||
|     } |  | ||||||
|     }*/ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ |  | ||||||
| ///Tridiagonalize a matrix |  | ||||||
|   int N; SizeSquare(A,N); |  | ||||||
|   Hess(A,Q,start); |  | ||||||
|   /*for(int l=0;l<N-2;l++){ |  | ||||||
|     for(int k=l+2;k<N;k++){ |  | ||||||
|     A(0,l,k); |  | ||||||
|     } |  | ||||||
|     }*/ |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| void ForceTridiagonal(DenseMatrix<T> &A){ |  | ||||||
| ///Tridiagonalize a matrix |  | ||||||
|   int N ; SizeSquare(A,N); |  | ||||||
|   for(int l=0;l<N-2;l++){ |  | ||||||
|     for(int k=l+2;k<N;k++){ |  | ||||||
|       A[l][k]=0; |  | ||||||
|       A[k][l]=0; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   ///Solve a symmetric eigensystem, not necessarily in tridiagonal form |  | ||||||
|   int N; SizeSquare(Ain,N); |  | ||||||
|   DenseMatrix<T > A; A = Ain; |  | ||||||
|   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q); |  | ||||||
|   Tri(A,Q,0); |  | ||||||
|   int it = my_Wilkinson<T>(A, evals, evecs, small); |  | ||||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} |  | ||||||
|   return it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   return my_Wilkinson(Ain, evals, evecs, small); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   return my_SymmEigensystem(Ain, evals, evecs, small); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class T> |  | ||||||
| int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
| ///Solve a general eigensystem, not necessarily in tridiagonal form |  | ||||||
|   int N = Ain.dim; |  | ||||||
|   DenseMatrix<T > A(N); A = Ain; |  | ||||||
|   DenseMatrix<T > Q(N);Q.Unity(); |  | ||||||
|   Hess(A,Q,0); |  | ||||||
|   int it = QReigensystem<T>(A, evals, evecs, small); |  | ||||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} |  | ||||||
|   return it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,242 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/Householder.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef HOUSEHOLDER_H |  | ||||||
| #define HOUSEHOLDER_H |  | ||||||
|  |  | ||||||
// Trace macros: print the enclosing function name, file and line to std::cout behind
// GridLogMessage. __func__ is the standard predefined identifier for the function name.
// TIMER ignores its argument. The trailing semicolons are kept so existing uses,
// written with or without their own semicolon, still compile.
#define TIMER(A) std::cout << GridLogMessage << __func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
|  |  | ||||||
| #include <cstdlib> |  | ||||||
| #include <string> |  | ||||||
| #include <cmath> |  | ||||||
| #include <iostream> |  | ||||||
| #include <sstream> |  | ||||||
| #include <stdexcept> |  | ||||||
| #include <fstream> |  | ||||||
| #include <complex> |  | ||||||
| #include <algorithm> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
/** Ordering by magnitude: true when abs(i) is strictly smaller than abs(j).
    Used as the comparator when searching a vector for its largest-magnitude element. **/
template <class T> bool cf(T i, T j) {
  const auto lhs = abs(i);
  const auto rhs = abs(j);
  return lhs < rhs;
}
|  |  | ||||||
/**
	Calculate a real Givens angle.
	Given the pair (y,z), write a cosine/sine pair into c and s.
	When z is exactly zero the result is the identity rotation (c = 1, s = 0) and the
	function returns immediately; this also covers y == z == 0, which would otherwise
	evaluate 0/0. Otherwise the ratio is formed with the larger of |y|, |z| in the
	denominator.
 **/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){

  const auto mz = abs(z);

  if(mz==0.0){
    c = 1; s = 0;
    return;
  }
  if(mz >= abs(y)){
    T t = -y/z;
    s = (T)1.0 / sqrt ((T)1.0 + t * t);
    c = s * t;
  } else {
    T t = -z/y;
    c = (T)1.0 / sqrt ((T)1.0 + t * t);
    s = c * t;
  }
}
|  |  | ||||||
| template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) |  | ||||||
| { |  | ||||||
|   int q ; SizeSquare(A,q); |  | ||||||
|  |  | ||||||
|   if(dir == 0){ |  | ||||||
|     for(int j=0;j<q;j++){ |  | ||||||
|       T nu = A[i][j]; |  | ||||||
|       T w  = A[k][j]; |  | ||||||
|       A[i][j] = (c*nu + s*w); |  | ||||||
|       A[k][j] = (-s*nu + c*w); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if(dir == 1){ |  | ||||||
|     for(int j=0;j<q;j++){ |  | ||||||
|       T nu = A[j][i]; |  | ||||||
|       T w  = A[j][k]; |  | ||||||
|       A[j][i] = (c*nu - s*w); |  | ||||||
|       A[j][k] = (s*nu + c*w); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
| 	from input = x; |  | ||||||
| 	Compute the complex Householder vector, v, such that |  | ||||||
| 	P = (I - b v transpose(v) ) |  | ||||||
| 	b = 2/v.v |  | ||||||
|  |  | ||||||
| 	P | x |    | x | k = 0 |  | ||||||
| 	| x |    | 0 |  |  | ||||||
| 	| x | =  | 0 | |  | ||||||
| 	| x |    | 0 | j = 3 |  | ||||||
| 	| x |	   | x | |  | ||||||
|  |  | ||||||
| 	These are the "Unreduced" Householder vectors. |  | ||||||
|  |  | ||||||
|  **/ |  | ||||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) |  | ||||||
| { |  | ||||||
|   int N ; Size(input,N); |  | ||||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); |  | ||||||
|  |  | ||||||
|   if(abs(m) > 0.0){ |  | ||||||
|     T alpha = 0; |  | ||||||
|  |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = input[i]/m; |  | ||||||
|       alpha = alpha + v[i]*conj(v[i]); |  | ||||||
|     } |  | ||||||
|     alpha = sqrt(alpha); |  | ||||||
|     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); |  | ||||||
|  |  | ||||||
|     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; |  | ||||||
|     else                 v[k] = -alpha; |  | ||||||
|   } else{ |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = 0.0; |  | ||||||
|     }  |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
| 	from input = x; |  | ||||||
| 	Compute the complex Householder vector, v, such that |  | ||||||
| 	P = (I - b v transpose(v) ) |  | ||||||
| 	b = 2/v.v |  | ||||||
|  |  | ||||||
| 	Px = alpha*e_dir |  | ||||||
|  |  | ||||||
| 	These are the "Unreduced" Householder vectors. |  | ||||||
|  |  | ||||||
|  **/ |  | ||||||
|  |  | ||||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) |  | ||||||
| { |  | ||||||
|   int N = input.size(); |  | ||||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); |  | ||||||
|    |  | ||||||
|   if(abs(m) > 0.0){ |  | ||||||
|     T alpha = 0; |  | ||||||
|  |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = input[i]/m; |  | ||||||
|       alpha = alpha + v[i]*conj(v[i]); |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     alpha = sqrt(alpha); |  | ||||||
|     beta = 1.0/(alpha*(alpha + abs(v[dir]) )); |  | ||||||
| 	 |  | ||||||
|     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; |  | ||||||
|     else                  v[dir] = -alpha; |  | ||||||
|   }else{ |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = 0.0; |  | ||||||
|     }  |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
| 	Compute the product PA if trans = 0 |  | ||||||
| 	AP if trans = 1 |  | ||||||
| 	P = (I - b v transpose(v) ) |  | ||||||
| 	b = 2/v.v |  | ||||||
| 	start at element l of matrix A |  | ||||||
| 	v is of length j - k + 1 of v are nonzero |  | ||||||
|  **/ |  | ||||||
|  |  | ||||||
| template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) |  | ||||||
| { |  | ||||||
|   int N ; SizeSquare(A,N); |  | ||||||
|  |  | ||||||
|   if(abs(beta) > 0.0){ |  | ||||||
|     for(int p=l; p<N; p++){ |  | ||||||
|       T s = 0; |  | ||||||
|       if(trans==0){ |  | ||||||
| 	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p]; |  | ||||||
| 	s *= beta; |  | ||||||
| 	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);} |  | ||||||
|       } else { |  | ||||||
| 	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];} |  | ||||||
| 	s *= beta; |  | ||||||
| 	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);} |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
| 	Compute the product PA if trans = 0 |  | ||||||
| 	AP if trans = 1 |  | ||||||
| 	P = (I - b v transpose(v) ) |  | ||||||
| 	b = 2/v.v |  | ||||||
| 	start at element l of matrix A |  | ||||||
| 	v is of length j - k + 1 of v are nonzero |  | ||||||
| 	A is tridiagonal |  | ||||||
|  **/ |  | ||||||
| template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) |  | ||||||
| { |  | ||||||
|   if(abs(beta) > 0.0){ |  | ||||||
|  |  | ||||||
|     int N ; SizeSquare(A,N); |  | ||||||
|  |  | ||||||
|     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);  |  | ||||||
|  |  | ||||||
|     T s; |  | ||||||
|     for(int p=l; p<M; p++){ |  | ||||||
|       s = 0; |  | ||||||
|       if(trans==0){ |  | ||||||
| 	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; |  | ||||||
|       }else{ |  | ||||||
| 	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; |  | ||||||
|       } |  | ||||||
|       s = beta*s; |  | ||||||
|       if(trans==0){ |  | ||||||
| 	for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k]; |  | ||||||
|       }else{ |  | ||||||
| 	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     for(int p=l; p<M; p++){ |  | ||||||
|       if(trans==0){ |  | ||||||
| 	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; |  | ||||||
|       }else{ |  | ||||||
| 	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,453 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/Matrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef MATRIX_H |  | ||||||
| #define MATRIX_H |  | ||||||
|  |  | ||||||
| #include <cstdlib> |  | ||||||
| #include <string> |  | ||||||
| #include <cmath> |  | ||||||
| #include <vector> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <complex> |  | ||||||
| #include <typeinfo> |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
|  |  | ||||||
/** Sign function: p divided by its magnitude.
    A zero p is returned unchanged instead of evaluating 0/0. **/
template <class T> T sign(T p){
  if( !(abs(p) > 0.0) ) return p;
  return ( p/abs(p) );
}
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| ///////////////////// Hijack STL containers for our wicked means ///////////////////////////////////////// |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
// Vector is the standard dynamic array; Matrix is a vector of row vectors.
// NOTE(review): the previous right-hand side named Vector itself; std::vector is
// assumed from the banner above and the <vector> include -- confirm.
template<class T> using Vector = std::vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
|  |  | ||||||
| template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); } |  | ||||||
|  |  | ||||||
| template<class T> void Resize(Matrix<T > & mat, int N, int M) {  |  | ||||||
|   mat.resize(N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     mat[i].resize(M); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| template<class T> void Size(Vector<T> & vec, int &N)  |  | ||||||
| {  |  | ||||||
|   N= vec.size(); |  | ||||||
| } |  | ||||||
| template<class T> void Size(Matrix<T> & mat, int &N,int &M)  |  | ||||||
| {  |  | ||||||
|   N= mat.size(); |  | ||||||
|   M= mat[0].size(); |  | ||||||
| } |  | ||||||
| template<class T> void SizeSquare(Matrix<T> & mat, int &N)  |  | ||||||
| {  |  | ||||||
|   int M; Size(mat,N,M); |  | ||||||
|   assert(N==M); |  | ||||||
| } |  | ||||||
| template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)  |  | ||||||
| {  |  | ||||||
|   int N2,M2; |  | ||||||
|   Size(mat1,N1,M1); |  | ||||||
|   Size(mat2,N2,M2); |  | ||||||
|   assert(N1==N2); |  | ||||||
|   assert(M1==M2); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //***************************************** |  | ||||||
| //*	(Complex) Vector operations	* |  | ||||||
| //***************************************** |  | ||||||
|  |  | ||||||
| /**Conj of a Vector **/ |  | ||||||
| template <class T> Vector<T> conj(Vector<T> p){ |  | ||||||
| 	Vector<T> q(p.size()); |  | ||||||
| 	for(int i=0;i<p.size();i++){q[i] = conj(p[i]);} |  | ||||||
| 	return q; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Norm of a Vector**/ |  | ||||||
| template <class T> T norm(Vector<T> p){ |  | ||||||
| 	T sum = 0; |  | ||||||
| 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);} |  | ||||||
| 	return abs(sqrt(sum)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Norm squared of a Vector **/ |  | ||||||
| template <class T> T norm2(Vector<T> p){ |  | ||||||
| 	T sum = 0; |  | ||||||
| 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);} |  | ||||||
| 	return abs((sum)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Sum elements of a Vector **/ |  | ||||||
| template <class T> T trace(Vector<T> p){ |  | ||||||
| 	T sum = 0; |  | ||||||
| 	for(int i=0;i<p.size();i++){sum = sum + p[i];} |  | ||||||
| 	return sum; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Fill a Vector with constant c **/ |  | ||||||
| template <class T> void Fill(Vector<T> &p, T c){ |  | ||||||
| 	for(int i=0;i<p.size();i++){p[i] = c;} |  | ||||||
| } |  | ||||||
| /** Normalize a Vector **/ |  | ||||||
| template <class T> void normalize(Vector<T> &p){ |  | ||||||
| 	T m = norm(p); |  | ||||||
| 	if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;} |  | ||||||
| } |  | ||||||
| /** Vector by scalar **/ |  | ||||||
| template <class T, class U> Vector<T> times(Vector<T> p, U s){ |  | ||||||
| 	for(int i=0;i<p.size();i++){p[i] *= s;} |  | ||||||
| 	return p; |  | ||||||
| } |  | ||||||
| template <class T, class U> Vector<T> times(U s, Vector<T> p){ |  | ||||||
| 	for(int i=0;i<p.size();i++){p[i] *= s;} |  | ||||||
| 	return p; |  | ||||||
| } |  | ||||||
| /** inner product of a and b = conj(a) . b **/ |  | ||||||
| template <class T> T inner(Vector<T> a, Vector<T> b){ |  | ||||||
| 	T m = 0.; |  | ||||||
| 	for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];} |  | ||||||
| 	return m; |  | ||||||
| } |  | ||||||
| /** sum of a and b = a + b **/ |  | ||||||
| template <class T> Vector<T> add(Vector<T> a, Vector<T> b){ |  | ||||||
| 	Vector<T> m(a.size()); |  | ||||||
| 	for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];} |  | ||||||
| 	return m; |  | ||||||
| } |  | ||||||
| /** sum of a and b = a - b **/ |  | ||||||
| template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){ |  | ||||||
| 	Vector<T> m(a.size()); |  | ||||||
| 	for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];} |  | ||||||
| 	return m; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /**  |  | ||||||
|  ********************************* |  | ||||||
|  *	Matrices	         * |  | ||||||
|  ********************************* |  | ||||||
|  **/ |  | ||||||
|  |  | ||||||
| template<class T> void Fill(Matrix<T> & mat, T&val) {  |  | ||||||
|   int N,M; |  | ||||||
|   Size(mat,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     mat[i][j] = val; |  | ||||||
|   }} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Transpose of a matrix **/ |  | ||||||
| Matrix<T> Transpose(Matrix<T> & mat){ |  | ||||||
|   int N,M; |  | ||||||
|   Size(mat,N,M); |  | ||||||
|   Matrix C; Resize(C,M,N); |  | ||||||
|   for(int i=0;i<M;i++){ |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     C[i][j] = mat[j][i]; |  | ||||||
|   }}  |  | ||||||
|   return C; |  | ||||||
| } |  | ||||||
| /** Set Matrix to unit matrix **/ |  | ||||||
| template<class T> void Unity(Matrix<T> &mat){ |  | ||||||
|   int N;  SizeSquare(mat,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     for(int j=0;j<N;j++){ |  | ||||||
|       if ( i==j ) A[i][j] = 1; |  | ||||||
|       else        A[i][j] = 0; |  | ||||||
|     }  |  | ||||||
|   }  |  | ||||||
| } |  | ||||||
| /** Add C * I to matrix **/ |  | ||||||
| template<class T> |  | ||||||
| void PlusUnit(Matrix<T> & A,T c){ |  | ||||||
|   int dim;  SizeSquare(A,dim); |  | ||||||
|   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** return the Hermitian conjugate of matrix **/ |  | ||||||
| Matrix<T> HermitianConj(Matrix<T> &mat){ |  | ||||||
|  |  | ||||||
|   int dim; SizeSquare(mat,dim); |  | ||||||
|  |  | ||||||
|   Matrix<T> C; Resize(C,dim,dim); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     for(int j=0;j<dim;j++){ |  | ||||||
|       C[i][j] = conj(mat[j][i]); |  | ||||||
|     }  |  | ||||||
|   }  |  | ||||||
|   return C; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** return diagonal entries as a Vector **/ |  | ||||||
| Vector<T> diag(Matrix<T> &A) |  | ||||||
| { |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   Vector<T> d; Resize(d,dim); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     d[i] = A[i][i]; |  | ||||||
|   } |  | ||||||
|   return d; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Left multiply by a Vector **/ |  | ||||||
| Vector<T> operator *(Vector<T> &B,Matrix<T> &A) |  | ||||||
| { |  | ||||||
|   int K,M,N;  |  | ||||||
|   Size(B,K); |  | ||||||
|   Size(A,M,N); |  | ||||||
|   assert(K==M); |  | ||||||
|    |  | ||||||
|   Vector<T> C; Resize(C,N); |  | ||||||
|  |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     T sum = 0.0; |  | ||||||
|     for(int i=0;i<M;i++){ |  | ||||||
|       sum += B[i] * A[i][j]; |  | ||||||
|     } |  | ||||||
|     C[j] =  sum; |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** return 1/diagonal entries as a Vector **/ |  | ||||||
| Vector<T> inv_diag(Matrix<T> & A){ |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   Vector<T> d; Resize(d,dim); |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     d[i] = 1.0/A[i][i]; |  | ||||||
|   } |  | ||||||
|   return d; |  | ||||||
| } |  | ||||||
| /** Matrix Addition **/ |  | ||||||
| inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B) |  | ||||||
| { |  | ||||||
|   int N,M  ; SizeSame(A,B,N,M); |  | ||||||
|   Matrix C; Resize(C,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     for(int j=0;j<M;j++){ |  | ||||||
|       C[i][j] = A[i][j] +  B[i][j]; |  | ||||||
|     }  |  | ||||||
|   }  |  | ||||||
|   return C; |  | ||||||
| }  |  | ||||||
| /** Matrix Subtraction **/ |  | ||||||
| inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){ |  | ||||||
|   int N,M  ; SizeSame(A,B,N,M); |  | ||||||
|   Matrix C; Resize(C,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     C[i][j] = A[i][j] -  B[i][j]; |  | ||||||
|   }} |  | ||||||
|   return C; |  | ||||||
| }  |  | ||||||
|  |  | ||||||
| /** Matrix scalar multiplication **/ |  | ||||||
| inline Matrix<T> operator* (Matrix<T> & A,T c){ |  | ||||||
|   int N,M; Size(A,N,M); |  | ||||||
|   Matrix C; Resize(C,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     C[i][j] = A[i][j]*c; |  | ||||||
|   }}  |  | ||||||
|   return C; |  | ||||||
| }  |  | ||||||
| /** Matrix Matrix multiplication **/ |  | ||||||
| inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){ |  | ||||||
|   int K,L,N,M; |  | ||||||
|   Size(A,K,L); |  | ||||||
|   Size(B,N,M); assert(L==N); |  | ||||||
|   Matrix C; Resize(C,K,M); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<K;i++){ |  | ||||||
|     for(int j=0;j<M;j++){ |  | ||||||
|       T sum = 0.0; |  | ||||||
|       for(int k=0;k<N;k++) sum += A[i][k]*B[k][j]; |  | ||||||
|       C[i][j] =sum; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| }  |  | ||||||
| /** Matrix Vector multiplication **/ |  | ||||||
| inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){ |  | ||||||
|   int M,N,K; |  | ||||||
|   Size(A,N,M); |  | ||||||
|   Size(B,K); assert(K==M); |  | ||||||
|   Vector<T> C; Resize(C,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     T sum = 0.0; |  | ||||||
|     for(int j=0;j<M;j++) sum += A[i][j]*B[j]; |  | ||||||
|     C[i] =  sum; |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| }  |  | ||||||
|  |  | ||||||
| /** Some version of Matrix norm **/ |  | ||||||
| /* |  | ||||||
| inline T Norm(){ // this is not a usual L2 norm |  | ||||||
|     T norm = 0; |  | ||||||
|     for(int i=0;i<dim;i++){ |  | ||||||
|       for(int j=0;j<dim;j++){ |  | ||||||
| 	norm += abs(A[i][j]); |  | ||||||
|     }} |  | ||||||
|     return norm; |  | ||||||
|   } |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| /** Some version of Matrix norm **/ |  | ||||||
| template<class T> T LargestDiag(Matrix<T> &A) |  | ||||||
| { |  | ||||||
|   int dim ; SizeSquare(A,dim);  |  | ||||||
|  |  | ||||||
|   T ld = abs(A[0][0]); |  | ||||||
|   for(int i=1;i<dim;i++){ |  | ||||||
|     T cf = abs(A[i][i]); |  | ||||||
|     if(abs(cf) > abs(ld) ){ld = cf;} |  | ||||||
|   } |  | ||||||
|   return ld; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ |  | ||||||
| template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small) |  | ||||||
| { |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   for(int l = dim - 1 - offset; l >= 1; l--) {             		 |  | ||||||
|     if((U)abs(A[l][l - 1]) < (U)small) { |  | ||||||
|       A[l][l-1]=(U)0.0; |  | ||||||
|       return l; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ |  | ||||||
| template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)  |  | ||||||
| { |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   for(int l = dim - 1 - offset; l >= 1; l--) { |  | ||||||
|     if((U)abs(A[l][l - 1]) < (U)small) { |  | ||||||
|       A[l][l - 1] = (U)0.0; |  | ||||||
|       A[l - 1][l] = (U)0.0; |  | ||||||
|       return l; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
| /**Assign a submatrix to a larger one**/ |  | ||||||
| template<class T> |  | ||||||
| void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) |  | ||||||
| { |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|     for(int j = col_st; j<col_end; j++){ |  | ||||||
|       A[i][j] = S[i - row_st][j - col_st]; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /**Get a square submatrix**/ |  | ||||||
| template <class T> |  | ||||||
| Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end) |  | ||||||
| { |  | ||||||
|   Matrix<T> H; Resize(row_end - row_st,col_end-col_st); |  | ||||||
|  |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|   for(int j = col_st; j<col_end; j++){ |  | ||||||
|     H[i-row_st][j-col_st]=A[i][j]; |  | ||||||
|   }} |  | ||||||
|   return H; |  | ||||||
| } |  | ||||||
|    |  | ||||||
|  /**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/ |  | ||||||
| template<class T> |  | ||||||
| void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) |  | ||||||
| { |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|   for(int j = col_st; j<col_end; j++){ |  | ||||||
|     A[i][j] = S[i - row_st][j - col_st]; |  | ||||||
|   }} |  | ||||||
| } |  | ||||||
|    |  | ||||||
| /** compute b_i A_ij b_j **/ // surprised no Conj |  | ||||||
| template<class T> T proj(Matrix<T> A, Vector<T> B){ |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   int dimB; Size(B,dimB); |  | ||||||
|   assert(dimB==dim); |  | ||||||
|   T C = 0; |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     T sum = 0.0; |  | ||||||
|     for(int j=0;j<dim;j++){ |  | ||||||
|       sum += A[i][j]*B[j]; |  | ||||||
|     } |  | ||||||
|     C +=  B[i]*sum; // No conj? |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  ************************************************************* |  | ||||||
|  * |  | ||||||
|  * Matrix Vector products |  | ||||||
|  * |  | ||||||
|  ************************************************************* |  | ||||||
|  */ |  | ||||||
| // Instead make a linop and call my CG; |  | ||||||
|  |  | ||||||
| /// q -> q Q |  | ||||||
| template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q) |  | ||||||
| { |  | ||||||
|   int M; SizeSquare(Q,M); |  | ||||||
|   int N; Size(q,N);  |  | ||||||
|   assert(M==N); |  | ||||||
|  |  | ||||||
|   times(q,Q,N); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// q -> q Q |  | ||||||
| template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N) |  | ||||||
| { |  | ||||||
|   GridBase *grid = q[0]._grid; |  | ||||||
|   int M; SizeSquare(Q,M); |  | ||||||
|   int K; Size(q,K);  |  | ||||||
|   assert(N<M); |  | ||||||
|   assert(N<K); |  | ||||||
|   Vector<Fermion> S(N,grid ); |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     S[j] = zero; |  | ||||||
|     for(int k=0;k<N;k++){ |  | ||||||
|       S[j] = S[j] +  q[k]* Q[k][j];  |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   for(int j=0;j<q.size();j++){ |  | ||||||
|     q[j] = S[j]; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,75 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/MatrixUtils.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_MATRIX_UTILS_H |  | ||||||
| #define GRID_MATRIX_UTILS_H |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|   namespace MatrixUtils {  |  | ||||||
|  |  | ||||||
|     template<class T> inline void Size(Matrix<T>& A,int &N,int &M){ |  | ||||||
|       N=A.size(); assert(N>0); |  | ||||||
|       M=A[0].size(); |  | ||||||
|       for(int i=0;i<N;i++){ |  | ||||||
| 	assert(A[i].size()==M); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class T> inline void SizeSquare(Matrix<T>& A,int &N) |  | ||||||
|     { |  | ||||||
|       int M; |  | ||||||
|       Size(A,N,M); |  | ||||||
|       assert(N==M); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class T> inline void Fill(Matrix<T>& A,T & val) |  | ||||||
|     {  |  | ||||||
|       int N,M; |  | ||||||
|       Size(A,N,M); |  | ||||||
|       for(int i=0;i<N;i++){ |  | ||||||
|       for(int j=0;j<M;j++){ |  | ||||||
| 	A[i][j]=val; |  | ||||||
|       }} |  | ||||||
|     } |  | ||||||
|     template<class T> inline void Diagonal(Matrix<T>& A,T & val) |  | ||||||
|     {  |  | ||||||
|       int N; |  | ||||||
|       SizeSquare(A,N); |  | ||||||
|       for(int i=0;i<N;i++){ |  | ||||||
| 	A[i][i]=val; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     template<class T> inline void Identity(Matrix<T>& A) |  | ||||||
|     { |  | ||||||
|       Fill(A,0.0); |  | ||||||
|       Diagonal(A,1.0); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   }; |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,15 +0,0 @@ | |||||||
| - ConjugateGradientMultiShift |  | ||||||
| - MCR |  | ||||||
|  |  | ||||||
| - Potentially Useful Boost libraries |  | ||||||
|  |  | ||||||
| - MultiArray |  | ||||||
| - Aligned allocator; memory pool |  | ||||||
| - Remez -- Mike or Boost? |  | ||||||
| - Multiprecision |  | ||||||
| - quaternions |  | ||||||
| - Tokenize |  | ||||||
| - Serialization |  | ||||||
| - Regex |  | ||||||
| - Proto (ET) |  | ||||||
| - uBlas |  | ||||||
| @@ -1,122 +0,0 @@ | |||||||
| #include <math.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <vector> |  | ||||||
|  |  | ||||||
| struct Bisection { |  | ||||||
|  |  | ||||||
| static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig) |  | ||||||
| { |  | ||||||
|   int i,j; |  | ||||||
|   std::vector<RealD> evec1(row_num+3); |  | ||||||
|   std::vector<RealD> evec2(row_num+3); |  | ||||||
|   RealD eps2; |  | ||||||
|   ALPHA[1]=0.; |  | ||||||
|   BETHA[1]=0.; |  | ||||||
|   for(i=0;i<row_num-1;i++) { |  | ||||||
|     ALPHA[i+1] = A[i*(row_num+1)].real(); |  | ||||||
|     BETHA[i+2] = A[i*(row_num+1)+1].real(); |  | ||||||
|   } |  | ||||||
|   ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real(); |  | ||||||
|   bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2); |  | ||||||
|   bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2); |  | ||||||
|  |  | ||||||
|   // Do we really need to sort here? |  | ||||||
|   int begin=1; |  | ||||||
|   int end = row_num; |  | ||||||
|   int swapped=1; |  | ||||||
|   while(swapped) { |  | ||||||
|     swapped=0; |  | ||||||
|     for(i=begin;i<end;i++){ |  | ||||||
|       if(mag(evec2[i])>mag(evec2[i+1]))	{ |  | ||||||
| 	swap(evec2+i,evec2+i+1); |  | ||||||
| 	swapped=1; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     end--; |  | ||||||
|     for(i=end-1;i>=begin;i--){ |  | ||||||
|       if(mag(evec2[i])>mag(evec2[i+1]))	{ |  | ||||||
| 	swap(evec2+i,evec2+i+1); |  | ||||||
| 	swapped=1; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     begin++; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   for(i=0;i<row_num;i++){ |  | ||||||
|     for(j=0;j<row_num;j++) { |  | ||||||
|       if(i==j) H[i*row_num+j]=evec2[i+1]; |  | ||||||
|       else H[i*row_num+j]=0.; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void bisec(std::vector<RealD> &c,    |  | ||||||
| 		  std::vector<RealD> &b, |  | ||||||
| 		  int n, |  | ||||||
| 		  int m1, |  | ||||||
| 		  int m2, |  | ||||||
| 		  RealD eps1, |  | ||||||
| 		  RealD relfeh, |  | ||||||
| 		  std::vector<RealD> &x, |  | ||||||
| 		  RealD &eps2) |  | ||||||
| { |  | ||||||
|   std::vector<RealD> wu(n+2); |  | ||||||
|  |  | ||||||
|   RealD h,q,x1,xu,x0,xmin,xmax;  |  | ||||||
|   int i,a,k; |  | ||||||
|  |  | ||||||
|   b[1]=0.0; |  | ||||||
|   xmin=c[n]-fabs(b[n]); |  | ||||||
|   xmax=c[n]+fabs(b[n]); |  | ||||||
|   for(i=1;i<n;i++){ |  | ||||||
|     h=fabs(b[i])+fabs(b[i+1]); |  | ||||||
|     if(c[i]+h>xmax) xmax= c[i]+h; |  | ||||||
|     if(c[i]-h<xmin) xmin= c[i]-h; |  | ||||||
|   } |  | ||||||
|   xmax *=2.; |  | ||||||
|  |  | ||||||
|   eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin); |  | ||||||
|   if(eps1<=0.0) eps1=eps2; |  | ||||||
|   eps2=0.5*eps1+7.0*(eps2); |  | ||||||
|   x0=xmax; |  | ||||||
|   for(i=m1;i<=m2;i++){ |  | ||||||
|     x[i]=xmax; |  | ||||||
|     wu[i]=xmin; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   for(k=m2;k>=m1;k--){ |  | ||||||
|     xu=xmin; |  | ||||||
|     i=k; |  | ||||||
|     do{ |  | ||||||
|       if(xu<wu[i]){ |  | ||||||
| 	xu=wu[i]; |  | ||||||
| 	i=m1-1; |  | ||||||
|       } |  | ||||||
|       i--; |  | ||||||
|     }while(i>=m1); |  | ||||||
|     if(x0>x[k]) x0=x[k]; |  | ||||||
|     while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ |  | ||||||
|       x1=(xu+x0)/2; |  | ||||||
|  |  | ||||||
|       a=0; |  | ||||||
|       q=1.0; |  | ||||||
|       for(i=1;i<=n;i++){ |  | ||||||
| 	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); |  | ||||||
| 	if(q<0) a++; |  | ||||||
|       } |  | ||||||
|       //			printf("x1=%e a=%d\n",x1,a); |  | ||||||
|       if(a<k){ |  | ||||||
| 	if(a<m1){ |  | ||||||
| 	  xu=x1; |  | ||||||
| 	  wu[m1]=x1; |  | ||||||
| 	}else { |  | ||||||
| 	  xu=x1; |  | ||||||
| 	  wu[a+1]=x1; |  | ||||||
| 	  if(x[a]>x1) x[a]=x1; |  | ||||||
| 	} |  | ||||||
|       }else x0=x1; |  | ||||||
|     } |  | ||||||
|     x[k]=(x0+xu)/2; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| } |  | ||||||
| @@ -1 +0,0 @@ | |||||||
|  |  | ||||||
| @@ -6,8 +6,9 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |     Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -49,7 +50,6 @@ public: | |||||||
|  |  | ||||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; |     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Physics Grid information. |     // Physics Grid information. | ||||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. |     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||||
|     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal |     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal | ||||||
| @@ -62,13 +62,12 @@ public: | |||||||
|     int _isites; |     int _isites; | ||||||
|     int _fsites;                  // _isites*_osites = product(dimensions). |     int _fsites;                  // _isites*_osites = product(dimensions). | ||||||
|     int _gsites; |     int _gsites; | ||||||
|     std::vector<int> _slice_block;   // subslice information |     std::vector<int> _slice_block;// subslice information | ||||||
|     std::vector<int> _slice_stride; |     std::vector<int> _slice_stride; | ||||||
|     std::vector<int> _slice_nblock; |     std::vector<int> _slice_nblock; | ||||||
|  |  | ||||||
|     // Might need these at some point |     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] | ||||||
|     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] |     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||||
|     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
| @@ -121,6 +120,12 @@ public: | |||||||
|       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); |       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) { | ||||||
|  |       lcoor.resize(_ndimension); | ||||||
|  |       for (int d = 0; d < _ndimension; d++) | ||||||
|  |         lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
|     // SIMD lane addressing |     // SIMD lane addressing | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
| @@ -128,6 +133,7 @@ public: | |||||||
|     { |     { | ||||||
|       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); |       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline int PermuteDim(int dimension){ |     inline int PermuteDim(int dimension){ | ||||||
|       return _simd_layout[dimension]>1; |       return _simd_layout[dimension]>1; | ||||||
|     } |     } | ||||||
| @@ -168,20 +174,44 @@ public: | |||||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  |     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||||
|     inline int Nd    (void) const { return _ndimension;}; |     inline int Nd    (void) const { return _ndimension;}; | ||||||
|  |  | ||||||
|  |     inline const std::vector<int> LocalStarts(void)             { return _lstart;    }; | ||||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; |     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; |     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; |     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||||
|     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; |     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////////// | ||||||
|  |     // Utility to print the full decomposition details  | ||||||
|  |     //////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |     void show_decomposition(){ | ||||||
|  |       std::cout << GridLogMessage << "Full Dimensions    : " << _fdimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "Global Dimensions  : " << _gdimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "Local Dimensions   : " << _ldimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "Outer strides      : " << _ostride << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "Inner strides      : " << _istride << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "iSites             : " << _isites << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "oSites             : " << _osites << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "lSites             : " << lSites() << std::endl;         | ||||||
|  |       std::cout << GridLogMessage << "gSites             : " << gSites() << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "Nd                 : " << _ndimension << std::endl;              | ||||||
|  |     }  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
|     // Global addressing |     // Global addressing | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
|     void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){ |     void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){ | ||||||
|  |       assert(gidx< gSites()); | ||||||
|       Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); |       Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); | ||||||
|     } |     } | ||||||
|     void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){ |     void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){ | ||||||
|  |       assert(lidx<lSites()); | ||||||
|       Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); |       Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ |     void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ | ||||||
|       gidx=0; |       gidx=0; | ||||||
|       int mult=1; |       int mult=1; | ||||||
|   | |||||||
| @@ -76,6 +76,8 @@ public: | |||||||
|         _ldimensions.resize(_ndimension); |         _ldimensions.resize(_ndimension); | ||||||
|         _rdimensions.resize(_ndimension); |         _rdimensions.resize(_ndimension); | ||||||
|         _simd_layout.resize(_ndimension); |         _simd_layout.resize(_ndimension); | ||||||
|  | 	_lstart.resize(_ndimension); | ||||||
|  | 	_lend.resize(_ndimension); | ||||||
|              |              | ||||||
|         _ostride.resize(_ndimension); |         _ostride.resize(_ndimension); | ||||||
|         _istride.resize(_ndimension); |         _istride.resize(_ndimension); | ||||||
| @@ -94,6 +96,8 @@ public: | |||||||
| 	  // Use a reduced simd grid | 	  // Use a reduced simd grid | ||||||
| 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | ||||||
| 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | ||||||
|  | 	  _lstart[d]     = _processor_coor[d]*_ldimensions[d]; | ||||||
|  | 	  _lend[d]       = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; | ||||||
| 	  _osites  *= _rdimensions[d]; | 	  _osites  *= _rdimensions[d]; | ||||||
| 	  _isites  *= _simd_layout[d]; | 	  _isites  *= _simd_layout[d]; | ||||||
|                  |                  | ||||||
|   | |||||||
| @@ -151,6 +151,8 @@ public: | |||||||
|       _ldimensions.resize(_ndimension); |       _ldimensions.resize(_ndimension); | ||||||
|       _rdimensions.resize(_ndimension); |       _rdimensions.resize(_ndimension); | ||||||
|       _simd_layout.resize(_ndimension); |       _simd_layout.resize(_ndimension); | ||||||
|  |       _lstart.resize(_ndimension); | ||||||
|  |       _lend.resize(_ndimension); | ||||||
|        |        | ||||||
|       _ostride.resize(_ndimension); |       _ostride.resize(_ndimension); | ||||||
|       _istride.resize(_ndimension); |       _istride.resize(_ndimension); | ||||||
| @@ -169,6 +171,8 @@ public: | |||||||
| 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | ||||||
| 	} | 	} | ||||||
| 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | ||||||
|  | 	_lstart[d]     = _processor_coor[d]*_ldimensions[d]; | ||||||
|  | 	_lend[d]       = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; | ||||||
|  |  | ||||||
| 	// Use a reduced simd grid | 	// Use a reduced simd grid | ||||||
| 	_simd_layout[d] = simd_layout[d]; | 	_simd_layout[d] = simd_layout[d]; | ||||||
|   | |||||||
| @@ -60,6 +60,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { | |||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Grid information queries | // Grid information queries | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
|  | int                      CartesianCommunicator::Dimensions(void)         { return _ndimension; }; | ||||||
| int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | ||||||
| int                      CartesianCommunicator::BossRank(void)          { return 0; }; | int                      CartesianCommunicator::BossRank(void)          { return 0; }; | ||||||
| int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | ||||||
| @@ -91,6 +92,7 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | |||||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||||
|  |  | ||||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||||
|  | int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 						       void *xmit, | 						       void *xmit, | ||||||
|   | |||||||
| @@ -148,6 +148,7 @@ class CartesianCommunicator { | |||||||
|   int  RankFromProcessorCoor(std::vector<int> &coor); |   int  RankFromProcessorCoor(std::vector<int> &coor); | ||||||
|   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); |   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); | ||||||
|    |    | ||||||
|  |   int                      Dimensions(void)        ; | ||||||
|   int                      IsBoss(void)            ; |   int                      IsBoss(void)            ; | ||||||
|   int                      BossRank(void)          ; |   int                      BossRank(void)          ; | ||||||
|   int                      ThisRank(void)          ; |   int                      ThisRank(void)          ; | ||||||
| @@ -155,6 +156,7 @@ class CartesianCommunicator { | |||||||
|   const std::vector<int> & ProcessorGrid(void)     ; |   const std::vector<int> & ProcessorGrid(void)     ; | ||||||
|   int                      ProcessorCount(void)    ; |   int                      ProcessorCount(void)    ; | ||||||
|   int                      NodeCount(void)    ; |   int                      NodeCount(void)    ; | ||||||
|  |   int                      RankCount(void)    ; | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid |   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||||
| @@ -175,6 +177,8 @@ class CartesianCommunicator { | |||||||
|   void GlobalSumVector(ComplexF *c,int N); |   void GlobalSumVector(ComplexF *c,int N); | ||||||
|   void GlobalSum(ComplexD &c); |   void GlobalSum(ComplexD &c); | ||||||
|   void GlobalSumVector(ComplexD *c,int N); |   void GlobalSumVector(ComplexD *c,int N); | ||||||
|  |   void GlobalXOR(uint32_t &); | ||||||
|  |   void GlobalXOR(uint64_t &); | ||||||
|    |    | ||||||
|   template<class obj> void GlobalSum(obj &o){ |   template<class obj> void GlobalSum(obj &o){ | ||||||
|     typedef typename obj::scalar_type scalar_type; |     typedef typename obj::scalar_type scalar_type; | ||||||
|   | |||||||
| @@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||||
|  |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||||
|  |   assert(ierr==0); | ||||||
|  | } | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||||
|  |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||||
|  |   assert(ierr==0); | ||||||
|  | } | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
|   | |||||||
| @@ -65,6 +65,7 @@ std::vector<int> CartesianCommunicator::MyGroup; | |||||||
| std::vector<void *> CartesianCommunicator::ShmCommBufs; | std::vector<void *> CartesianCommunicator::ShmCommBufs; | ||||||
|  |  | ||||||
| int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | ||||||
|  | int CartesianCommunicator::RankCount(void)    { return WorldSize;}; | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef FORCE_COMMS | #undef FORCE_COMMS | ||||||
| @@ -206,7 +207,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|       sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); |       sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); | ||||||
|  |  | ||||||
|       shm_unlink(shm_name); |       shm_unlink(shm_name); | ||||||
|       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0660); |       int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666); | ||||||
|       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } |       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } | ||||||
|       ftruncate(fd, size); |       ftruncate(fd, size); | ||||||
|  |  | ||||||
| @@ -226,7 +227,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|      |      | ||||||
|       sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); |       sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r); | ||||||
|  |  | ||||||
|       int fd=shm_open(shm_name,O_RDWR,0660); |       int fd=shm_open(shm_name,O_RDWR,0666); | ||||||
|       if ( fd<0 ) {	perror("failed shm_open");	assert(0);      } |       if ( fd<0 ) {	perror("failed shm_open");	assert(0);      } | ||||||
|  |  | ||||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); |       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | ||||||
| @@ -509,6 +510,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||||
|  |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||||
|  |   assert(ierr==0); | ||||||
|  | } | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||||
|  |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||||
|  |   assert(ierr==0); | ||||||
|  | } | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
|   | |||||||
| @@ -59,6 +59,8 @@ void CartesianCommunicator::GlobalSum(double &){} | |||||||
| void CartesianCommunicator::GlobalSum(uint32_t &){} | void CartesianCommunicator::GlobalSum(uint32_t &){} | ||||||
| void CartesianCommunicator::GlobalSum(uint64_t &){} | void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &){} | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &){} | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||||
| 					   void *recv, | 					   void *recv, | ||||||
|   | |||||||
| @@ -30,21 +30,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| class SimpleCompressor { |  | ||||||
| public: |  | ||||||
|   void Point(int) {}; |  | ||||||
|  |  | ||||||
|   vobj operator() (const vobj &arg) { |  | ||||||
|     return arg; |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| // Gather for when there is no need to SIMD split with compression | // Gather for when there is no need to SIMD split  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| template<class vobj,class cobj,class compressor> void  | template<class vobj> void  | ||||||
| Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) | Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0) | ||||||
| { | { | ||||||
|   int rd = rhs._grid->_rdimensions[dimension]; |   int rd = rhs._grid->_rdimensions[dimension]; | ||||||
|  |  | ||||||
| @@ -62,7 +52,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen | |||||||
|       for(int b=0;b<e2;b++){ |       for(int b=0;b<e2;b++){ | ||||||
| 	int o  = n*stride; | 	int o  = n*stride; | ||||||
| 	int bo = n*e2; | 	int bo = n*e2; | ||||||
| 	buffer[off+bo+b]=compress(rhs._odata[so+o+b]); | 	buffer[off+bo+b]=rhs._odata[so+o+b]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } else {  |   } else {  | ||||||
| @@ -78,17 +68,16 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen | |||||||
|        } |        } | ||||||
|      } |      } | ||||||
|      parallel_for(int i=0;i<table.size();i++){ |      parallel_for(int i=0;i<table.size();i++){ | ||||||
|        buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]); |        buffer[off+table[i].first]=rhs._odata[so+table[i].second]; | ||||||
|      } |      } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| // Gather for when there *is* need to SIMD split with compression | // Gather for when there *is* need to SIMD split  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| template<class cobj,class vobj,class compressor> void  | template<class vobj> void  | ||||||
| Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_object *> pointers,int dimension,int plane,int cbmask,compressor &compress) | Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask) | ||||||
| { | { | ||||||
|   int rd = rhs._grid->_rdimensions[dimension]; |   int rd = rhs._grid->_rdimensions[dimension]; | ||||||
|  |  | ||||||
| @@ -109,8 +98,8 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_ | |||||||
| 	int o      =   n*n1; | 	int o      =   n*n1; | ||||||
| 	int offset = b+n*e2; | 	int offset = b+n*e2; | ||||||
| 	 | 	 | ||||||
| 	cobj temp =compress(rhs._odata[so+o+b]); | 	vobj temp =rhs._odata[so+o+b]; | ||||||
| 	extract<cobj>(temp,pointers,offset); | 	extract<vobj>(temp,pointers,offset); | ||||||
|  |  | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
| @@ -127,32 +116,14 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_ | |||||||
| 	int offset = b+n*e2; | 	int offset = b+n*e2; | ||||||
|  |  | ||||||
| 	if ( ocb & cbmask ) { | 	if ( ocb & cbmask ) { | ||||||
| 	  cobj temp =compress(rhs._odata[so+o+b]); | 	  vobj temp =rhs._odata[so+o+b]; | ||||||
| 	  extract<cobj>(temp,pointers,offset); | 	  extract<vobj>(temp,pointers,offset); | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| // Gather for when there is no need to SIMD split |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask) |  | ||||||
| { |  | ||||||
|   SimpleCompressor<vobj> dontcompress; |  | ||||||
|   Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| // Gather for when there *is* need to SIMD split |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask) |  | ||||||
| { |  | ||||||
|   SimpleCompressor<vobj> dontcompress; |  | ||||||
|   Gather_plane_extract<vobj,vobj,decltype(dontcompress)>(rhs,pointers,dimension,plane,cbmask,dontcompress); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| // Scatter for when there is no need to SIMD split | // Scatter for when there is no need to SIMD split | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| @@ -200,7 +171,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo | |||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| // Scatter for when there *is* need to SIMD split | // Scatter for when there *is* need to SIMD split | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
|  template<class vobj,class cobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<cobj *> pointers,int dimension,int plane,int cbmask) | template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask) | ||||||
| { | { | ||||||
|   int rd = rhs._grid->_rdimensions[dimension]; |   int rd = rhs._grid->_rdimensions[dimension]; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -154,13 +154,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r | |||||||
| 			   recv_from_rank, | 			   recv_from_rank, | ||||||
| 			   bytes); | 			   bytes); | ||||||
|       grid->Barrier(); |       grid->Barrier(); | ||||||
|       /* |  | ||||||
|       for(int i=0;i<send_buf.size();i++){ |  | ||||||
| 	assert(recv_buf.size()==buffer_size); |  | ||||||
| 	assert(send_buf.size()==buffer_size); |  | ||||||
| 	std::cout << "SendRecv_Cshift_comms ["<<i<<" "<< dimension<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << "  0x" << cbmask<<std::endl; |  | ||||||
|       } |  | ||||||
|       */ |  | ||||||
|       Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask); |       Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -246,13 +240,6 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo | |||||||
| 			     (void *)&recv_buf_extract[i][0], | 			     (void *)&recv_buf_extract[i][0], | ||||||
| 			     recv_from_rank, | 			     recv_from_rank, | ||||||
| 			     bytes); | 			     bytes); | ||||||
| 	/* |  | ||||||
| 	for(int w=0;w<recv_buf_extract[i].size();w++){ |  | ||||||
| 	  assert(recv_buf_extract[i].size()==buffer_size); |  | ||||||
| 	  assert(send_buf_extract[i].size()==buffer_size); |  | ||||||
| 	  std::cout << "SendRecv_Cshift_comms ["<<w<<" "<< dimension<<"] recv "<<recv_buf_extract[i][w]<<" send " << send_buf_extract[nbr_lane][w]  << cbmask<<std::endl; |  | ||||||
| 	} |  | ||||||
| 	*/	 |  | ||||||
| 	grid->Barrier(); | 	grid->Barrier(); | ||||||
| 	rpointers[i] = &recv_buf_extract[i][0]; | 	rpointers[i] = &recv_buf_extract[i][0]; | ||||||
|       } else {  |       } else {  | ||||||
|   | |||||||
							
								
								
									
										12276
									
								
								lib/json/json.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12276
									
								
								lib/json/json.hpp
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -261,12 +261,22 @@ public: | |||||||
|    |    | ||||||
|   virtual ~Lattice(void) = default; |   virtual ~Lattice(void) = default; | ||||||
|      |      | ||||||
|  |   void reset(GridBase* grid) { | ||||||
|  |     if (_grid != grid) { | ||||||
|  |       _grid = grid; | ||||||
|  |       _odata.resize(grid->oSites()); | ||||||
|  |       checkerboard = 0; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |  | ||||||
|   template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ |   template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ | ||||||
|     parallel_for(int ss=0;ss<_grid->oSites();ss++){ |     parallel_for(int ss=0;ss<_grid->oSites();ss++){ | ||||||
|       this->_odata[ss]=r; |       this->_odata[ss]=r; | ||||||
|     } |     } | ||||||
|     return *this; |     return *this; | ||||||
|   } |   } | ||||||
|  |    | ||||||
|   template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){ |   template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){ | ||||||
|     this->checkerboard = r.checkerboard; |     this->checkerboard = r.checkerboard; | ||||||
|     conformable(*this,r); |     conformable(*this,r); | ||||||
| @@ -291,7 +301,7 @@ public: | |||||||
|     *this = (*this)+r; |     *this = (*this)+r; | ||||||
|     return *this; |     return *this; | ||||||
|   } |   } | ||||||
|  }; // class Lattice | }; // class Lattice | ||||||
|    |    | ||||||
|   template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ |   template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ | ||||||
|     std::vector<int> gcoor; |     std::vector<int> gcoor; | ||||||
|   | |||||||
| @@ -1,35 +1,29 @@ | |||||||
|     /************************************************************************************* | /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./lib/lattice/Lattice_reduction.h |     Source file: ./lib/lattice/Lattice_reduction.h | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #ifndef GRID_LATTICE_REDUCTION_H | #ifndef GRID_LATTICE_REDUCTION_H | ||||||
| #define GRID_LATTICE_REDUCTION_H | #define GRID_LATTICE_REDUCTION_H | ||||||
|  |  | ||||||
|  | #include <Grid/Grid_Eigen_Dense.h> | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| #ifdef GRID_WARN_SUBOPTIMAL | #ifdef GRID_WARN_SUBOPTIMAL | ||||||
| #warning "Optimisation alert all these reduction loops are NOT threaded " | #warning "Optimisation alert all these reduction loops are NOT threaded " | ||||||
| @@ -38,32 +32,30 @@ namespace Grid { | |||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Deterministic Reduction operations |   // Deterministic Reduction operations | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | ||||||
|   ComplexD nrm = innerProduct(arg,arg); |   ComplexD nrm = innerProduct(arg,arg); | ||||||
|   return std::real(nrm);  |   return std::real(nrm);  | ||||||
|   } | } | ||||||
|  |  | ||||||
|     template<class vobj> | // Double inner product | ||||||
|     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)  | template<class vobj> | ||||||
|     { | inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)  | ||||||
|  | { | ||||||
|   typedef typename vobj::scalar_type scalar_type; |   typedef typename vobj::scalar_type scalar_type; | ||||||
|       typedef typename vobj::vector_type vector_type; |   typedef typename vobj::vector_typeD vector_type; | ||||||
|   scalar_type  nrm; |   scalar_type  nrm; | ||||||
|    |    | ||||||
|   GridBase *grid = left._grid; |   GridBase *grid = left._grid; | ||||||
|    |    | ||||||
|   std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); |   std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); | ||||||
|       for(int i=0;i<grid->SumArraySize();i++){ |  | ||||||
| 	sumarray[i]=zero; |  | ||||||
|       } |  | ||||||
|    |    | ||||||
|   parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ |   parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ | ||||||
|     int nwork, mywork, myoff; |     int nwork, mywork, myoff; | ||||||
|     GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); |     GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); | ||||||
|      |      | ||||||
| 	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation |     decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation | ||||||
|     for(int ss=myoff;ss<mywork+myoff; ss++){ |     for(int ss=myoff;ss<mywork+myoff; ss++){ | ||||||
| 	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); |       vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]); | ||||||
|     } |     } | ||||||
|     sumarray[thr]=TensorRemove(vnrm) ; |     sumarray[thr]=TensorRemove(vnrm) ; | ||||||
|   } |   } | ||||||
| @@ -75,36 +67,36 @@ namespace Grid { | |||||||
|   nrm = Reduce(vvnrm);// sum across simd |   nrm = Reduce(vvnrm);// sum across simd | ||||||
|   right._grid->GlobalSum(nrm); |   right._grid->GlobalSum(nrm); | ||||||
|   return nrm; |   return nrm; | ||||||
|     } | } | ||||||
|   |   | ||||||
|     template<class Op,class T1> | template<class Op,class T1> | ||||||
|       inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) | inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) | ||||||
|   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object |   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object | ||||||
|     { | { | ||||||
|   return sum(closure(expr)); |   return sum(closure(expr)); | ||||||
|     } | } | ||||||
|  |  | ||||||
|     template<class Op,class T1,class T2> | template<class Op,class T1,class T2> | ||||||
|       inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr) | inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr) | ||||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object |       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object | ||||||
|     { | { | ||||||
|   return sum(closure(expr)); |   return sum(closure(expr)); | ||||||
|     } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|     template<class Op,class T1,class T2,class T3> | template<class Op,class T1,class T2,class T3> | ||||||
|       inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | ||||||
|   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), |   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||||
| 				      eval(0,std::get<1>(expr.second)), | 				      eval(0,std::get<1>(expr.second)), | ||||||
| 				      eval(0,std::get<2>(expr.second)) | 				      eval(0,std::get<2>(expr.second)) | ||||||
| 				      ))::scalar_object | 				      ))::scalar_object | ||||||
|     { | { | ||||||
|   return sum(closure(expr)); |   return sum(closure(expr)); | ||||||
|     } | } | ||||||
|  |  | ||||||
|     template<class vobj> |  | ||||||
|     inline typename vobj::scalar_object sum(const Lattice<vobj> &arg){ |  | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | inline typename vobj::scalar_object sum(const Lattice<vobj> &arg) | ||||||
|  | { | ||||||
|   GridBase *grid=arg._grid; |   GridBase *grid=arg._grid; | ||||||
|   int Nsimd = grid->Nsimd(); |   int Nsimd = grid->Nsimd(); | ||||||
|    |    | ||||||
| @@ -139,19 +131,24 @@ namespace Grid { | |||||||
|   arg._grid->GlobalSum(ssum); |   arg._grid->GlobalSum(ssum); | ||||||
|    |    | ||||||
|   return ssum; |   return ssum; | ||||||
|     } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc... | ||||||
|  | ////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
| template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | ||||||
| { | { | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  |   // FIXME precision promoted summation | ||||||
|  |   // may be important for correlation functions | ||||||
|  |   // But easily avoided by using double precision fields | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|   GridBase  *grid = Data._grid; |   GridBase  *grid = Data._grid; | ||||||
|   assert(grid!=NULL); |   assert(grid!=NULL); | ||||||
|  |  | ||||||
|   // FIXME |  | ||||||
|   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl; |  | ||||||
|  |  | ||||||
|   const int    Nd = grid->_ndimension; |   const int    Nd = grid->_ndimension; | ||||||
|   const int Nsimd = grid->Nsimd(); |   const int Nsimd = grid->Nsimd(); | ||||||
|  |  | ||||||
| @@ -166,20 +163,28 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   std::vector<sobj> lsSum(ld,zero);                    // sum across these down to scalars |   std::vector<sobj> lsSum(ld,zero);                    // sum across these down to scalars | ||||||
|   std::vector<sobj> extracted(Nsimd);                  // splitting the SIMD |   std::vector<sobj> extracted(Nsimd);                  // splitting the SIMD | ||||||
|  |  | ||||||
|   result.resize(fd); // And then global sum to return the same vector to every node for IO to file |   result.resize(fd); // And then global sum to return the same vector to every node  | ||||||
|   for(int r=0;r<rd;r++){ |   for(int r=0;r<rd;r++){ | ||||||
|     lvSum[r]=zero; |     lvSum[r]=zero; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   std::vector<int>  coor(Nd);   |   int e1=    grid->_slice_nblock[orthogdim]; | ||||||
|  |   int e2=    grid->_slice_block [orthogdim]; | ||||||
|  |   int stride=grid->_slice_stride[orthogdim]; | ||||||
|  |  | ||||||
|   // sum over reduced dimension planes, breaking out orthog dir |   // sum over reduced dimension planes, breaking out orthog dir | ||||||
|  |   // Parallel over orthog direction | ||||||
|  |   parallel_for(int r=0;r<rd;r++){ | ||||||
|  |  | ||||||
|   for(int ss=0;ss<grid->oSites();ss++){ |     int so=r*grid->_ostride[orthogdim]; // base offset for start of plane  | ||||||
|     Lexicographic::CoorFromIndex(coor,ss,grid->_rdimensions); |  | ||||||
|     int r = coor[orthogdim]; |     for(int n=0;n<e1;n++){ | ||||||
|  |       for(int b=0;b<e2;b++){ | ||||||
|  | 	int ss= so+n*stride+b; | ||||||
| 	lvSum[r]=lvSum[r]+Data._odata[ss]; | 	lvSum[r]=lvSum[r]+Data._odata[ss]; | ||||||
|       } |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|   // Sum across simd lanes in the plane, breaking out orthog dir. |   // Sum across simd lanes in the plane, breaking out orthog dir. | ||||||
|   std::vector<int> icoor(Nd); |   std::vector<int> icoor(Nd); | ||||||
| @@ -214,10 +219,341 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|  |  | ||||||
|     result[t]=gsum; |     result[t]=gsum; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)  | ||||||
|  | { | ||||||
|  |   typedef typename vobj::vector_type   vector_type; | ||||||
|  |   typedef typename vobj::scalar_type   scalar_type; | ||||||
|  |   GridBase  *grid = lhs._grid; | ||||||
|  |   assert(grid!=NULL); | ||||||
|  |   conformable(grid,rhs._grid); | ||||||
|  |  | ||||||
|  |   const int    Nd = grid->_ndimension; | ||||||
|  |   const int Nsimd = grid->Nsimd(); | ||||||
|  |  | ||||||
|  |   assert(orthogdim >= 0); | ||||||
|  |   assert(orthogdim < Nd); | ||||||
|  |  | ||||||
|  |   int fd=grid->_fdimensions[orthogdim]; | ||||||
|  |   int ld=grid->_ldimensions[orthogdim]; | ||||||
|  |   int rd=grid->_rdimensions[orthogdim]; | ||||||
|  |  | ||||||
|  |   std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first | ||||||
|  |   std::vector<scalar_type > lsSum(ld,scalar_type(0.0));                    // sum across these down to scalars | ||||||
|  |   std::vector<iScalar<scalar_type> > extracted(Nsimd);                  // splitting the SIMD | ||||||
|  |  | ||||||
|  |   result.resize(fd); // And then global sum to return the same vector to every node for IO to file | ||||||
|  |   for(int r=0;r<rd;r++){ | ||||||
|  |     lvSum[r]=zero; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   int e1=    grid->_slice_nblock[orthogdim]; | ||||||
|  |   int e2=    grid->_slice_block [orthogdim]; | ||||||
|  |   int stride=grid->_slice_stride[orthogdim]; | ||||||
|  |  | ||||||
|  |   parallel_for(int r=0;r<rd;r++){ | ||||||
|  |  | ||||||
|  |     int so=r*grid->_ostride[orthogdim]; // base offset for start of plane  | ||||||
|  |  | ||||||
|  |     for(int n=0;n<e1;n++){ | ||||||
|  |       for(int b=0;b<e2;b++){ | ||||||
|  | 	int ss= so+n*stride+b; | ||||||
|  | 	vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss])); | ||||||
|  | 	lvSum[r]=lvSum[r]+vv; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Sum across simd lanes in the plane, breaking out orthog dir. | ||||||
|  |   std::vector<int> icoor(Nd); | ||||||
|  |   for(int rt=0;rt<rd;rt++){ | ||||||
|  |  | ||||||
|  |     iScalar<vector_type> temp;  | ||||||
|  |     temp._internal = lvSum[rt]; | ||||||
|  |     extract(temp,extracted); | ||||||
|  |  | ||||||
|  |     for(int idx=0;idx<Nsimd;idx++){ | ||||||
|  |  | ||||||
|  |       grid->iCoorFromIindex(icoor,idx); | ||||||
|  |  | ||||||
|  |       int ldx =rt+icoor[orthogdim]*rd; | ||||||
|  |  | ||||||
|  |       lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal; | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   // sum over nodes. | ||||||
|  |   scalar_type gsum; | ||||||
|  |   for(int t=0;t<fd;t++){ | ||||||
|  |     int pt = t/ld; // processor plane | ||||||
|  |     int lt = t%ld; | ||||||
|  |     if ( pt == grid->_processor_coor[orthogdim] ) { | ||||||
|  |       gsum=lsSum[lt]; | ||||||
|  |     } else { | ||||||
|  |       gsum=scalar_type(0.0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     grid->GlobalSum(gsum); | ||||||
|  |  | ||||||
|  |     result[t]=gsum; | ||||||
|  |   } | ||||||
| } | } | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)  | ||||||
|  | { | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |    | ||||||
|  |   int Nblock = rhs._grid->GlobalDimensions()[Orthog]; | ||||||
|  |   std::vector<ComplexD> ip(Nblock); | ||||||
|  |   sn.resize(Nblock); | ||||||
|  |    | ||||||
|  |   sliceInnerProductVector(ip,rhs,rhs,Orthog); | ||||||
|  |   for(int ss=0;ss<Nblock;ss++){ | ||||||
|  |     sn[ss] = real(ip[ss]); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y, | ||||||
|  | 			    int orthogdim,RealD scale=1.0)  | ||||||
|  | {     | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |   typedef typename vobj::tensor_reduced tensor_reduced; | ||||||
|  |    | ||||||
|  |   scalar_type zscale(scale); | ||||||
|  |  | ||||||
|  |   GridBase *grid  = X._grid; | ||||||
|  |  | ||||||
|  |   int Nsimd  =grid->Nsimd(); | ||||||
|  |   int Nblock =grid->GlobalDimensions()[orthogdim]; | ||||||
|  |  | ||||||
|  |   int fd     =grid->_fdimensions[orthogdim]; | ||||||
|  |   int ld     =grid->_ldimensions[orthogdim]; | ||||||
|  |   int rd     =grid->_rdimensions[orthogdim]; | ||||||
|  |  | ||||||
|  |   int e1     =grid->_slice_nblock[orthogdim]; | ||||||
|  |   int e2     =grid->_slice_block [orthogdim]; | ||||||
|  |   int stride =grid->_slice_stride[orthogdim]; | ||||||
|  |  | ||||||
|  |   std::vector<int> icoor; | ||||||
|  |  | ||||||
|  |   for(int r=0;r<rd;r++){ | ||||||
|  |  | ||||||
|  |     int so=r*grid->_ostride[orthogdim]; // base offset for start of plane  | ||||||
|  |  | ||||||
|  |     vector_type    av; | ||||||
|  |  | ||||||
|  |     for(int l=0;l<Nsimd;l++){ | ||||||
|  |       grid->iCoorFromIindex(icoor,l); | ||||||
|  |       int ldx =r+icoor[orthogdim]*rd; | ||||||
|  |       scalar_type *as =(scalar_type *)&av; | ||||||
|  |       as[l] = scalar_type(a[ldx])*zscale; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     tensor_reduced at; at=av; | ||||||
|  |  | ||||||
|  |     parallel_for_nest2(int n=0;n<e1;n++){ | ||||||
|  |       for(int b=0;b<e2;b++){ | ||||||
|  | 	int ss= so+n*stride+b; | ||||||
|  | 	R._odata[ss] = at*X._odata[ss]+Y._odata[ss]; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) | ||||||
|  | { | ||||||
|  |   int NN    = BlockSolverGrid->_ndimension; | ||||||
|  |   int nsimd = BlockSolverGrid->Nsimd(); | ||||||
|  |    | ||||||
|  |   std::vector<int> latt_phys(0); | ||||||
|  |   std::vector<int> simd_phys(0); | ||||||
|  |   std::vector<int>  mpi_phys(0); | ||||||
|  |    | ||||||
|  |   for(int d=0;d<NN;d++){ | ||||||
|  |     if( d!=Orthog ) {  | ||||||
|  |       latt_phys.push_back(BlockSolverGrid->_fdimensions[d]); | ||||||
|  |       simd_phys.push_back(BlockSolverGrid->_simd_layout[d]); | ||||||
|  |       mpi_phys.push_back(BlockSolverGrid->_processors[d]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)  | ||||||
|  | {     | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |  | ||||||
|  |   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||||
|  |  | ||||||
|  |   GridBase *FullGrid  = X._grid; | ||||||
|  |   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||||
|  |  | ||||||
|  |   Lattice<vobj> Xslice(SliceGrid); | ||||||
|  |   Lattice<vobj> Rslice(SliceGrid); | ||||||
|  |  | ||||||
|  |   assert( FullGrid->_simd_layout[Orthog]==1); | ||||||
|  |   int nh =  FullGrid->_ndimension; | ||||||
|  |   int nl = SliceGrid->_ndimension; | ||||||
|  |  | ||||||
|  |   //FIXME package in a convenient iterator | ||||||
|  |   //Should loop over a plane orthogonal to direction "Orthog" | ||||||
|  |   int stride=FullGrid->_slice_stride[Orthog]; | ||||||
|  |   int block =FullGrid->_slice_block [Orthog]; | ||||||
|  |   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||||
|  |   int ostride=FullGrid->_ostride[Orthog]; | ||||||
|  | #pragma omp parallel  | ||||||
|  |   { | ||||||
|  |     std::vector<vobj> s_x(Nblock); | ||||||
|  |  | ||||||
|  | #pragma omp for collapse(2) | ||||||
|  |     for(int n=0;n<nblock;n++){ | ||||||
|  |     for(int b=0;b<block;b++){ | ||||||
|  |       int o  = n*stride + b; | ||||||
|  |  | ||||||
|  |       for(int i=0;i<Nblock;i++){ | ||||||
|  | 	s_x[i] = X[o+i*ostride]; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       vobj dot; | ||||||
|  |       for(int i=0;i<Nblock;i++){ | ||||||
|  | 	dot = Y[o+i*ostride]; | ||||||
|  | 	for(int j=0;j<Nblock;j++){ | ||||||
|  | 	  dot = dot + s_x[j]*(scale*aa(j,i)); | ||||||
|  | 	} | ||||||
|  | 	R[o+i*ostride]=dot; | ||||||
|  |       } | ||||||
|  |     }} | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)  | ||||||
|  | {     | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |  | ||||||
|  |   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||||
|  |  | ||||||
|  |   GridBase *FullGrid  = X._grid; | ||||||
|  |   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||||
|  |  | ||||||
|  |   Lattice<vobj> Xslice(SliceGrid); | ||||||
|  |   Lattice<vobj> Rslice(SliceGrid); | ||||||
|  |  | ||||||
|  |   assert( FullGrid->_simd_layout[Orthog]==1); | ||||||
|  |   int nh =  FullGrid->_ndimension; | ||||||
|  |   int nl = SliceGrid->_ndimension; | ||||||
|  |  | ||||||
|  |   //FIXME package in a convenient iterator | ||||||
|  |   //Should loop over a plane orthogonal to direction "Orthog" | ||||||
|  |   int stride=FullGrid->_slice_stride[Orthog]; | ||||||
|  |   int block =FullGrid->_slice_block [Orthog]; | ||||||
|  |   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||||
|  |   int ostride=FullGrid->_ostride[Orthog]; | ||||||
|  | #pragma omp parallel  | ||||||
|  |   { | ||||||
|  |     std::vector<vobj> s_x(Nblock); | ||||||
|  |  | ||||||
|  | #pragma omp for collapse(2) | ||||||
|  |     for(int n=0;n<nblock;n++){ | ||||||
|  |     for(int b=0;b<block;b++){ | ||||||
|  |       int o  = n*stride + b; | ||||||
|  |  | ||||||
|  |       for(int i=0;i<Nblock;i++){ | ||||||
|  | 	s_x[i] = X[o+i*ostride]; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       vobj dot; | ||||||
|  |       for(int i=0;i<Nblock;i++){ | ||||||
|  | 	dot = s_x[0]*(scale*aa(0,i)); | ||||||
|  | 	for(int j=1;j<Nblock;j++){ | ||||||
|  | 	  dot = dot + s_x[j]*(scale*aa(j,i)); | ||||||
|  | 	} | ||||||
|  | 	R[o+i*ostride]=dot; | ||||||
|  |       } | ||||||
|  |     }} | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||||
|  | { | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |    | ||||||
|  |   GridBase *FullGrid  = lhs._grid; | ||||||
|  |   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||||
|  |    | ||||||
|  |   int Nblock = FullGrid->GlobalDimensions()[Orthog]; | ||||||
|  |    | ||||||
|  |   Lattice<vobj> Lslice(SliceGrid); | ||||||
|  |   Lattice<vobj> Rslice(SliceGrid); | ||||||
|  |    | ||||||
|  |   mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   assert( FullGrid->_simd_layout[Orthog]==1); | ||||||
|  |   int nh =  FullGrid->_ndimension; | ||||||
|  |   int nl = SliceGrid->_ndimension; | ||||||
|  |  | ||||||
|  |   //FIXME package in a convenient iterator | ||||||
|  |   //Should loop over a plane orthogonal to direction "Orthog" | ||||||
|  |   int stride=FullGrid->_slice_stride[Orthog]; | ||||||
|  |   int block =FullGrid->_slice_block [Orthog]; | ||||||
|  |   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||||
|  |   int ostride=FullGrid->_ostride[Orthog]; | ||||||
|  |  | ||||||
|  |   typedef typename vobj::vector_typeD vector_typeD; | ||||||
|  |  | ||||||
|  | #pragma omp parallel  | ||||||
|  |   { | ||||||
|  |     std::vector<vobj> Left(Nblock); | ||||||
|  |     std::vector<vobj> Right(Nblock); | ||||||
|  |     Eigen::MatrixXcd  mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  | #pragma omp for collapse(2) | ||||||
|  |     for(int n=0;n<nblock;n++){ | ||||||
|  |     for(int b=0;b<block;b++){ | ||||||
|  |  | ||||||
|  |       int o  = n*stride + b; | ||||||
|  |  | ||||||
|  |       for(int i=0;i<Nblock;i++){ | ||||||
|  | 	Left [i] = lhs[o+i*ostride]; | ||||||
|  | 	Right[i] = rhs[o+i*ostride]; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       for(int i=0;i<Nblock;i++){ | ||||||
|  |       for(int j=0;j<Nblock;j++){ | ||||||
|  | 	auto tmp = innerProduct(Left[i],Right[j]); | ||||||
|  | 	vector_typeD rtmp = TensorRemove(tmp); | ||||||
|  | 	mat_thread(i,j) += Reduce(rtmp); | ||||||
|  |       }} | ||||||
|  |     }} | ||||||
|  | #pragma omp critical | ||||||
|  |     { | ||||||
|  |       mat += mat_thread; | ||||||
|  |     }   | ||||||
|  |   } | ||||||
|  |   return; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } /*END NAMESPACE GRID*/ | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -6,8 +6,8 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |     Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -30,12 +30,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #define GRID_LATTICE_RNG_H | #define GRID_LATTICE_RNG_H | ||||||
|  |  | ||||||
| #include <random> | #include <random> | ||||||
|  |  | ||||||
|  | #ifdef RNG_SITMO | ||||||
| #include <Grid/sitmo_rng/sitmo_prng_engine.hpp> | #include <Grid/sitmo_rng/sitmo_prng_engine.hpp> | ||||||
|  | #endif  | ||||||
|  |  | ||||||
|  | #if defined(RNG_SITMO) | ||||||
|  | #define RNG_FAST_DISCARD | ||||||
|  | #else  | ||||||
|  | #undef  RNG_FAST_DISCARD | ||||||
|  | #endif | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
|   //http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf ? |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////// | ||||||
|   // Allow the RNG state to be less dense than the fine grid |   // Allow the RNG state to be less dense than the fine grid | ||||||
|   ////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////// | ||||||
| @@ -65,120 +72,188 @@ namespace Grid { | |||||||
|  |  | ||||||
|       multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];  |       multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return multiplicity; |     return multiplicity; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |    | ||||||
|  | // merge of April 11 2017 | ||||||
|  | //<<<<<<< HEAD | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   // this function is necessary for the LS vectorised field | ||||||
|  |   inline int RNGfillable_general(GridBase *coarse,GridBase *fine) | ||||||
|  |   { | ||||||
|  |     int rngdims = coarse->_ndimension; | ||||||
|  |      | ||||||
|  |     // trivially extended in higher dims, with locality guaranteeing RNG state is local to node | ||||||
|  |     int lowerdims   = fine->_ndimension - coarse->_ndimension;  assert(lowerdims >= 0); | ||||||
|  |     // assumes that the higher dimensions are not using more processors | ||||||
|  |     // all further divisions are local | ||||||
|  |     for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1); | ||||||
|  |     for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]); | ||||||
|  |      | ||||||
|  |  | ||||||
|  |     // then divide the number of local sites | ||||||
|  |     // check that the total number of sims agree, meanse the iSites are the same | ||||||
|  |     assert(fine->Nsimd() == coarse->Nsimd()); | ||||||
|  |  | ||||||
|  |     // check that the two grids divide cleanly | ||||||
|  |     assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() ); | ||||||
|  |  | ||||||
|  |     return fine->lSites() / coarse->lSites(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /* | ||||||
|   // Wrap seed_seq to give common interface with random_device |   // Wrap seed_seq to give common interface with random_device | ||||||
|   // Should rather wrap random_device and have a generate |  | ||||||
|   class fixedSeed { |   class fixedSeed { | ||||||
|   public: |   public: | ||||||
|  |  | ||||||
|     typedef std::seed_seq::result_type result_type; |     typedef std::seed_seq::result_type result_type; | ||||||
|  |  | ||||||
|     std::seed_seq src; |     std::seed_seq src; | ||||||
|      |      | ||||||
|     template<class int_type> fixedSeed(const std::vector<int_type> &seeds) : src(seeds.begin(),seeds.end()) {}; |     fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {}; | ||||||
|  |  | ||||||
|     template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { |     result_type operator () (void){ | ||||||
|       src.generate(begin,end); |       std::vector<result_type> list(1); | ||||||
|  |       src.generate(list.begin(),list.end()); | ||||||
|  |       return list[0]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  | ======= | ||||||
|   class deviceSeed { | >>>>>>> develop | ||||||
|   public: |   */ | ||||||
|  |  | ||||||
|     std::random_device rd; |  | ||||||
|  |  | ||||||
|     typedef std::random_device::result_type result_type; |  | ||||||
|      |  | ||||||
|     deviceSeed(void) : rd(){}; |  | ||||||
|  |  | ||||||
|     template< class RandomIt > void generate( RandomIt begin, RandomIt end ) { |  | ||||||
|       for(RandomIt it=begin; it!=end;it++){ |  | ||||||
| 	*it = rd(); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|    |    | ||||||
|   // real scalars are one component |   // real scalars are one component | ||||||
|   template<class scalar,class distribution,class generator> void fillScalar(scalar &s,distribution &dist,generator & gen) |   template<class scalar,class distribution,class generator>  | ||||||
|  |   void fillScalar(scalar &s,distribution &dist,generator & gen) | ||||||
|   { |   { | ||||||
|     s=dist(gen); |     s=dist(gen); | ||||||
|   } |   } | ||||||
|   template<class distribution,class generator> void fillScalar(ComplexF &s,distribution &dist, generator &gen) |   template<class distribution,class generator>  | ||||||
|  |   void fillScalar(ComplexF &s,distribution &dist, generator &gen) | ||||||
|   { |   { | ||||||
|     s=ComplexF(dist(gen),dist(gen)); |     s=ComplexF(dist(gen),dist(gen)); | ||||||
|   } |   } | ||||||
|   template<class distribution,class generator> void fillScalar(ComplexD &s,distribution &dist,generator &gen) |   template<class distribution,class generator>  | ||||||
|  |   void fillScalar(ComplexD &s,distribution &dist,generator &gen) | ||||||
|   { |   { | ||||||
|     s=ComplexD(dist(gen),dist(gen)); |     s=ComplexD(dist(gen),dist(gen)); | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   class GridRNGbase { |   class GridRNGbase { | ||||||
|  |  | ||||||
|   public: |   public: | ||||||
|  |  | ||||||
|     int _seeded; |  | ||||||
|     // One generator per site. |     // One generator per site. | ||||||
|     // Uniform and Gaussian distributions from these generators. |     // Uniform and Gaussian distributions from these generators. | ||||||
| #ifdef RNG_RANLUX | #ifdef RNG_RANLUX | ||||||
|     typedef uint64_t      RngStateType; |  | ||||||
|     typedef std::ranlux48 RngEngine; |     typedef std::ranlux48 RngEngine; | ||||||
|  |     typedef uint64_t      RngStateType; | ||||||
|     static const int RngStateCount = 15; |     static const int RngStateCount = 15; | ||||||
| #elif RNG_MT19937  | #endif  | ||||||
|  | #ifdef RNG_MT19937  | ||||||
|     typedef std::mt19937 RngEngine; |     typedef std::mt19937 RngEngine; | ||||||
|     typedef uint32_t     RngStateType; |     typedef uint32_t     RngStateType; | ||||||
|     static const int     RngStateCount = std::mt19937::state_size; |     static const int     RngStateCount = std::mt19937::state_size; | ||||||
| #elif RNG_SITMO | #endif | ||||||
|  | #ifdef RNG_SITMO | ||||||
|     typedef sitmo::prng_engine 	RngEngine; |     typedef sitmo::prng_engine 	RngEngine; | ||||||
|     typedef uint64_t    	RngStateType; |     typedef uint64_t    	RngStateType; | ||||||
|     static const int    	RngStateCount = 4; |     static const int    	RngStateCount = 13; | ||||||
| #endif | #endif | ||||||
|     std::vector<RngEngine>                             _generators; |  | ||||||
|     std::vector<std::uniform_real_distribution<RealD>> _uniform; |  | ||||||
|     std::vector<std::normal_distribution<RealD>>       _gaussian; |  | ||||||
|     std::vector<std::discrete_distribution<int32_t>>   _bernoulli; |  | ||||||
|  |  | ||||||
|     void GetState(std::vector<RngStateType> & saved,int gen) { |     std::vector<RngEngine>                             _generators; | ||||||
|  |     std::vector<std::uniform_real_distribution<RealD> > _uniform; | ||||||
|  |     std::vector<std::normal_distribution<RealD> >       _gaussian; | ||||||
|  |     std::vector<std::discrete_distribution<int32_t> >   _bernoulli; | ||||||
|  |     std::vector<std::uniform_int_distribution<uint32_t> > _uid; | ||||||
|  |  | ||||||
|  |     /////////////////////// | ||||||
|  |     // support for parallel init | ||||||
|  |     /////////////////////// | ||||||
|  | #ifdef RNG_FAST_DISCARD | ||||||
|  |     static void Skip(RngEngine &eng) | ||||||
|  |     { | ||||||
|  |       ///////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       // Skip by 2^40 elements between successive lattice sites | ||||||
|  |       // This goes by 10^12. | ||||||
|  |       // Consider quenched updating; likely never exceeding rate of 1000 sweeps | ||||||
|  |       // per second on any machine. This gives us of order 10^9 seconds, or 100 years | ||||||
|  |       // skip ahead. | ||||||
|  |       // For HMC unlikely to go at faster than a solve per second, and  | ||||||
|  |       // tens of seconds per trajectory so this is clean in all reasonable cases, | ||||||
|  |       // and margin of safety is orders of magnitude. | ||||||
|  |       // We could hack Sitmo to skip in the higher order words of state if necessary | ||||||
|  |       ///////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       uint64_t skip = 0x1; skip = skip<<40; | ||||||
|  |       eng.discard(skip); | ||||||
|  |     }  | ||||||
|  | #endif | ||||||
|  |     static RngEngine Reseed(RngEngine &eng) | ||||||
|  |     { | ||||||
|  |       std::vector<uint32_t> newseed; | ||||||
|  |       std::uniform_int_distribution<uint32_t> uid; | ||||||
|  |       return Reseed(eng,newseed,uid); | ||||||
|  |     } | ||||||
|  |     static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed, | ||||||
|  | 			    std::uniform_int_distribution<uint32_t> &uid) | ||||||
|  |     { | ||||||
|  |       const int reseeds=4; | ||||||
|  |        | ||||||
|  |       newseed.resize(reseeds); | ||||||
|  |       for(int i=0;i<reseeds;i++){ | ||||||
|  | 	newseed[i] = uid(eng); | ||||||
|  |       } | ||||||
|  |       std::seed_seq sseq(newseed.begin(),newseed.end()); | ||||||
|  |       return RngEngine(sseq); | ||||||
|  |     }     | ||||||
|  |  | ||||||
|  |     void GetState(std::vector<RngStateType> & saved,RngEngine &eng) { | ||||||
|       saved.resize(RngStateCount); |       saved.resize(RngStateCount); | ||||||
|       std::stringstream ss; |       std::stringstream ss; | ||||||
|       ss<<_generators[gen]; |       ss<<eng; | ||||||
|       ss.seekg(0,ss.beg); |       ss.seekg(0,ss.beg); | ||||||
|       for(int i=0;i<RngStateCount;i++){ |       for(int i=0;i<RngStateCount;i++){ | ||||||
|         ss>>saved[i]; |         ss>>saved[i]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     void SetState(std::vector<RngStateType> & saved,int gen){ |     void GetState(std::vector<RngStateType> & saved,int gen) { | ||||||
|  |       GetState(saved,_generators[gen]); | ||||||
|  |     } | ||||||
|  |     void SetState(std::vector<RngStateType> & saved,RngEngine &eng){ | ||||||
|       assert(saved.size()==RngStateCount); |       assert(saved.size()==RngStateCount); | ||||||
|       std::stringstream ss; |       std::stringstream ss; | ||||||
|       for(int i=0;i<RngStateCount;i++){ |       for(int i=0;i<RngStateCount;i++){ | ||||||
|         ss<< saved[i]<<" "; |         ss<< saved[i]<<" "; | ||||||
|       } |       } | ||||||
|       ss.seekg(0,ss.beg); |       ss.seekg(0,ss.beg); | ||||||
|       ss>>_generators[gen]; |       ss>>eng; | ||||||
|  |     } | ||||||
|  |     void SetState(std::vector<RngStateType> & saved,int gen){ | ||||||
|  |       SetState(saved,_generators[gen]); | ||||||
|  |     } | ||||||
|  |     void SetEngine(RngEngine &Eng, int gen){ | ||||||
|  |       _generators[gen]=Eng; | ||||||
|  |     } | ||||||
|  |     void GetEngine(RngEngine &Eng, int gen){ | ||||||
|  |       Eng=_generators[gen]; | ||||||
|  |     } | ||||||
|  |     template<class source> void Seed(source &src, int gen) | ||||||
|  |     { | ||||||
|  |       _generators[gen] = RngEngine(src); | ||||||
|     }     |     }     | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   class GridSerialRNG : public GridRNGbase { |   class GridSerialRNG : public GridRNGbase { | ||||||
|   public: |   public: | ||||||
|  |  | ||||||
|     // FIXME ... do we require lockstep draws of randoms  |  | ||||||
|     // from all nodes keeping seeds consistent. |  | ||||||
|     // place a barrier/broadcast in the fill routine |  | ||||||
|  |  | ||||||
|     GridSerialRNG() : GridRNGbase() { |     GridSerialRNG() : GridRNGbase() { | ||||||
|       _generators.resize(1); |       _generators.resize(1); | ||||||
|       _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); |       _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); | ||||||
|       _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) ); |       _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) ); | ||||||
|       _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); |       _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); | ||||||
|       _seeded=0; |       _uid.resize(1,std::uniform_int_distribution<uint32_t>() ); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){ |     template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){ | ||||||
|  |  | ||||||
|       typedef typename sobj::scalar_type scalar_type; |       typedef typename sobj::scalar_type scalar_type; | ||||||
| @@ -250,183 +325,184 @@ namespace Grid { | |||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); |       CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); | ||||||
|     } |     } | ||||||
|      |      | ||||||
|     template<class source> void Seed(source &src) |  | ||||||
|     { |  | ||||||
|       _generators[0] = RngEngine(src); |  | ||||||
|       _seeded=1; |  | ||||||
|     }     |  | ||||||
|     void SeedRandomDevice(void){ |  | ||||||
|       deviceSeed src; |  | ||||||
|       Seed(src); |  | ||||||
|     } |  | ||||||
|     void SeedFixedIntegers(const std::vector<int> &seeds){ |     void SeedFixedIntegers(const std::vector<int> &seeds){ | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); |       CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); | ||||||
|       fixedSeed src(seeds); |       std::seed_seq src(seeds.begin(),seeds.end()); | ||||||
|       Seed(src); |       Seed(src,0); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   class GridParallelRNG : public GridRNGbase { |   class GridParallelRNG : public GridRNGbase { | ||||||
|  |  | ||||||
|  |     double _time_counter; | ||||||
|  |  | ||||||
|   public: |   public: | ||||||
|  |  | ||||||
|     GridBase *_grid; |     GridBase *_grid; | ||||||
|     int _vol; |     unsigned int _vol; | ||||||
|  |  | ||||||
|     int generator_idx(int os,int is){ |     int generator_idx(int os,int is) { | ||||||
|       return is*_grid->oSites()+os; |       return is*_grid->oSites()+os; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     GridParallelRNG(GridBase *grid) : GridRNGbase() { |     GridParallelRNG(GridBase *grid) : GridRNGbase() { | ||||||
|       _grid=grid; |       _grid = grid; | ||||||
|       _vol  =_grid->iSites()*_grid->oSites(); |       _vol  =_grid->iSites()*_grid->oSites(); | ||||||
|  |  | ||||||
|       _generators.resize(_vol); |       _generators.resize(_vol); | ||||||
|       _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); |       _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); | ||||||
|       _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) ); |       _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) ); | ||||||
|       _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); |       _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||||
|       _seeded=0; |       _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     //FIXME implement generic IO and create state save/restore |  | ||||||
|     //void SaveState(const std::string<char> &file); |  | ||||||
|     //void LoadState(const std::string<char> &file); |  | ||||||
|  |  | ||||||
|     template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){ |     template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){ | ||||||
|  |  | ||||||
|       typedef typename vobj::scalar_object scalar_object; |       typedef typename vobj::scalar_object scalar_object; | ||||||
|       typedef typename vobj::scalar_type scalar_type; |       typedef typename vobj::scalar_type scalar_type; | ||||||
|       typedef typename vobj::vector_type vector_type; |       typedef typename vobj::vector_type vector_type; | ||||||
|  |  | ||||||
|       int multiplicity = RNGfillable(_grid,l._grid); |       double inner_time_counter = usecond(); | ||||||
|  |  | ||||||
|       int     Nsimd =_grid->Nsimd(); |  | ||||||
|       int     osites=_grid->oSites(); |  | ||||||
|       int words=sizeof(scalar_object)/sizeof(scalar_type); |  | ||||||
|  |  | ||||||
|  |       int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid | ||||||
|  |       int Nsimd  = _grid->Nsimd();  // guaranteed to be the same for l._grid too | ||||||
|  |       int osites = _grid->oSites();  // guaranteed to be <= l._grid->oSites() by a factor multiplicity | ||||||
|  |       int words  = sizeof(scalar_object) / sizeof(scalar_type); | ||||||
|  |  | ||||||
|       parallel_for(int ss=0;ss<osites;ss++){ |       parallel_for(int ss=0;ss<osites;ss++){ | ||||||
|  |  | ||||||
|         std::vector<scalar_object> buf(Nsimd); |         std::vector<scalar_object> buf(Nsimd); | ||||||
| 	for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times |         for (int m = 0; m < multiplicity; m++) {  // Draw from same generator multiplicity times | ||||||
|  |  | ||||||
| 	  int sm=multiplicity*ss+m;      // Maps the generator site to the fine site |           int sm = multiplicity * ss + m;  // Maps the generator site to the fine site | ||||||
|  |  | ||||||
| 	  for(int si=0;si<Nsimd;si++){ |           for (int si = 0; si < Nsimd; si++) { | ||||||
| 	    int gdx = generator_idx(ss,si); // index of generator state |              | ||||||
|  |             int gdx = generator_idx(ss, si);  // index of generator state | ||||||
|             scalar_type *pointer = (scalar_type *)&buf[si]; |             scalar_type *pointer = (scalar_type *)&buf[si]; | ||||||
|             dist[gdx].reset(); |             dist[gdx].reset(); | ||||||
| 	    for(int idx=0;idx<words;idx++){ |             for (int idx = 0; idx < words; idx++)  | ||||||
| 	      fillScalar(pointer[idx],dist[gdx],_generators[gdx]); |               fillScalar(pointer[idx], dist[gdx], _generators[gdx]); | ||||||
|  |           } | ||||||
|  |           // merge into SIMD lanes, FIXME suboptimal implementation | ||||||
|  |           merge(l._odata[sm], buf); | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|  |  | ||||||
| 	  // merge into SIMD lanes |       _time_counter += usecond()- inner_time_counter; | ||||||
| 	  merge(l._odata[sm],buf); |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // This loop could be made faster to avoid the Amdahl bottleneck by |     void SeedFixedIntegers(const std::vector<int> &seeds){ | ||||||
|     // i)  seed generators on each timeslice, for x=y=z=0; |  | ||||||
|     // ii) seed generators on each z for x=y=0 |  | ||||||
|     // iii)seed generators on each y,z for x=0 |  | ||||||
|     // iv) seed generators on each y,z,x  |  | ||||||
|     // made possible by physical indexing. |  | ||||||
|     template<class source> void Seed(source &src) |  | ||||||
|     { |  | ||||||
|  |  | ||||||
|       typedef typename source::result_type seed_t; |       // Everyone generates the same seed_seq based on input seeds | ||||||
|       std::uniform_int_distribution<seed_t> uid; |       CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); | ||||||
|  |  | ||||||
|       int numseed=4; |       std::seed_seq source(seeds.begin(),seeds.end()); | ||||||
|       int gsites = _grid->_gsites; |  | ||||||
|       std::vector<seed_t> site_init(numseed); |       RngEngine master_engine(source); | ||||||
|  |  | ||||||
|  | #ifdef RNG_FAST_DISCARD | ||||||
|  |       //////////////////////////////////////////////// | ||||||
|  |       // Skip ahead through a single stream. | ||||||
|  |       // Applicable to SITMO and other hash-based/crypto RNGs | ||||||
|  |       // Should be applicable to Mersenne Twister, but the C++11 | ||||||
|  |       // MT implementation does not implement fast discard even though | ||||||
|  |       // in principle this is possible | ||||||
|  |       //////////////////////////////////////////////// | ||||||
|       std::vector<int> gcoor; |       std::vector<int> gcoor; | ||||||
|  |  | ||||||
|  |  | ||||||
|       // Master RngEngine |  | ||||||
|       std::vector<seed_t> master_init(numseed);  src.generate(master_init.begin(),master_init.end()); |  | ||||||
|       _grid->Broadcast(0,(void *)&master_init[0],sizeof(seed_t)*numseed); |  | ||||||
|       fixedSeed master_seed(master_init); |  | ||||||
|       RngEngine master_engine(master_seed); |  | ||||||
|  |  | ||||||
|       // Per node RngEngine |  | ||||||
|       std::vector<seed_t> node_init(numseed); |  | ||||||
|       for(int r=0;r<_grid->ProcessorCount();r++) { |  | ||||||
|  |  | ||||||
| 	std::vector<seed_t> rank_init(numseed); |  | ||||||
| 	for(int i=0;i<numseed;i++) rank_init[i] = uid(master_engine); |  | ||||||
|  |  | ||||||
| 	std::cout << GridLogMessage << "SeedSeq for rank "<<r; |  | ||||||
| 	for(int i=0;i<numseed;i++) std::cout<<" "<<rank_init[i]; |  | ||||||
| 	std::cout <<std::endl; |  | ||||||
|  |  | ||||||
| 	if ( r==_grid->ThisRank() ) {  |  | ||||||
| 	  for(int i=0;i<numseed;i++) node_init[i] = rank_init[i]; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       //////////////////////////////////////////////////// |  | ||||||
|       // Set up a seed_seq wrapper with these 8 words |  | ||||||
|       // and draw for each site within node. |  | ||||||
|       //////////////////////////////////////////////////// |  | ||||||
|       fixedSeed node_seed(node_init); |  | ||||||
|       RngEngine node_engine(node_seed); |  | ||||||
|  |  | ||||||
|       for(int gidx=0;gidx<gsites;gidx++){ |  | ||||||
|       int rank,o_idx,i_idx; |       int rank,o_idx,i_idx; | ||||||
|  |  | ||||||
|  |       // Everybody loops over global volume. | ||||||
|  |       for(int gidx=0;gidx<_grid->_gsites;gidx++){ | ||||||
|  |  | ||||||
|  | 	Skip(master_engine); // Skip to next RNG sequence | ||||||
|  |  | ||||||
|  | 	// Where is it? | ||||||
| 	_grid->GlobalIndexToGlobalCoor(gidx,gcoor); | 	_grid->GlobalIndexToGlobalCoor(gidx,gcoor); | ||||||
| 	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); | 	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); | ||||||
|  |  | ||||||
|  | 	// If this is one of mine we take it | ||||||
| 	if( rank == _grid->ThisRank() ){ | 	if( rank == _grid->ThisRank() ){ | ||||||
| 	  int l_idx=generator_idx(o_idx,i_idx); | 	  int l_idx=generator_idx(o_idx,i_idx); | ||||||
| 	  for(int i=0;i<numseed;i++)  site_init[i] = uid(node_engine); | 	  _generators[l_idx] = master_engine; | ||||||
| 	  fixedSeed site_seed(site_init); | 	} | ||||||
| 	  _generators[l_idx] = RngEngine(site_seed); |  | ||||||
|  |       } | ||||||
|  | #else  | ||||||
|  |       //////////////////////////////////////////////////////////////// | ||||||
|  |       // Machine and thread decomposition dependent seeding is efficient | ||||||
|  |       // and maximally parallel; but NOT reproducible from machine to machine.  | ||||||
|  |       // Not ideal, but fastest way to reseed all nodes. | ||||||
|  |       //////////////////////////////////////////////////////////////// | ||||||
|  |       { | ||||||
|  | 	// Obtain one Reseed per processor | ||||||
|  | 	int Nproc = _grid->ProcessorCount(); | ||||||
|  | 	std::vector<RngEngine> seeders(Nproc); | ||||||
|  | 	int me= _grid->ThisRank(); | ||||||
|  | 	for(int p=0;p<Nproc;p++){ | ||||||
|  | 	  seeders[p] = Reseed(master_engine); | ||||||
|  | 	} | ||||||
|  | 	master_engine = seeders[me]; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       { | ||||||
|  | 	// Obtain one reseeded generator per thread | ||||||
|  | 	int Nthread = GridThread::GetThreads(); | ||||||
|  | 	std::vector<RngEngine> seeders(Nthread); | ||||||
|  | 	for(int t=0;t<Nthread;t++){ | ||||||
|  | 	  seeders[t] = Reseed(master_engine); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	parallel_for(int t=0;t<Nthread;t++) { | ||||||
|  | 	  // set up one per local site in threaded fashion | ||||||
|  | 	  std::vector<uint32_t> newseeds; | ||||||
|  | 	  std::uniform_int_distribution<uint32_t> uid;	 | ||||||
|  | 	  for(int l=0;l<_grid->lSites();l++) { | ||||||
|  | 	    if ( (l%Nthread)==t ) { | ||||||
|  | 	      _generators[l] = Reseed(seeders[t],newseeds,uid); | ||||||
| 	    } | 	    } | ||||||
| 	  } | 	  } | ||||||
|       _seeded=1; |  | ||||||
| 	} | 	} | ||||||
|     void SeedRandomDevice(void){ |  | ||||||
|       deviceSeed src; |  | ||||||
|       Seed(src); |  | ||||||
|       } |       } | ||||||
|     void SeedFixedIntegers(const std::vector<int> &seeds){ | #endif | ||||||
|       CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); |     } | ||||||
|       fixedSeed src(seeds); |  | ||||||
|       Seed(src); |     void Report(){ | ||||||
|  |       std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Support for rigorous test of RNG's | ||||||
|  |     // Return uniform random uint32_t from requested site generator | ||||||
|  |     //////////////////////////////////////////////////////////////////////// | ||||||
|  |     uint32_t GlobalU01(int gsite){ | ||||||
|  |  | ||||||
|  |       uint32_t the_number; | ||||||
|  |       // who | ||||||
|  |       std::vector<int> gcoor; | ||||||
|  |       int rank,o_idx,i_idx; | ||||||
|  |       _grid->GlobalIndexToGlobalCoor(gsite,gcoor); | ||||||
|  |       _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); | ||||||
|  |  | ||||||
|  |       // draw | ||||||
|  |       int l_idx=generator_idx(o_idx,i_idx); | ||||||
|  |       if( rank == _grid->ThisRank() ){ | ||||||
|  | 	the_number = _uid[l_idx](_generators[l_idx]); | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       // share & return | ||||||
|  |       _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); | ||||||
|  |       return the_number; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){ |   template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l)   { rng.fill(l,rng._uniform);  } | ||||||
|     rng.fill(l,rng._uniform); |   template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); } | ||||||
|   } |   template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);} | ||||||
|  |  | ||||||
|   template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l){ |   template <class sobj> inline void random(GridSerialRNG &rng,sobj &l)   { rng.fill(l,rng._uniform  ); } | ||||||
|     rng.fill(l,rng._gaussian); |   template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } | ||||||
|   } |   template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); } | ||||||
|    |  | ||||||
|   template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ |  | ||||||
|     rng.fill(l,rng._bernoulli); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   template <class sobj> inline void random(GridSerialRNG &rng,sobj &l){ |  | ||||||
|     rng.fill(l,rng._uniform); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l){ |  | ||||||
|     rng.fill(l,rng._gaussian); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ |  | ||||||
|     rng.fill(l,rng._bernoulli); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
|     /************************************************************************************* | /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
| @@ -359,7 +359,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out) | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) | void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -401,7 +401,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int | |||||||
| } | } | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog) | void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice, int orthog) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -444,7 +444,7 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -551,7 +551,10 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | |||||||
|  |  | ||||||
| //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | ||||||
| template<typename vobj, typename sobj> | template<typename vobj, typename sobj> | ||||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){ | typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type  | ||||||
|  | unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in) | ||||||
|  | { | ||||||
|  |  | ||||||
|   typedef typename vobj::vector_type vtype; |   typedef typename vobj::vector_type vtype; | ||||||
|    |    | ||||||
|   GridBase* in_grid = in._grid; |   GridBase* in_grid = in._grid; | ||||||
| @@ -590,6 +593,54 @@ typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj> | |||||||
|     extract1(in_vobj, out_ptrs, 0); |     extract1(in_vobj, out_ptrs, 0); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  | //Copy array of scalar objects in lexicographic order into a SIMD-vectorized lattice | ||||||
|  | template<typename vobj, typename sobj> | ||||||
|  | typename std::enable_if<isSIMDvectorized<vobj>::value  | ||||||
|  |                     && !isSIMDvectorized<sobj>::value, void>::type  | ||||||
|  | vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) | ||||||
|  | { | ||||||
|  |  | ||||||
|  |   typedef typename vobj::vector_type vtype; | ||||||
|  |    | ||||||
|  |   GridBase* grid = out._grid; | ||||||
|  |   assert(in.size()==grid->lSites()); | ||||||
|  |    | ||||||
|  |   int ndim     = grid->Nd(); | ||||||
|  |   int nsimd    = vtype::Nsimd(); | ||||||
|  |  | ||||||
|  |   std::vector<std::vector<int> > icoor(nsimd); | ||||||
|  |        | ||||||
|  |   for(int lane=0; lane < nsimd; lane++){ | ||||||
|  |     icoor[lane].resize(ndim); | ||||||
|  |     grid->iCoorFromIindex(icoor[lane],lane); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index | ||||||
|  |     //Assemble vector of pointers to input elements | ||||||
|  |     std::vector<sobj*> ptrs(nsimd); | ||||||
|  |  | ||||||
|  |     std::vector<int> ocoor(ndim); | ||||||
|  |     grid->oCoorFromOindex(ocoor, oidx); | ||||||
|  |  | ||||||
|  |     std::vector<int> lcoor(grid->Nd()); | ||||||
|  |        | ||||||
|  |     for(int lane=0; lane < nsimd; lane++){ | ||||||
|  |  | ||||||
|  |       for(int mu=0;mu<ndim;mu++){ | ||||||
|  | 	lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu]; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       int lex; | ||||||
|  |       Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); | ||||||
|  |       ptrs[lane] = &in[lex]; | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     //pack from those ptrs | ||||||
|  |     vobj vecobj; | ||||||
|  |     merge1(vecobj, ptrs, 0); | ||||||
|  |     out._odata[oidx] = vecobj;  | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| //Convert a Lattice from one precision to another | //Convert a Lattice from one precision to another | ||||||
| template<class VobjOut, class VobjIn> | template<class VobjOut, class VobjIn> | ||||||
| @@ -615,7 +666,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | |||||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); |   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); |   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||||
|      |      | ||||||
|   parallel_for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ |   parallel_for(uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||||
|     std::vector<int> out_ocoor(ndim); |     std::vector<int> out_ocoor(ndim); | ||||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); |     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -62,14 +62,20 @@ namespace Grid { | |||||||
|     return ret; |     return ret; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ |   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ | ||||||
|     Lattice<obj> ret(rhs._grid); |     Lattice<obj> ret(rhs._grid); | ||||||
|     ret.checkerboard = rhs.checkerboard; |     ret.checkerboard = rhs.checkerboard; | ||||||
|     conformable(ret,rhs); |     conformable(ret,rhs); | ||||||
|     parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ |     parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ | ||||||
|       ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); |       ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return ret; |     return ret; | ||||||
|  |  | ||||||
|  |      | ||||||
|  |      | ||||||
|  |  | ||||||
|  |      | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -30,6 +30,7 @@ directory | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
|  | #include <Grid/util/CompilerCompatible.h> | ||||||
|  |  | ||||||
| #include <cxxabi.h> | #include <cxxabi.h> | ||||||
| #include <memory> | #include <memory> | ||||||
|   | |||||||
| @@ -110,8 +110,8 @@ public: | |||||||
|   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ |   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ | ||||||
|  |  | ||||||
|     if ( log.active ) { |     if ( log.active ) { | ||||||
|       stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : "; |       stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : "; | ||||||
|       stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : "; |       stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : "; | ||||||
|       if ( log.timestamp ) { |       if ( log.timestamp ) { | ||||||
| 	StopWatch.Stop(); | 	StopWatch.Stop(); | ||||||
| 	GridTime now = StopWatch.Elapsed(); | 	GridTime now = StopWatch.Elapsed(); | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										716
									
								
								lib/parallelIO/IldgIO.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										716
									
								
								lib/parallelIO/IldgIO.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,716 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/parallelIO/IldgIO.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_ILDG_IO_H | ||||||
|  | #define GRID_ILDG_IO_H | ||||||
|  |  | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  | #include <algorithm> | ||||||
|  | #include <fstream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <iostream> | ||||||
|  | #include <map> | ||||||
|  |  | ||||||
|  | #include <pwd.h> | ||||||
|  | #include <sys/utsname.h> | ||||||
|  | #include <unistd.h> | ||||||
|  |  | ||||||
|  | //C-Lime is a must have for this functionality | ||||||
|  | extern "C" {   | ||||||
|  | #include "lime.h" | ||||||
|  | } | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  | namespace QCD { | ||||||
|  |  | ||||||
|  |   ///////////////////////////////// | ||||||
|  |   // Encode word types as strings | ||||||
|  |   ///////////////////////////////// | ||||||
|  |  template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } | ||||||
|  |  template<> inline std::string ScidacWordMnemonic<double>  (void){ return std::string("D"); } | ||||||
|  |  template<> inline std::string ScidacWordMnemonic<float>   (void){ return std::string("F"); } | ||||||
|  |  template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); } | ||||||
|  |  template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); } | ||||||
|  |  template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } | ||||||
|  |  template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); } | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////// | ||||||
|  |   // Encode a generic tensor as a string | ||||||
|  |   ///////////////////////////////////////// | ||||||
|  |  template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {  | ||||||
|  |  | ||||||
|  |    typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||||
|  |  | ||||||
|  |    int _ColourN       = indexRank<ColourIndex,vobj>(); | ||||||
|  |    int _ColourScalar  =  isScalar<ColourIndex,vobj>(); | ||||||
|  |    int _ColourVector  =  isVector<ColourIndex,vobj>(); | ||||||
|  |    int _ColourMatrix  =  isMatrix<ColourIndex,vobj>(); | ||||||
|  |  | ||||||
|  |    int _SpinN       = indexRank<SpinIndex,vobj>(); | ||||||
|  |    int _SpinScalar  =  isScalar<SpinIndex,vobj>(); | ||||||
|  |    int _SpinVector  =  isVector<SpinIndex,vobj>(); | ||||||
|  |    int _SpinMatrix  =  isMatrix<SpinIndex,vobj>(); | ||||||
|  |  | ||||||
|  |    int _LorentzN       = indexRank<LorentzIndex,vobj>(); | ||||||
|  |    int _LorentzScalar  =  isScalar<LorentzIndex,vobj>(); | ||||||
|  |    int _LorentzVector  =  isVector<LorentzIndex,vobj>(); | ||||||
|  |    int _LorentzMatrix  =  isMatrix<LorentzIndex,vobj>(); | ||||||
|  |  | ||||||
|  |    std::stringstream stream; | ||||||
|  |  | ||||||
|  |    stream << "GRID_"; | ||||||
|  |    stream << ScidacWordMnemonic<stype>(); | ||||||
|  |  | ||||||
|  |    //   std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl; | ||||||
|  |    //   std::cout << " Spin    N/S/V/M : " << _SpinN   <<" "<<_SpinScalar   <<"/"<<_SpinVector   <<"/"<<_SpinMatrix<<std::endl; | ||||||
|  |    //   std::cout << " Colour  N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl; | ||||||
|  |  | ||||||
|  |    if ( _LorentzVector )   stream << "_LorentzVector"<<_LorentzN; | ||||||
|  |    if ( _LorentzMatrix )   stream << "_LorentzMatrix"<<_LorentzN; | ||||||
|  |  | ||||||
|  |    if ( _SpinVector )   stream << "_SpinVector"<<_SpinN; | ||||||
|  |    if ( _SpinMatrix )   stream << "_SpinMatrix"<<_SpinN; | ||||||
|  |  | ||||||
|  |    if ( _ColourVector )   stream << "_ColourVector"<<_ColourN; | ||||||
|  |    if ( _ColourMatrix )   stream << "_ColourMatrix"<<_ColourN; | ||||||
|  |  | ||||||
|  |    if ( _ColourScalar && _LorentzScalar && _SpinScalar )   stream << "_Complex"; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |    typesize = sizeof(typename vobj::scalar_type); | ||||||
|  |  | ||||||
|  |    if ( _ColourMatrix ) typesize*= _ColourN*_ColourN; | ||||||
|  |    else                 typesize*= _ColourN; | ||||||
|  |  | ||||||
|  |    if ( _SpinMatrix )   typesize*= _SpinN*_SpinN; | ||||||
|  |    else                 typesize*= _SpinN; | ||||||
|  |  | ||||||
|  |    colors    = _ColourN; | ||||||
|  |    spins     = _SpinN; | ||||||
|  |    datacount = _LorentzN; | ||||||
|  |  | ||||||
|  |    return stream.str(); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {  | ||||||
|  |    return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount); | ||||||
|  |  }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  //////////////////////////////////////////////////////////// | ||||||
|  |  // Helper to fill out metadata | ||||||
|  |  //////////////////////////////////////////////////////////// | ||||||
|  |  template<class vobj> void ScidacMetaData(Lattice<vobj> & field, | ||||||
|  | 					  FieldMetaData &header, | ||||||
|  | 					  scidacRecord & _scidacRecord, | ||||||
|  | 					  scidacFile   & _scidacFile)  | ||||||
|  |  { | ||||||
|  |    typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||||
|  |  | ||||||
|  |    ///////////////////////////////////// | ||||||
|  |    // Pull Grid's metadata | ||||||
|  |    ///////////////////////////////////// | ||||||
|  |    PrepareMetaData(field,header); | ||||||
|  |  | ||||||
|  |    ///////////////////////////////////// | ||||||
|  |    // Scidac Private File structure | ||||||
|  |    ///////////////////////////////////// | ||||||
|  |    _scidacFile              = scidacFile(field._grid); | ||||||
|  |  | ||||||
|  |    ///////////////////////////////////// | ||||||
|  |    // Scidac Private Record structure | ||||||
|  |    ///////////////////////////////////// | ||||||
|  |    scidacRecord sr; | ||||||
|  |    sr.datatype   = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount); | ||||||
|  |    sr.date       = header.creation_date; | ||||||
|  |    sr.precision  = ScidacWordMnemonic<stype>(); | ||||||
|  |    sr.recordtype = GRID_IO_FIELD; | ||||||
|  |  | ||||||
|  |    _scidacRecord = sr; | ||||||
|  |  | ||||||
|  |    std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /////////////////////////////////////////////////////// | ||||||
|  |  // Scidac checksum | ||||||
|  |  /////////////////////////////////////////////////////// | ||||||
|  |  static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb) | ||||||
|  |  { | ||||||
|  |    uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16); | ||||||
|  |    uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||||
|  |    if ( scidac_csuma !=scidac_checksuma) return 0; | ||||||
|  |    if ( scidac_csumb !=scidac_checksumb) return 0; | ||||||
|  |     return 1; | ||||||
|  |  } | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Lime, ILDG and Scidac I/O classes | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | class GridLimeReader : public BinaryIO { | ||||||
|  |  public: | ||||||
|  |    /////////////////////////////////////////////////// | ||||||
|  |    // Walks the records of a LIME file with the C LIME library and | ||||||
|  |    // passes binary payloads to BinaryIO. | ||||||
|  |    // FIXME: format for RNG? Now just binary out instead | ||||||
|  |    /////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |    FILE       *File;     // stdio stream the LIME reader is attached to | ||||||
|  |    LimeReader *LimeR;    // LIME reader created in open() | ||||||
|  |    std::string filename; // path; also passed to BinaryIO::readLatticeObject | ||||||
|  |  | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    // Open the file | ||||||
|  |    // Asserts if the file cannot be opened or the LIME reader cannot be created | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    void open(std::string &_filename)  | ||||||
|  |    { | ||||||
|  |      filename= _filename; | ||||||
|  |      File = fopen(filename.c_str(), "r");  assert(File  != NULL); | ||||||
|  |      LimeR = limeCreateReader(File);       assert(LimeR != NULL); | ||||||
|  |    } | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    // Close the file | ||||||
|  |    // NOTE(review): the LIME reader itself is not destroyed here (call is commented out) | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    void close(void){ | ||||||
|  |      fclose(File); | ||||||
|  |      //     limeDestroyReader(LimeR); | ||||||
|  |    } | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   // Read a generic lattice field and verify checksum | ||||||
|  |   // | ||||||
|  |   // Advances to the first record whose type starts with record_name, reads | ||||||
|  |   // the payload through BinaryIO starting at the current offset of File, | ||||||
|  |   // then reads the following scidac-checksum record and asserts that the | ||||||
|  |   // stored sums equal the sums computed during the read. | ||||||
|  |   // Returns without reading anything if no record matches. | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   template<class vobj> | ||||||
|  |   void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) | ||||||
|  |   { | ||||||
|  |     typedef typename vobj::scalar_object sobj; | ||||||
|  |     scidacChecksum scidacChecksum_; | ||||||
|  |     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||||
|  |  | ||||||
|  |     std::string format = getFormatString<vobj>(); | ||||||
|  |  | ||||||
|  |     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl; | ||||||
|  |  | ||||||
|  |       // strncmp returns 0 on a match, so negate it to select the requested record | ||||||
|  |       if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||||
|  |  | ||||||
|  |         off_t offset= ftell(File); | ||||||
|  |         BinarySimpleMunger<sobj,sobj> munge; | ||||||
|  |         // First template argument is the lattice (vector) type, as in the writer | ||||||
|  |         BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  |  | ||||||
|  |         ///////////////////////////////////////////// | ||||||
|  |         // Insist checksum is next record | ||||||
|  |         ///////////////////////////////////////////// | ||||||
|  |         readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); | ||||||
|  |  | ||||||
|  |         ///////////////////////////////////////////// | ||||||
|  |         // Verify checksums | ||||||
|  |         ///////////////////////////////////////////// | ||||||
|  |         int verified = scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||||
|  |         assert(verified==1); | ||||||
|  |         (void)verified; // silence unused warning when asserts are compiled out | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   // Read a generic serialisable object | ||||||
|  |   // | ||||||
|  |   // Advances to the first record whose type starts with record_name, copies | ||||||
|  |   // its payload into a NUL terminated buffer and parses object_name from it | ||||||
|  |   // with XmlReader. Asserts if the records run out without a match. | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   template<class serialisable_object> | ||||||
|  |   void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) | ||||||
|  |   { | ||||||
|  |     // should this be a do while; can we miss a first record?? | ||||||
|  |     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||||
|  |  | ||||||
|  |       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||||
|  |  | ||||||
|  |       // strncmp returns 0 on a match, so negate it to select the requested record | ||||||
|  |       if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||||
|  |         std::vector<char> xmlc(nbytes+1,'\0'); // +1 keeps a terminating NUL | ||||||
|  |         limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||||
|  |         XmlReader RD(&xmlc[0],""); | ||||||
|  |         read(RD,object_name,object); | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |     }   | ||||||
|  |     assert(0); // requested record was not found | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class GridLimeWriter : public BinaryIO { | ||||||
|  |  public: | ||||||
|  |    /////////////////////////////////////////////////// | ||||||
|  |    // Writes LIME records with the C LIME library; lattice payloads | ||||||
|  |    // are written through BinaryIO. | ||||||
|  |    // FIXME: format for RNG? Now just binary out instead | ||||||
|  |    /////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |    FILE       *File;     // stdio stream the LIME writer is attached to | ||||||
|  |    LimeWriter *LimeW;    // LIME writer created in open() | ||||||
|  |    std::string filename; // path; also passed to BinaryIO::writeLatticeObject | ||||||
|  |  | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    // Open the file for writing | ||||||
|  |    // Asserts if the file cannot be opened or the LIME writer cannot be created | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    void open(std::string &_filename) {  | ||||||
|  |      filename= _filename; | ||||||
|  |      File = fopen(filename.c_str(), "w");  assert(File  != NULL); | ||||||
|  |      LimeW = limeCreateWriter(File);       assert(LimeW != NULL ); | ||||||
|  |    } | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    // Close the file | ||||||
|  |    // NOTE(review): the LIME writer itself is not destroyed here (call is commented out) | ||||||
|  |    ///////////////////////////////////////////// | ||||||
|  |    void close(void) { | ||||||
|  |      fclose(File); | ||||||
|  |      //  limeDestroyWriter(LimeW); | ||||||
|  |    } | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  |   // Lime utility functions | ||||||
|  |   // | ||||||
|  |   // Create and write a record header of type "message" announcing PayloadSize | ||||||
|  |   // bytes; MB / ME are forwarded to limeCreateHeader. Returns LIME_SUCCESS. | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  |   int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) | ||||||
|  |   { | ||||||
|  |     LimeRecordHeader *h; | ||||||
|  |     h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize); | ||||||
|  |     assert(h != NULL); | ||||||
|  |     // The write must stay outside assert(): with NDEBUG the asserted | ||||||
|  |     // expression is not evaluated and the header would never be written. | ||||||
|  |     int err = limeWriteRecordHeader(h, LimeW); | ||||||
|  |     assert(err >= 0); | ||||||
|  |     (void)err; // silence unused warning when asserts are compiled out | ||||||
|  |     limeDestroyHeader(h); | ||||||
|  |     return LIME_SUCCESS; | ||||||
|  |   } | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   // Write a generic serialisable object | ||||||
|  |   // | ||||||
|  |   // Serialises object under the tag object_name with XmlWriter and writes | ||||||
|  |   // the resulting string as one complete record of type record_name. | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   template<class serialisable_object> | ||||||
|  |   void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) | ||||||
|  |   { | ||||||
|  |     std::string xmlstring; | ||||||
|  |     { | ||||||
|  |       XmlWriter WR("",""); | ||||||
|  |       write(WR,object_name,object); | ||||||
|  |       xmlstring = WR.XmlString(); | ||||||
|  |     } | ||||||
|  |     uint64_t nbytes = xmlstring.size(); | ||||||
|  |     int err; | ||||||
|  |     LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes); assert(h!= NULL); | ||||||
|  |  | ||||||
|  |     err=limeWriteRecordHeader(h, LimeW);                    assert(err>=0); | ||||||
|  |     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); | ||||||
|  |     err=limeWriterCloseRecord(LimeW);                       assert(err>=0); | ||||||
|  |     (void)err; // silence unused warning when asserts are compiled out | ||||||
|  |     limeDestroyHeader(h); | ||||||
|  |   } | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   // Write a generic lattice field and csum | ||||||
|  |   // | ||||||
|  |   // Writes a record header of type record_name sized for the whole field, | ||||||
|  |   // writes the payload through BinaryIO at the current offset of File, | ||||||
|  |   // closes the record, then writes a scidac-checksum record with ME=1. | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   template<class vobj> | ||||||
|  |   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) | ||||||
|  |   { | ||||||
|  |     //////////////////////////////////////////// | ||||||
|  |     // Create record header | ||||||
|  |     //////////////////////////////////////////// | ||||||
|  |     typedef typename vobj::scalar_object sobj; | ||||||
|  |     int err; | ||||||
|  |     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||||
|  |     uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; | ||||||
|  |     createLimeRecordHeader(record_name, 0, 0, PayloadSize); | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////////////// | ||||||
|  |     // NB: FILE and iostream are jointly writing disjoint sequences in | ||||||
|  |     // the same file through different file handles (integer units). | ||||||
|  |     //  | ||||||
|  |     // These are both buffered, so why I think this code is right is as follows. | ||||||
|  |     // | ||||||
|  |     // i)  write record header to FILE *File, telegraphing the size.  | ||||||
|  |     // ii) ftell reads the offset from FILE *File . | ||||||
|  |     // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. | ||||||
|  |     //      Closes iostream and flushes. | ||||||
|  |     // iv) fseek on FILE * to end of this disjoint section. | ||||||
|  |     //  v) Continue writing scidac record. | ||||||
|  |     // | ||||||
|  |     // NOTE(review): no fseek for step iv) appears in this function; confirm | ||||||
|  |     // that File is positioned past the payload before the checksum is written. | ||||||
|  |     //////////////////////////////////////////////////////////////////// | ||||||
|  |     off_t offset = ftell(File); | ||||||
|  |     std::string format = getFormatString<vobj>(); | ||||||
|  |     BinarySimpleMunger<sobj,sobj> munge; | ||||||
|  |     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  |     err=limeWriterCloseRecord(LimeW);  assert(err>=0); | ||||||
|  |     (void)err; // silence unused warning when asserts are compiled out | ||||||
|  |     //////////////////////////////////////// | ||||||
|  |     // Write checksum element, propagating forward from the BinaryIO | ||||||
|  |     // Always pair a checksum with a binary object, and close message | ||||||
|  |     //////////////////////////////////////// | ||||||
|  |     scidacChecksum checksum; | ||||||
|  |     std::stringstream streama; streama << std::hex << scidac_csuma; | ||||||
|  |     std::stringstream streamb; streamb << std::hex << scidac_csumb; | ||||||
|  |     checksum.suma= streama.str(); | ||||||
|  |     checksum.sumb= streamb.str(); | ||||||
|  |     std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; | ||||||
|  |     writeLimeObject(0,1,checksum,std::string("scidacChecksum"    ),std::string(SCIDAC_CHECKSUM)); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class ScidacWriter : public GridLimeWriter { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////// | ||||||
|  |   // Write the file level records as one LIME message: | ||||||
|  |   // a scidacFile built from the grid (MB=1), then the user file record (ME=1) | ||||||
|  |   //////////////////////////////////////////////// | ||||||
|  |   template<class SerialisableUserFile> | ||||||
|  |   void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) | ||||||
|  |   { | ||||||
|  |     scidacFile    _scidacFile(grid); | ||||||
|  |     writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||||
|  |     writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||||
|  |   } | ||||||
|  |   //////////////////////////////////////////////// | ||||||
|  |   // Write generic lattice field in scidac format | ||||||
|  |   // | ||||||
|  |   // Emits one LIME message: Grid FieldMetaData, the user record, the | ||||||
|  |   // scidac private record, then the binary field followed by its checksum. | ||||||
|  |   //////////////////////////////////////////////// | ||||||
|  |   template <class vobj, class userRecord> | ||||||
|  |   void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)  | ||||||
|  |   { | ||||||
|  |     //////////////////////////////////////// | ||||||
|  |     // fill the Grid header | ||||||
|  |     //////////////////////////////////////// | ||||||
|  |     FieldMetaData header; | ||||||
|  |     scidacRecord  _scidacRecord; | ||||||
|  |     scidacFile    _scidacFile;   // filled by ScidacMetaData but not written by this function | ||||||
|  |  | ||||||
|  |     ScidacMetaData(field,header,_scidacRecord,_scidacFile); | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////// | ||||||
|  |     // Fill the Lime file record by record | ||||||
|  |     ////////////////////////////////////////////// | ||||||
|  |     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||||
|  |     writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); | ||||||
|  |     writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||||
|  |     writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));      // Closes message with checksum | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class IldgWriter : public ScidacWriter { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   /////////////////////////////////// | ||||||
|  |   // A little helper | ||||||
|  |   // Writes the LFN string as one complete ildg-data-lfn record | ||||||
|  |   /////////////////////////////////// | ||||||
|  |   void writeLimeIldgLFN(std::string &LFN) | ||||||
|  |   { | ||||||
|  |     uint64_t PayloadSize = LFN.size(); | ||||||
|  |     int err; | ||||||
|  |     createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize); | ||||||
|  |     err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0); | ||||||
|  |     err=limeWriterCloseRecord(LimeW); assert(err>=0); | ||||||
|  |     (void)err; // silence unused warning when asserts are compiled out | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////////////// | ||||||
|  |   // Special ILDG operations ; gauge configs only. | ||||||
|  |   // Don't require scidac records EXCEPT checksum | ||||||
|  |   // Use Grid MetaData object if present. | ||||||
|  |   // | ||||||
|  |   // Umu         : gauge field to write | ||||||
|  |   // sequence    : stored as header.sequence_number | ||||||
|  |   // LFN         : stored as header.ildg_lfn and written as the ildg-data-lfn record | ||||||
|  |   // description : stored as both ensemble_id and ensemble_label | ||||||
|  |   // | ||||||
|  |   // Asserts that the format is IEEE32BIG or IEEE64BIG and that the field is 4d. | ||||||
|  |   // NOTE: this function ends with fclose(File), so calling close() afterwards | ||||||
|  |   // would close the same stream a second time. | ||||||
|  |   //////////////////////////////////////////////////////////////// | ||||||
|  |   template <class vsimd> | ||||||
|  |   void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)  | ||||||
|  |   { | ||||||
|  |     //////////////////////////////////////// | ||||||
|  |     // fill the Grid header | ||||||
|  |     //////////////////////////////////////// | ||||||
|  |     FieldMetaData header; | ||||||
|  |     scidacRecord  _scidacRecord; | ||||||
|  |     scidacFile    _scidacFile; | ||||||
|  |  | ||||||
|  |     ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); | ||||||
|  |  | ||||||
|  |     std::string format = header.floating_point; | ||||||
|  |     header.ensemble_id    = description; | ||||||
|  |     header.ensemble_label = description; | ||||||
|  |     header.sequence_number = sequence; | ||||||
|  |     header.ildg_lfn = LFN; | ||||||
|  |  | ||||||
|  |     assert ( (format == std::string("IEEE32BIG"))   | ||||||
|  |            ||(format == std::string("IEEE64BIG")) ); | ||||||
|  |  | ||||||
|  |     // Check the dimensionality before indexing dimension[0..3] below | ||||||
|  |     assert(header.nd==4); | ||||||
|  |     assert(header.nd==header.dimension.size()); | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////// | ||||||
|  |     // Fill ILDG header data struct | ||||||
|  |     ////////////////////////////////////////////////////// | ||||||
|  |     ildgFormat ildgfmt ; | ||||||
|  |     ildgfmt.field     = std::string("su3gauge"); | ||||||
|  |  | ||||||
|  |     if ( format == std::string("IEEE32BIG") ) {  | ||||||
|  |       ildgfmt.precision = 32; | ||||||
|  |     } else {  | ||||||
|  |       ildgfmt.precision = 64; | ||||||
|  |     } | ||||||
|  |     ildgfmt.version = 1.0; | ||||||
|  |     ildgfmt.lx = header.dimension[0]; | ||||||
|  |     ildgfmt.ly = header.dimension[1]; | ||||||
|  |     ildgfmt.lz = header.dimension[2]; | ||||||
|  |     ildgfmt.lt = header.dimension[3]; | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Fill the USQCD info field | ||||||
|  |     ////////////////////////////////////////////////////////////////////////////// | ||||||
|  |     usqcdInfo info; | ||||||
|  |     info.version=1.0; | ||||||
|  |     info.plaq   = header.plaquette; | ||||||
|  |     info.linktr = header.link_trace; | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl; | ||||||
|  |     ////////////////////////////////////////////// | ||||||
|  |     // Fill the Lime file record by record | ||||||
|  |     // First message: Grid header + scidac file records | ||||||
|  |     // Second message: record xml, ILDG format, LFN, binary data, checksum | ||||||
|  |     ////////////////////////////////////////////// | ||||||
|  |     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||||
|  |     writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||||
|  |     writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||||
|  |     writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||||
|  |     writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); | ||||||
|  |     writeLimeObject(0,0,ildgfmt,std::string("ildgFormat")   ,std::string(ILDG_FORMAT)); // rec | ||||||
|  |     writeLimeIldgLFN(header.ildg_lfn);                                                 // rec | ||||||
|  |     writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA));      // Closes message with checksum | ||||||
|  |     //    limeDestroyWriter(LimeW); | ||||||
|  |     fclose(File); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class IldgReader : public GridLimeReader { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////////////// | ||||||
|  |   // Read either Grid/SciDAC/ILDG configuration | ||||||
|  |   // Don't require scidac records EXCEPT checksum | ||||||
|  |   // Use Grid MetaData object if present. | ||||||
|  |   // Else use ILDG MetaData object if present. | ||||||
|  |   // Else use SciDAC MetaData object if present. | ||||||
|  |   // | ||||||
|  |   // Umu            : gauge field filled from the ildg-binary-data record | ||||||
|  |   // FieldMetaData_ : filled from the grid-format record when present, | ||||||
|  |   //                  otherwise reconstructed from the ILDG / USQCD records | ||||||
|  |   // | ||||||
|  |   // Asserts on: missing binary, ildg-format or checksum record; lattice | ||||||
|  |   // dimensions differing from the grid of Umu; checksum mismatch; plaquette | ||||||
|  |   // or link trace mismatch when reference values were found in the file. | ||||||
|  |   //////////////////////////////////////////////////////////////// | ||||||
|  |   template <class vsimd> | ||||||
|  |   void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) { | ||||||
|  |  | ||||||
|  |     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||||
|  |     typedef typename GaugeField::vector_object  vobj; | ||||||
|  |     typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
|  |     typedef LorentzColourMatrixF fobj; | ||||||
|  |     typedef LorentzColourMatrixD dobj; | ||||||
|  |  | ||||||
|  |     std::vector<int> dims = Umu._grid->FullDimensions(); | ||||||
|  |  | ||||||
|  |     assert(dims.size()==4); | ||||||
|  |  | ||||||
|  |     // Metadata holders | ||||||
|  |     ildgFormat     ildgFormat_    ; | ||||||
|  |     std::string    ildgLFN_       ; | ||||||
|  |     scidacChecksum scidacChecksum_;  | ||||||
|  |     usqcdInfo      usqcdInfo_     ; | ||||||
|  |  | ||||||
|  |     // track what we read from file | ||||||
|  |     int found_ildgFormat    =0; | ||||||
|  |     int found_ildgLFN       =0; | ||||||
|  |     int found_scidacChecksum=0; | ||||||
|  |     int found_usqcdInfo     =0; | ||||||
|  |     int found_ildgBinary =0; | ||||||
|  |     int found_FieldMetaData =0; | ||||||
|  |  | ||||||
|  |     // Checksums computed by BinaryIO while reading the binary record | ||||||
|  |     uint32_t nersc_csum; | ||||||
|  |     uint32_t scidac_csuma; | ||||||
|  |     uint32_t scidac_csumb; | ||||||
|  |  | ||||||
|  |     // Binary format | ||||||
|  |     std::string format; | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Loop over all records | ||||||
|  |     // -- Order is poorly guaranteed except ILDG header precedes binary section. | ||||||
|  |     // -- Run like an event loop. | ||||||
|  |     // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing | ||||||
|  |     //    that Scidac.  | ||||||
|  |     // -- Insist on Scidac checksum record. | ||||||
|  |     ////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||||
|  |  | ||||||
|  |       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||||
|  |        | ||||||
|  |       ////////////////////////////////////////////////////////////////// | ||||||
|  |       // If not BINARY_DATA read a string and parse | ||||||
|  |       // (strncmp is non-zero when the record type differs) | ||||||
|  |       ////////////////////////////////////////////////////////////////// | ||||||
|  |       if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) )  ) { | ||||||
|  |  | ||||||
|  |         // Copy out the string | ||||||
|  |         std::vector<char> xmlc(nbytes+1,'\0'); | ||||||
|  |         limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||||
|  |         std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; | ||||||
|  |  | ||||||
|  |         ////////////////////////////////// | ||||||
|  |         // ILDG format record | ||||||
|  |         if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {  | ||||||
|  |  | ||||||
|  |           XmlReader RD(&xmlc[0],""); | ||||||
|  |           read(RD,"ildgFormat",ildgFormat_); | ||||||
|  |  | ||||||
|  |           if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG"); | ||||||
|  |           if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG"); | ||||||
|  |  | ||||||
|  |           assert( ildgFormat_.lx == dims[0]); | ||||||
|  |           assert( ildgFormat_.ly == dims[1]); | ||||||
|  |           assert( ildgFormat_.lz == dims[2]); | ||||||
|  |           assert( ildgFormat_.lt == dims[3]); | ||||||
|  |  | ||||||
|  |           found_ildgFormat = 1; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // ILDG logical file name record: payload is the raw string | ||||||
|  |         if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) { | ||||||
|  |           FieldMetaData_.ildg_lfn = std::string(&xmlc[0]); | ||||||
|  |           found_ildgLFN = 1; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Grid metadata record; compare over the length of GRID_FORMAT itself | ||||||
|  |         if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(GRID_FORMAT)) ) {  | ||||||
|  |  | ||||||
|  |           XmlReader RD(&xmlc[0],""); | ||||||
|  |           read(RD,"FieldMetaData",FieldMetaData_); | ||||||
|  |  | ||||||
|  |           format = FieldMetaData_.floating_point; | ||||||
|  |  | ||||||
|  |           assert(FieldMetaData_.dimension[0] == dims[0]); | ||||||
|  |           assert(FieldMetaData_.dimension[1] == dims[1]); | ||||||
|  |           assert(FieldMetaData_.dimension[2] == dims[2]); | ||||||
|  |           assert(FieldMetaData_.dimension[3] == dims[3]); | ||||||
|  |  | ||||||
|  |           found_FieldMetaData = 1; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {  | ||||||
|  |           std::string xmls(&xmlc[0]); | ||||||
|  |           // is it a USQCD info field | ||||||
|  |           if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {  | ||||||
|  |             std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; | ||||||
|  |             XmlReader RD(&xmlc[0],""); | ||||||
|  |             read(RD,"usqcdInfo",usqcdInfo_); | ||||||
|  |             found_usqcdInfo = 1; | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {  | ||||||
|  |           XmlReader RD(&xmlc[0],""); | ||||||
|  |           read(RD,"scidacChecksum",scidacChecksum_); | ||||||
|  |           found_scidacChecksum = 1; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |       } else {   | ||||||
|  |         ///////////////////////////////// | ||||||
|  |         // Binary data | ||||||
|  |         // Uses whatever "format" was set by records seen so far; anything | ||||||
|  |         // other than IEEE64BIG is read through the single precision munger. | ||||||
|  |         ///////////////////////////////// | ||||||
|  |         std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; | ||||||
|  |         off_t offset= ftell(File); | ||||||
|  |  | ||||||
|  |         if ( format == std::string("IEEE64BIG") ) { | ||||||
|  |           GaugeSimpleMunger<dobj, sobj> munge; | ||||||
|  |           BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  |         } else {  | ||||||
|  |           GaugeSimpleMunger<fobj, sobj> munge; | ||||||
|  |           BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         found_ildgBinary = 1; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////// | ||||||
|  |     // Minimally must find binary segment and checksum | ||||||
|  |     // Since this is an ILDG reader require ILDG format | ||||||
|  |     ////////////////////////////////////////////////////// | ||||||
|  |     assert(found_ildgBinary); | ||||||
|  |     assert(found_ildgFormat); | ||||||
|  |     assert(found_scidacChecksum); | ||||||
|  |  | ||||||
|  |     // Must find something with the lattice dimensions | ||||||
|  |     assert(found_FieldMetaData||found_ildgFormat); | ||||||
|  |  | ||||||
|  |     if ( found_FieldMetaData ) { | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl; | ||||||
|  |  | ||||||
|  |     } else {  | ||||||
|  |  | ||||||
|  |       assert(found_ildgFormat); | ||||||
|  |       assert ( ildgFormat_.field == std::string("su3gauge") ); | ||||||
|  |  | ||||||
|  |       /////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |       // Populate our Grid metadata as best we can | ||||||
|  |       /////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |       std::ostringstream vers; vers << ildgFormat_.version; | ||||||
|  |       FieldMetaData_.hdr_version = vers.str(); | ||||||
|  |       FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); | ||||||
|  |  | ||||||
|  |       FieldMetaData_.nd=4; | ||||||
|  |       FieldMetaData_.dimension.resize(4); | ||||||
|  |  | ||||||
|  |       FieldMetaData_.dimension[0] = ildgFormat_.lx ; | ||||||
|  |       FieldMetaData_.dimension[1] = ildgFormat_.ly ; | ||||||
|  |       FieldMetaData_.dimension[2] = ildgFormat_.lz ; | ||||||
|  |       FieldMetaData_.dimension[3] = ildgFormat_.lt ; | ||||||
|  |  | ||||||
|  |       if ( found_usqcdInfo ) {  | ||||||
|  |         FieldMetaData_.plaquette = usqcdInfo_.plaq; | ||||||
|  |         FieldMetaData_.link_trace= usqcdInfo_.linktr; | ||||||
|  |         std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl; | ||||||
|  |         std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl; | ||||||
|  |         std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl; | ||||||
|  |       } else {  | ||||||
|  |         FieldMetaData_.plaquette = 0.0; | ||||||
|  |         FieldMetaData_.link_trace= 0.0; | ||||||
|  |         std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////// | ||||||
|  |     // Really really want to mandate a scidac checksum | ||||||
|  |     //////////////////////////////////////////////////////////// | ||||||
|  |     if ( found_scidacChecksum ) { | ||||||
|  |       FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16); | ||||||
|  |       FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||||
|  |       // Check the helper's verdict instead of discarding it | ||||||
|  |       int verified = scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||||
|  |       assert( verified==1 ); | ||||||
|  |       (void)verified; // silence unused warning when asserts are compiled out | ||||||
|  |       assert( scidac_csuma ==FieldMetaData_.scidac_checksuma); | ||||||
|  |       assert( scidac_csumb ==FieldMetaData_.scidac_checksumb); | ||||||
|  |       std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl; | ||||||
|  |     } else {  | ||||||
|  |       std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl; | ||||||
|  |       assert(0); // Can I insist always checksum ? | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Recompute plaquette and link trace from the field just read and | ||||||
|  |     // compare with the values recorded in the file, when any were found. | ||||||
|  |     if ( found_FieldMetaData || found_usqcdInfo ) { | ||||||
|  |       FieldMetaData checker; | ||||||
|  |       GaugeStatistics(Umu,checker); | ||||||
|  |       assert(fabs(checker.plaquette  - FieldMetaData_.plaquette )<1.0e-5); | ||||||
|  |       assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5); | ||||||
|  |       std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  }; | ||||||
|  |  | ||||||
|  | }} | ||||||
|  |  | ||||||
|  | //HAVE_LIME | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										231
									
								
								lib/parallelIO/IldgIOtypes.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										231
									
								
								lib/parallelIO/IldgIOtypes.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,231 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/parallelIO/IldgIOtypes.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_ILDGTYPES_IO_H | ||||||
|  | #define GRID_ILDGTYPES_IO_H | ||||||
|  |  | ||||||
|  | #ifdef HAVE_LIME | ||||||
|  | extern "C" { // for linkage | ||||||
|  | #include "lime.h" | ||||||
|  | } | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Data representation of records that enter ILDG and SciDac formats | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | #define GRID_FORMAT      "grid-format" | ||||||
|  | #define ILDG_FORMAT      "ildg-format" | ||||||
|  | #define ILDG_BINARY_DATA "ildg-binary-data" | ||||||
|  | #define ILDG_DATA_LFN    "ildg-data-lfn" | ||||||
|  | #define SCIDAC_CHECKSUM           "scidac-checksum" | ||||||
|  | #define SCIDAC_PRIVATE_FILE_XML   "scidac-private-file-xml" | ||||||
|  | #define SCIDAC_FILE_XML           "scidac-file-xml" | ||||||
|  | #define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml" | ||||||
|  | #define SCIDAC_RECORD_XML         "scidac-record-xml" | ||||||
|  | #define SCIDAC_BINARY_DATA        "scidac-binary-data" | ||||||
|  | // Unused SciDAC record name; kept so that site-list support could be added later | ||||||
|  | #define SCIDAC_SITELIST           "scidac-sitelist" | ||||||
|  |  | ||||||
|  |   // Integer codes below are hard-coded to the values QIO uses, for compatibility | ||||||
|  |   const int GRID_IO_SINGLEFILE = 0; // hard-coded to match QIO | ||||||
|  |   const int GRID_IO_MULTIFILE  = 1; // hard-coded to match QIO | ||||||
|  |   const int GRID_IO_FIELD      = 0; // hard-coded to match QIO; stored as the scidacRecord recordtype | ||||||
|  |   const int GRID_IO_GLOBAL     = 1; // hard-coded to match QIO | ||||||
|  |   //////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // QIO uses mandatory "private" records with a fixed format. | ||||||
|  | // Private is in principle "opaque"; however, it can't be changed now because that would break existing | ||||||
|  | // file compatibility, so it should be correct to assume the undocumented but de facto file structure. | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | //////////////////////// | ||||||
|  | // Scidac private file xml | ||||||
|  | // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile> | ||||||
|  | //////////////////////// | ||||||
|  | // Serializable form of the SciDAC private file record: format version, number of | ||||||
|  | // space-time dimensions, the lattice extents (kept as text in "dims") and the | ||||||
|  | // volume format code. | ||||||
|  | struct scidacFile : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, | ||||||
|  |                                   double, version, | ||||||
|  |                                   int, spacetime, | ||||||
|  |                                   std::string, dims, // text; convert to integers with getDimensions() | ||||||
|  |                                   int, volfmt); | ||||||
|  |  | ||||||
|  |   // Parse "dims" as whitespace-separated integers and return them in order. | ||||||
|  |   // Reading stops at the first token that cannot be read as an int. | ||||||
|  |   std::vector<int> getDimensions(void) const { | ||||||
|  |     std::stringstream stream(dims); | ||||||
|  |     std::vector<int> dimensions; | ||||||
|  |     int n; | ||||||
|  |     while(stream >> n){ | ||||||
|  |       dimensions.push_back(n); | ||||||
|  |     } | ||||||
|  |     return dimensions; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Store the extents in "dims" as one line of space-separated integers, | ||||||
|  |   // e.g. "16 16 16 32". An empty vector yields an empty string. | ||||||
|  |   void setDimensions(const std::vector<int> &dimensions) { | ||||||
|  |     const char delimiter = ' '; | ||||||
|  |     std::stringstream stream; | ||||||
|  |     for(std::size_t i=0;i<dimensions.size();i++){ | ||||||
|  |       // separator goes between entries only; no line break is written | ||||||
|  |       if ( i != 0 ) { | ||||||
|  |         stream << delimiter; | ||||||
|  |       } | ||||||
|  |       stream << dimensions[i]; | ||||||
|  |     } | ||||||
|  |     dims = stream.str(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Default constructor: give every scalar member a defined value so that a | ||||||
|  |   // default-constructed record never carries indeterminate data. | ||||||
|  |   scidacFile() { | ||||||
|  |     version      = 1.0; | ||||||
|  |     spacetime    = 0; | ||||||
|  |     volfmt       = GRID_IO_SINGLEFILE; | ||||||
|  |   } | ||||||
|  |   // Constructor provides Grid: dimension count and full extents are taken from it | ||||||
|  |   scidacFile(GridBase * grid){ | ||||||
|  |     version      = 1.0; | ||||||
|  |     spacetime    = grid->_ndimension; | ||||||
|  |     setDimensions(grid->FullDimensions()); | ||||||
|  |     volfmt       = GRID_IO_SINGLEFILE; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | /////////////////////////////////////////////////////////////////////// | ||||||
|  | // scidac-private-record-xml : example | ||||||
|  | // <scidacRecord> | ||||||
|  | // <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype> | ||||||
|  | // <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins> | ||||||
|  | // <typesize>144</typesize><datacount>4</datacount> | ||||||
|  | // </scidacRecord> | ||||||
|  | /////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | // Serializable form of the SciDAC private record XML. | ||||||
|  | struct scidacRecord : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   std::string, date, | ||||||
|  |                                   int,         recordtype, | ||||||
|  |                                   std::string, datatype, | ||||||
|  |                                   std::string, precision, | ||||||
|  |                                   int,         colors, | ||||||
|  |                                   int,         spins, | ||||||
|  |                                   int,         typesize, | ||||||
|  |                                   int,         datacount); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   scidacRecord() : version(1.0) {} | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | //////////////////////// | ||||||
|  | // ILDG format | ||||||
|  | //////////////////////// | ||||||
|  | // Serializable ILDG format record; fields are version, field, precision, lx, ly, lz, lt. | ||||||
|  | struct ildgFormat : Serializable { | ||||||
|  | public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   std::string, field, | ||||||
|  |                                   int,         precision, | ||||||
|  |                                   int,         lx, | ||||||
|  |                                   int,         ly, | ||||||
|  |                                   int,         lz, | ||||||
|  |                                   int,         lt); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   ildgFormat() : version(1.0) {} | ||||||
|  | }; | ||||||
|  | //////////////////////// | ||||||
|  | // USQCD info | ||||||
|  | //////////////////////// | ||||||
|  | // Serializable USQCD info record; fields are version, plaq, linktr and an info string. | ||||||
|  | struct usqcdInfo : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   double,      plaq, | ||||||
|  |                                   double,      linktr, | ||||||
|  |                                   std::string, info); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   usqcdInfo() : version(1.0) {} | ||||||
|  | }; | ||||||
|  | //////////////////////// | ||||||
|  | // Scidac Checksum | ||||||
|  | //////////////////////// | ||||||
|  | // Serializable SciDAC checksum record; suma and sumb are held as strings. | ||||||
|  | struct scidacChecksum : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   std::string, suma, | ||||||
|  |                                   std::string, sumb); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   scidacChecksum() : version(1.0) {} | ||||||
|  | }; | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Type:           scidac-file-xml         <title>MILC ILDG archival gauge configuration</title> | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Type:            | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | //////////////////////// | ||||||
|  | // Scidac private file xml  | ||||||
|  | // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>  | ||||||
|  | ////////////////////////                                                                                                                                                                               | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Propagator file record (currently compiled out); fields are version, type, info. | ||||||
|  | struct usqcdPropFile : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   std::string, type, | ||||||
|  |                                   std::string, info); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   usqcdPropFile() : version(1.0) {} | ||||||
|  | }; | ||||||
|  | // Source info record (currently compiled out); fields are version, info. | ||||||
|  | struct usqcdSourceInfo : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   std::string, info); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   usqcdSourceInfo() : version(1.0) {} | ||||||
|  | }; | ||||||
|  | // Propagator info record (currently compiled out); fields are version, spin, color, info. | ||||||
|  | struct usqcdPropInfo : Serializable { | ||||||
|  |  public: | ||||||
|  |   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo, | ||||||
|  |                                   double,      version, | ||||||
|  |                                   int,         spin, | ||||||
|  |                                   int,         color, | ||||||
|  |                                   std::string, info); | ||||||
|  |  | ||||||
|  |   // Only the version is set on construction | ||||||
|  |   usqcdPropInfo() : version(1.0) {} | ||||||
|  | }; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
							
								
								
									
										325
									
								
								lib/parallelIO/MetaData.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										325
									
								
								lib/parallelIO/MetaData.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,325 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/parallelIO/MetaData.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <algorithm> | ||||||
|  | #include <ctime> | ||||||
|  | #include <fstream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <iostream> | ||||||
|  | #include <map> | ||||||
|  | #include <pwd.h> | ||||||
|  | #include <sys/utsname.h> | ||||||
|  | #include <unistd.h> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  |   // Precision mapping | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  |   // Returns "IEEE32BIG" or "IEEE64BIG" according to whether the real scalar type of | ||||||
|  |   // vobj has the size of float or of double; any other size gives an empty string. | ||||||
|  |   template<class vobj> static std::string getFormatString (void) | ||||||
|  |   { | ||||||
|  |     typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||||
|  |     if ( sizeof(stype) == sizeof(double) ) return std::string("IEEE64BIG"); | ||||||
|  |     if ( sizeof(stype) == sizeof(float)  ) return std::string("IEEE32BIG"); | ||||||
|  |     return std::string(); | ||||||
|  |   } | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // header specification/interpretation | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Serializable header describing a stored field: geometry, boundary conditions, | ||||||
|  |     // data layout, checksums and provenance strings. | ||||||
|  |     class FieldMetaData : Serializable { | ||||||
|  |     public: | ||||||
|  |  | ||||||
|  |       GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData, | ||||||
|  |                                       int,                      nd, | ||||||
|  |                                       std::vector<int>,         dimension, | ||||||
|  |                                       std::vector<std::string>, boundary, | ||||||
|  |                                       int,                      data_start, | ||||||
|  |                                       std::string,              hdr_version, | ||||||
|  |                                       std::string,              storage_format, | ||||||
|  |                                       double,                   link_trace, | ||||||
|  |                                       double,                   plaquette, | ||||||
|  |                                       uint32_t,                 checksum, | ||||||
|  |                                       uint32_t,                 scidac_checksuma, | ||||||
|  |                                       uint32_t,                 scidac_checksumb, | ||||||
|  |                                       unsigned int,             sequence_number, | ||||||
|  |                                       std::string,              data_type, | ||||||
|  |                                       std::string,              ensemble_id, | ||||||
|  |                                       std::string,              ensemble_label, | ||||||
|  |                                       std::string,              ildg_lfn, | ||||||
|  |                                       std::string,              creator, | ||||||
|  |                                       std::string,              creator_hardware, | ||||||
|  |                                       std::string,              creation_date, | ||||||
|  |                                       std::string,              archive_date, | ||||||
|  |                                       std::string,              floating_point); | ||||||
|  |  | ||||||
|  |       // Starts out four-dimensional: nd is 4 and both per-dimension vectors hold 4 entries | ||||||
|  |       FieldMetaData(void) : nd(4), dimension(4), boundary(4) {} | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   namespace QCD { | ||||||
|  |  | ||||||
|  |     using namespace Grid; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Bit and Physical Checksumming and QA of data | ||||||
|  |     ////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Copy the grid geometry into the header: dimension count, the full extent of | ||||||
|  |     // each dimension, and a "PERIODIC" boundary label for every dimension. | ||||||
|  |     inline void GridMetaData(GridBase *grid,FieldMetaData &header) | ||||||
|  |     { | ||||||
|  |       const int ndim = grid->_ndimension; | ||||||
|  |       header.nd = ndim; | ||||||
|  |       header.dimension.resize(ndim); | ||||||
|  |       header.boundary.resize(ndim); | ||||||
|  |       for(int mu=0;mu<ndim;mu++) { | ||||||
|  |         header.dimension[mu] = grid->_fdimensions[mu]; | ||||||
|  |         header.boundary[mu]  = std::string("PERIODIC"); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Fill the provenance fields of the header: creator (user name), creation and | ||||||
|  |     // archive date (local time), and creator_hardware (host description). | ||||||
|  |     // A field whose underlying system query fails is left unchanged, except the | ||||||
|  |     // dates, which are set to an empty string when the time cannot be formatted. | ||||||
|  |     inline void MachineCharacteristics(FieldMetaData &header) | ||||||
|  |     { | ||||||
|  |       // Who | ||||||
|  |       struct passwd *pw = getpwuid (getuid()); | ||||||
|  |       if (pw) header.creator = std::string(pw->pw_name); | ||||||
|  |  | ||||||
|  |       // When: formatted with std::strftime using the "%c %Z" layout; the stream | ||||||
|  |       // based formatting this replaces was disabled and left both dates empty. | ||||||
|  |       std::string date; | ||||||
|  |       std::time_t t = std::time(nullptr); | ||||||
|  |       std::tm *tm_ = std::localtime(&t); | ||||||
|  |       if (tm_) { | ||||||
|  |         char buf[128]; | ||||||
|  |         if (std::strftime(buf, sizeof(buf), "%c %Z", tm_) > 0) { | ||||||
|  |           date = std::string(buf); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |       header.creation_date = date; | ||||||
|  |       header.archive_date  = header.creation_date; | ||||||
|  |  | ||||||
|  |       // What: nodename-machine-sysname-release, only if uname() succeeded | ||||||
|  |       struct utsname name; | ||||||
|  |       if (uname(&name) >= 0) { | ||||||
|  |         header.creator_hardware = std::string(name.nodename)+"-"; | ||||||
|  |         header.creator_hardware+= std::string(name.machine)+"-"; | ||||||
|  |         header.creator_hardware+= std::string(name.sysname)+"-"; | ||||||
|  |         header.creator_hardware+= std::string(name.release); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | // dump_meta_data(field, s): write the members of `field` to the output stream `s` | ||||||
|  | // as text, one "KEY = value" line each, between BEGIN_HEADER and END_HEADER lines. | ||||||
|  | //  - Exactly four DIMENSION_n and four BOUNDARY_n lines are written (n = 1..4), | ||||||
|  | //    whatever the value of field.nd. | ||||||
|  | //  - The three checksums are written in hexadecimal with width 10; the stream is | ||||||
|  | //    switched back to decimal afterwards. The precision of 10 set for LINK_TRACE | ||||||
|  | //    and PLAQUETTE is not restored. | ||||||
|  | //  - The macro expands to several statements (including two for loops), not to a | ||||||
|  | //    single expression, so it must not be used as the unbraced body of an if/for. | ||||||
|  | #define dump_meta_data(field, s)					\ | ||||||
|  |       s << "BEGIN_HEADER"      << std::endl;				\ | ||||||
|  |       s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\ | ||||||
|  |       s << "DATATYPE = "       << field.data_type      << std::endl;	\ | ||||||
|  |       s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\ | ||||||
|  |       for(int i=0;i<4;i++){						\ | ||||||
|  | 	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ | ||||||
|  |       }									\ | ||||||
|  |       s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ | ||||||
|  |       s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \ | ||||||
|  |       for(int i=0;i<4;i++){						\ | ||||||
|  | 	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\ | ||||||
|  |       }									\ | ||||||
|  | 									\ | ||||||
|  |       s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \ | ||||||
|  |       s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \ | ||||||
|  |       s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \ | ||||||
|  |       s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\ | ||||||
|  |       s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\ | ||||||
|  |       s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\ | ||||||
|  |       s << "CREATOR = "         << field.creator          << std::endl;	\ | ||||||
|  |       s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\ | ||||||
|  |       s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\ | ||||||
|  |       s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\ | ||||||
|  |       s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\ | ||||||
|  |       s << "END_HEADER"         << std::endl; | ||||||
|  |  | ||||||
|  | // Generic header preparation: floating point tag, zeroed checksum, grid geometry | ||||||
|  | // and machine information. No gauge statistics are computed here. | ||||||
|  | template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header) | ||||||
|  | { | ||||||
|  |   header.floating_point = getFormatString<vobj>(); | ||||||
|  |   header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||||
|  |   GridMetaData(field._grid,header); | ||||||
|  |   MachineCharacteristics(header); | ||||||
|  | } | ||||||
|  |  // Store the link trace and the average plaquette of a vLorentzColourMatrixF | ||||||
|  |  // gauge field in the header, using the PeriodicGimplF Wilson loops. | ||||||
|  |  inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header) | ||||||
|  |  { | ||||||
|  |    typedef Grid::QCD::WilsonLoops<PeriodicGimplF> Loops; | ||||||
|  |    header.link_trace = Loops::linkTrace(data); | ||||||
|  |    header.plaquette  = Loops::avgPlaquette(data); | ||||||
|  |  } | ||||||
|  |  // Store the link trace and the average plaquette of a vLorentzColourMatrixD | ||||||
|  |  // gauge field in the header, using the PeriodicGimplD Wilson loops. | ||||||
|  |  inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header) | ||||||
|  |  { | ||||||
|  |    typedef Grid::QCD::WilsonLoops<PeriodicGimplD> Loops; | ||||||
|  |    header.link_trace = Loops::linkTrace(data); | ||||||
|  |    header.plaquette  = Loops::avgPlaquette(data); | ||||||
|  |  } | ||||||
|  |  // Header preparation for vLorentzColourMatrixF gauge fields: as the generic | ||||||
|  |  // version, plus the gauge statistics computed before the machine information. | ||||||
|  |  template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header) | ||||||
|  |  { | ||||||
|  |    header.floating_point = getFormatString<vLorentzColourMatrixF>(); | ||||||
|  |    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||||
|  |    GridMetaData(field._grid,header); | ||||||
|  |    GaugeStatistics(field,header); | ||||||
|  |    MachineCharacteristics(header); | ||||||
|  |  } | ||||||
|  |  // Header preparation for vLorentzColourMatrixD gauge fields: as the generic | ||||||
|  |  // version, plus the gauge statistics computed before the machine information. | ||||||
|  |  template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header) | ||||||
|  |  { | ||||||
|  |    header.floating_point = getFormatString<vLorentzColourMatrixD>(); | ||||||
|  |    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||||
|  |    GridMetaData(field._grid,header); | ||||||
|  |    GaugeStatistics(field,header); | ||||||
|  |    MachineCharacteristics(header); | ||||||
|  |  } | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Utilities ; these are QCD aware | ||||||
|  |     ////////////////////////////////////////////////////////////////////// | ||||||
|  |     // For every direction, overwrite row 2 of the colour matrix with the adjoint of | ||||||
|  |     // the cross product of rows 0 and 1. Rows 0 and 1 are read only. | ||||||
|  |     inline void reconstruct3(LorentzColourMatrix & cm) | ||||||
|  |     { | ||||||
|  |       // column indices | ||||||
|  |       const int c0=0; | ||||||
|  |       const int c1=1; | ||||||
|  |       const int c2=2; | ||||||
|  |       for(int dir=0;dir<Nd;dir++){ | ||||||
|  |         cm(dir)()(2,c0) = adj(cm(dir)()(0,c1)*cm(dir)()(1,c2)-cm(dir)()(0,c2)*cm(dir)()(1,c1)); // c0 = c1.c2 - c2.c1 | ||||||
|  |         cm(dir)()(2,c1) = adj(cm(dir)()(0,c2)*cm(dir)()(1,c0)-cm(dir)()(0,c0)*cm(dir)()(1,c2)); // c1 = c2.c0 - c0.c2 | ||||||
|  |         cm(dir)()(2,c2) = adj(cm(dir)()(0,c0)*cm(dir)()(1,c1)-cm(dir)()(0,c1)*cm(dir)()(1,c0)); // c2 = c0.c1 - c1.c0 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Some data types for intermediate storage | ||||||
|  |     //////////////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Nd entries, each holding 2 rows of Nc elements of type vtype | ||||||
|  |     template<typename vtype> | ||||||
|  |     using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; | ||||||
|  |  | ||||||
|  |     using LorentzColour2x3  = iLorentzColour2x3<Complex>; | ||||||
|  |     using LorentzColour2x3F = iLorentzColour2x3<ComplexF>; | ||||||
|  |     using LorentzColour2x3D = iLorentzColour2x3<ComplexD>; | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Simple classes for precision conversion | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Copies an sobj into an fobj one real scalar word at a time, converting each word | ||||||
|  | // from the real scalar type of sobj to that of fobj. Both objects must contain the | ||||||
|  | // same number of words (checked with assert). | ||||||
|  | template <class fobj, class sobj> | ||||||
|  | struct BinarySimpleUnmunger { | ||||||
|  |   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; | ||||||
|  |   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; | ||||||
|  |    | ||||||
|  |   // in  : source object, viewed as an array of sobj_stype words | ||||||
|  |   // out : destination object, viewed as an array of fobj_stype words | ||||||
|  |   void operator()(sobj &in, fobj &out) { | ||||||
|  |     // take word by word and convert between the two scalar types | ||||||
|  |     fobj_stype *out_buffer = (fobj_stype *)&out; | ||||||
|  |     sobj_stype *in_buffer = (sobj_stype *)&in; | ||||||
|  |     size_t fobj_words = sizeof(out) / sizeof(fobj_stype); | ||||||
|  |     size_t sobj_words = sizeof(in) / sizeof(sobj_stype); | ||||||
|  |     assert(fobj_words == sobj_words); | ||||||
|  |      | ||||||
|  |     for (unsigned int word = 0; word < sobj_words; word++) | ||||||
|  |       out_buffer[word] = in_buffer[word];  // type conversion on the fly | ||||||
|  |      | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Copies an fobj into an sobj one real scalar word at a time, converting each word | ||||||
|  | // from the real scalar type of fobj to that of sobj. Both objects must contain the | ||||||
|  | // same number of words (checked with assert). | ||||||
|  | template <class fobj, class sobj> | ||||||
|  | struct BinarySimpleMunger { | ||||||
|  |   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; | ||||||
|  |   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; | ||||||
|  |  | ||||||
|  |   // in  : source object, viewed as an array of fobj_stype words | ||||||
|  |   // out : destination object, viewed as an array of sobj_stype words | ||||||
|  |   void operator()(fobj &in, sobj &out) { | ||||||
|  |     // take word by word and convert between the two scalar types | ||||||
|  |     fobj_stype *in_buffer = (fobj_stype *)&in; | ||||||
|  |     sobj_stype *out_buffer = (sobj_stype *)&out; | ||||||
|  |     size_t fobj_words = sizeof(in) / sizeof(fobj_stype); | ||||||
|  |     size_t sobj_words = sizeof(out) / sizeof(sobj_stype); | ||||||
|  |     assert(fobj_words == sobj_words); | ||||||
|  |      | ||||||
|  |     for (unsigned int word = 0; word < sobj_words; word++) | ||||||
|  |       out_buffer[word] = in_buffer[word];  // type conversion on the fly | ||||||
|  |      | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     // Element-wise copy of every (mu; i,j) entry from an fobj to an sobj | ||||||
|  |     template<class fobj,class sobj> | ||||||
|  |     struct GaugeSimpleMunger{ | ||||||
|  |       void operator()(fobj &in, sobj &out) { | ||||||
|  |         for (int mu = 0; mu < Nd; ++mu) | ||||||
|  |           for (int row = 0; row < Nc; ++row) | ||||||
|  |             for (int col = 0; col < Nc; ++col) | ||||||
|  |               out(mu)()(row, col) = in(mu)()(row, col); | ||||||
|  |       } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Element-wise copy of every (mu; i,j) entry from an sobj to an fobj | ||||||
|  |     template <class fobj, class sobj> | ||||||
|  |     struct GaugeSimpleUnmunger { | ||||||
|  |       void operator()(sobj &in, fobj &out) { | ||||||
|  |         for (int mu = 0; mu < Nd; ++mu) | ||||||
|  |           for (int row = 0; row < Nc; ++row) | ||||||
|  |             for (int col = 0; col < Nc; ++col) | ||||||
|  |               out(mu)()(row, col) = in(mu)()(row, col); | ||||||
|  |       } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Copy the two stored rows (3 columns each) of every direction from the fobj | ||||||
|  |     // into the sobj, then call reconstruct3 to fill in the third row. | ||||||
|  |     template<class fobj,class sobj> | ||||||
|  |     struct Gauge3x2munger{ | ||||||
|  |       void operator() (fobj &in,sobj &out){ | ||||||
|  |         for(int mu=0;mu<Nd;++mu) | ||||||
|  |           for(int row=0;row<2;++row) | ||||||
|  |             for(int col=0;col<3;++col) | ||||||
|  |               out(mu)()(row,col) = in(mu)(row)(col); | ||||||
|  |         reconstruct3(out); | ||||||
|  |       } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Copy rows 0 and 1 (3 columns each) of every direction from the sobj into | ||||||
|  |     // the fobj; row 2 of the input is not read. | ||||||
|  |     template<class fobj,class sobj> | ||||||
|  |     struct Gauge3x2unmunger{ | ||||||
|  |       void operator() (sobj &in,fobj &out){ | ||||||
|  |         for(int mu=0;mu<Nd;++mu) | ||||||
|  |           for(int row=0;row<2;++row) | ||||||
|  |             for(int col=0;col<3;++col) | ||||||
|  |               out(mu)(row)(col) = in(mu)()(row,col); | ||||||
|  |       } | ||||||
|  |     }; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | } | ||||||
| @@ -1,4 +1,4 @@ | |||||||
|     /************************************************************************************* | /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
| @@ -6,9 +6,9 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Matt Spraggs <matthew.spraggs@gmail.com> |     Author: Matt Spraggs <matthew.spraggs@gmail.com> | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -25,232 +25,38 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ | *************************************************************************************/ | ||||||
|     /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| #ifndef GRID_NERSC_IO_H | #ifndef GRID_NERSC_IO_H | ||||||
| #define GRID_NERSC_IO_H | #define GRID_NERSC_IO_H | ||||||
|  |  | ||||||
| #include <algorithm> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <fstream> |  | ||||||
| #include <map> |  | ||||||
|  |  | ||||||
| #include <unistd.h> |  | ||||||
| #include <sys/utsname.h> |  | ||||||
| #include <pwd.h> |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| namespace QCD { |   namespace QCD { | ||||||
|  |  | ||||||
| using namespace Grid; |     using namespace Grid; | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////////// | ||||||
| // Some data types for intermediate storage |     // Write and read from fstream; compute header offset for payload | ||||||
| //////////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////////// | ||||||
|   template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >; |     class NerscIO : public BinaryIO {  | ||||||
|  |  | ||||||
|   typedef iLorentzColour2x3<Complex>  LorentzColour2x3; |  | ||||||
|   typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; |  | ||||||
|   typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D; |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // header specification/interpretation |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// |  | ||||||
| class NerscField { |  | ||||||
|  public: |  | ||||||
|     // header strings (not in order) |  | ||||||
|     int dimension[4]; |  | ||||||
|     std::string boundary[4];  |  | ||||||
|     int data_start; |  | ||||||
|     std::string hdr_version; |  | ||||||
|     std::string storage_format; |  | ||||||
|     // Checks on data |  | ||||||
|     double link_trace; |  | ||||||
|     double plaquette; |  | ||||||
|     uint32_t checksum; |  | ||||||
|     unsigned int sequence_number; |  | ||||||
|     std::string data_type; |  | ||||||
|     std::string ensemble_id ; |  | ||||||
|     std::string ensemble_label ; |  | ||||||
|     std::string creator ; |  | ||||||
|     std::string creator_hardware ; |  | ||||||
|     std::string creation_date ; |  | ||||||
|     std::string archive_date ; |  | ||||||
|     std::string floating_point; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////// |  | ||||||
| // Bit and Physical Checksumming and QA of data |  | ||||||
| ////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| inline void NerscGrid(GridBase *grid,NerscField &header) |  | ||||||
| { |  | ||||||
|   assert(grid->_ndimension==4); |  | ||||||
|   for(int d=0;d<4;d++) { |  | ||||||
|     header.dimension[d] = grid->_fdimensions[d]; |  | ||||||
|   } |  | ||||||
|   for(int d=0;d<4;d++) { |  | ||||||
|     header.boundary[d] = std::string("PERIODIC"); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| template<class GaugeField> |  | ||||||
| inline void NerscStatistics(GaugeField & data,NerscField &header) |  | ||||||
| { |  | ||||||
|   // How to convert data precision etc... |  | ||||||
|   header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data); |  | ||||||
|   header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| inline void NerscMachineCharacteristics(NerscField &header) |  | ||||||
| { |  | ||||||
|   // Who |  | ||||||
|   struct passwd *pw = getpwuid (getuid()); |  | ||||||
|   if (pw) header.creator = std::string(pw->pw_name);  |  | ||||||
|  |  | ||||||
|   // When |  | ||||||
|   std::time_t t = std::time(nullptr); |  | ||||||
|   std::tm tm = *std::localtime(&t); |  | ||||||
|   std::ostringstream oss;  |  | ||||||
|   //  oss << std::put_time(&tm, "%c %Z"); |  | ||||||
|   header.creation_date = oss.str(); |  | ||||||
|   header.archive_date  = header.creation_date; |  | ||||||
|  |  | ||||||
|   // What |  | ||||||
|   struct utsname name;  uname(&name); |  | ||||||
|   header.creator_hardware = std::string(name.nodename)+"-"; |  | ||||||
|   header.creator_hardware+= std::string(name.machine)+"-"; |  | ||||||
|   header.creator_hardware+= std::string(name.sysname)+"-"; |  | ||||||
|   header.creator_hardware+= std::string(name.release); |  | ||||||
|  |  | ||||||
| } |  | ||||||
| ////////////////////////////////////////////////////////////////////// |  | ||||||
| // Utilities ; these are QCD aware |  | ||||||
| ////////////////////////////////////////////////////////////////////// |  | ||||||
|     inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) |  | ||||||
|     { |  | ||||||
|       BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum); |  | ||||||
|     } |  | ||||||
|     inline void reconstruct3(LorentzColourMatrix & cm) |  | ||||||
|     { |  | ||||||
|       const int x=0; |  | ||||||
|       const int y=1; |  | ||||||
|       const int z=2; |  | ||||||
|       for(int mu=0;mu<4;mu++){ |  | ||||||
| 	cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy |  | ||||||
| 	cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz |  | ||||||
| 	cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct NerscSimpleMunger{ |  | ||||||
|  |  | ||||||
|       void operator() (fobj &in,sobj &out,uint32_t &csum){ |  | ||||||
|  |  | ||||||
|       for(int mu=0;mu<4;mu++){ |  | ||||||
|       for(int i=0;i<3;i++){ |  | ||||||
|       for(int j=0;j<3;j++){ |  | ||||||
| 	out(mu)()(i,j) = in(mu)()(i,j); |  | ||||||
|       }}} |  | ||||||
|       NerscChecksum((uint32_t *)&in,sizeof(in),csum);  |  | ||||||
|       }; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct NerscSimpleUnmunger{ |  | ||||||
|       void operator() (sobj &in,fobj &out,uint32_t &csum){ |  | ||||||
| 	for(int mu=0;mu<Nd;mu++){ |  | ||||||
| 	for(int i=0;i<Nc;i++){ |  | ||||||
| 	for(int j=0;j<Nc;j++){ |  | ||||||
| 	  out(mu)()(i,j) = in(mu)()(i,j); |  | ||||||
| 	}}} |  | ||||||
| 	NerscChecksum((uint32_t *)&out,sizeof(out),csum);  |  | ||||||
|       }; |  | ||||||
|     }; |  | ||||||
|   |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct Nersc3x2munger{ |  | ||||||
|       void operator() (fobj &in,sobj &out,uint32_t &csum){ |  | ||||||
|       |  | ||||||
| 	NerscChecksum((uint32_t *)&in,sizeof(in),csum);  |  | ||||||
|  |  | ||||||
| 	for(int mu=0;mu<4;mu++){ |  | ||||||
| 	  for(int i=0;i<2;i++){ |  | ||||||
| 	    for(int j=0;j<3;j++){ |  | ||||||
| 	      out(mu)()(i,j) = in(mu)(i)(j); |  | ||||||
| 	    }} |  | ||||||
| 	} |  | ||||||
| 	reconstruct3(out); |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class fobj,class sobj> |  | ||||||
|     struct Nersc3x2unmunger{ |  | ||||||
|  |  | ||||||
|       void operator() (sobj &in,fobj &out,uint32_t &csum){ |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	for(int mu=0;mu<4;mu++){ |  | ||||||
| 	  for(int i=0;i<2;i++){ |  | ||||||
| 	    for(int j=0;j<3;j++){ |  | ||||||
| 	      out(mu)(i)(j) = in(mu)()(i,j); |  | ||||||
| 	    }} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	NerscChecksum((uint32_t *)&out,sizeof(out),csum);  |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Write and read from fstream; comput header offset for payload |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// |  | ||||||
| class NerscIO : public BinaryIO {  |  | ||||||
|     public: |     public: | ||||||
|  |  | ||||||
|       static inline void truncate(std::string file){ |       static inline void truncate(std::string file){ | ||||||
| 	std::ofstream fout(file,std::ios::out); | 	std::ofstream fout(file,std::ios::out); | ||||||
|       } |       } | ||||||
|    |    | ||||||
|   #define dump_nersc_header(field, s)\ |       static inline unsigned int writeHeader(FieldMetaData &field,std::string file) | ||||||
|   s << "BEGIN_HEADER"      << std::endl;\ |  | ||||||
|   s << "HDR_VERSION = "    << field.hdr_version    << std::endl;\ |  | ||||||
|   s << "DATATYPE = "       << field.data_type      << std::endl;\ |  | ||||||
|   s << "STORAGE_FORMAT = " << field.storage_format << std::endl;\ |  | ||||||
|   for(int i=0;i<4;i++){\ |  | ||||||
|     s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;\ |  | ||||||
|   }\ |  | ||||||
|   s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;\ |  | ||||||
|   s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;\ |  | ||||||
|   for(int i=0;i<4;i++){\ |  | ||||||
|     s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;\ |  | ||||||
|   }\ |  | ||||||
|   \ |  | ||||||
|   s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;\ |  | ||||||
|   s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;\ |  | ||||||
|   s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;\ |  | ||||||
|   s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;\ |  | ||||||
|   s << "CREATOR = "         << field.creator          << std::endl;\ |  | ||||||
|   s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;\ |  | ||||||
|   s << "CREATION_DATE = "   << field.creation_date    << std::endl;\ |  | ||||||
|   s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;\ |  | ||||||
|   s << "FLOATING_POINT = "  << field.floating_point   << std::endl;\ |  | ||||||
|   s << "END_HEADER"         << std::endl; |  | ||||||
|    |  | ||||||
|   static inline unsigned int writeHeader(NerscField &field,std::string file) |  | ||||||
|       { |       { | ||||||
|       std::ofstream fout(file,std::ios::out|std::ios::in); |       std::ofstream fout(file,std::ios::out|std::ios::in); | ||||||
|       fout.seekp(0,std::ios::beg); |       fout.seekp(0,std::ios::beg); | ||||||
|     dump_nersc_header(field, fout); |       dump_meta_data(field, fout); | ||||||
|       field.data_start = fout.tellp(); |       field.data_start = fout.tellp(); | ||||||
|       return field.data_start; |       return field.data_start; | ||||||
| } |     } | ||||||
|  |  | ||||||
| // for the header-reader |       // for the header-reader | ||||||
| static inline int readHeader(std::string file,GridBase *grid,  NerscField &field) |       static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field) | ||||||
| { |       { | ||||||
|       int offset=0; |       int offset=0; | ||||||
|       std::map<std::string,std::string> header; |       std::map<std::string,std::string> header; | ||||||
|       std::string line; |       std::string line; | ||||||
| @@ -319,23 +125,23 @@ static inline int readHeader(std::string file,GridBase *grid,  NerscField &field | |||||||
|       field.floating_point   = header["FLOATING_POINT"]; |       field.floating_point   = header["FLOATING_POINT"]; | ||||||
|  |  | ||||||
|       return field.data_start; |       return field.data_start; | ||||||
| } |     } | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Now the meat: the object readers |     // Now the meat: the object readers | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| #define PARALLEL_READ |  | ||||||
| #define PARALLEL_WRITE |  | ||||||
|  |  | ||||||
| template<class vsimd> |     template<class vsimd> | ||||||
| static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file) |     static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | ||||||
| { | 					 FieldMetaData& header, | ||||||
|  | 					 std::string file) | ||||||
|  |     { | ||||||
|       typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; |       typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||||
|  |  | ||||||
|       GridBase *grid = Umu._grid; |       GridBase *grid = Umu._grid; | ||||||
|       int offset = readHeader(file,Umu._grid,header); |       int offset = readHeader(file,Umu._grid,header); | ||||||
|  |  | ||||||
|   NerscField clone(header); |       FieldMetaData clone(header); | ||||||
|  |  | ||||||
|       std::string format(header.floating_point); |       std::string format(header.floating_point); | ||||||
|  |  | ||||||
| @@ -344,177 +150,172 @@ static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | |||||||
|       int ieee64big = (format == std::string("IEEE64BIG")); |       int ieee64big = (format == std::string("IEEE64BIG")); | ||||||
|       int ieee64    = (format == std::string("IEEE64")); |       int ieee64    = (format == std::string("IEEE64")); | ||||||
|  |  | ||||||
|   uint32_t csum; |       uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||||
|       // depending on datatype, set up munger; |       // depending on datatype, set up munger; | ||||||
|       // munger is a function of <floating point, Real, data_type> |       // munger is a function of <floating point, Real, data_type> | ||||||
|       if ( header.data_type == std::string("4D_SU3_GAUGE") ) { |       if ( header.data_type == std::string("4D_SU3_GAUGE") ) { | ||||||
| 	if ( ieee32 || ieee32big ) { | 	if ( ieee32 || ieee32big ) { | ||||||
| #ifdef PARALLEL_READ | 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||||
|       csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | 	    (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format, | ||||||
| 	(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); | 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||||
| #else |  | ||||||
|       csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  |  | ||||||
| 	(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); |  | ||||||
| #endif |  | ||||||
| 	} | 	} | ||||||
| 	if ( ieee64 || ieee64big ) { | 	if ( ieee64 || ieee64big ) { | ||||||
| #ifdef PARALLEL_READ | 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||||
|       csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | 	    (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format, | ||||||
|       	(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); | 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||||
| #else  |  | ||||||
|       csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  |  | ||||||
|       	(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); |  | ||||||
| #endif |  | ||||||
| 	} | 	} | ||||||
|       } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { |       } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { | ||||||
| 	if ( ieee32 || ieee32big ) { | 	if ( ieee32 || ieee32big ) { | ||||||
| #ifdef PARALLEL_READ | 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||||
|       csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | 	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format, | ||||||
| 	(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); | 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||||
| #else |  | ||||||
|       csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> |  | ||||||
| 	(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); |  | ||||||
| #endif |  | ||||||
| 	} | 	} | ||||||
| 	if ( ieee64 || ieee64big ) { | 	if ( ieee64 || ieee64big ) { | ||||||
| #ifdef PARALLEL_READ | 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||||
|       csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | 	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format, | ||||||
| 	(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); | 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||||
| #else |  | ||||||
|       csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> |  | ||||||
| 	(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); |  | ||||||
| #endif |  | ||||||
| 	} | 	} | ||||||
|       } else { |       } else { | ||||||
| 	assert(0); | 	assert(0); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|   NerscStatistics<GaugeField>(Umu,clone); |       GaugeStatistics(Umu,clone); | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<            csum<< std::dec |       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec | ||||||
| 	       <<" header   "<<std::hex<<header.checksum<<std::dec <<std::endl; | 	       <<" header   "<<std::hex<<header.checksum<<std::dec <<std::endl; | ||||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette |       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette | ||||||
| 	       <<" header    "<<header.plaquette<<std::endl; | 	       <<" header    "<<header.plaquette<<std::endl; | ||||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace |       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace | ||||||
| 	       <<" header    "<<header.link_trace<<std::endl; | 	       <<" header    "<<header.link_trace<<std::endl; | ||||||
|  |  | ||||||
|  |       if ( fabs(clone.plaquette -header.plaquette ) >=  1.0e-5 ) {  | ||||||
|  | 	std::cout << " Plaquette mismatch "<<std::endl; | ||||||
|  | 	std::cout << Umu[0]<<std::endl; | ||||||
|  | 	std::cout << Umu[1]<<std::endl; | ||||||
|  |       } | ||||||
|  |       if ( nersc_csum != header.checksum ) {  | ||||||
|  | 	std::cerr << " checksum mismatch " << std::endl; | ||||||
|  | 	std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl; | ||||||
|  | 	std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl; | ||||||
|  | 	std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; | ||||||
|  | 	exit(0); | ||||||
|  |       } | ||||||
|       assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); |       assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||||
|       assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); |       assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||||
|   assert(csum == header.checksum ); |       assert(nersc_csum == header.checksum ); | ||||||
|        |        | ||||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; |       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; | ||||||
| } |     } | ||||||
|  |  | ||||||
| template<class vsimd> |       template<class vsimd> | ||||||
| static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32) |       static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | ||||||
| { | 					    std::string file,  | ||||||
|  | 					    int two_row, | ||||||
|  | 					    int bits32) | ||||||
|  |       { | ||||||
| 	typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | 	typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||||
|  |  | ||||||
| 	typedef iLorentzColourMatrix<vsimd> vobj; | 	typedef iLorentzColourMatrix<vsimd> vobj; | ||||||
| 	typedef typename vobj::scalar_object sobj; | 	typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
|  | 	FieldMetaData header; | ||||||
|  | 	/////////////////////////////////////////// | ||||||
| 	// Following should become arguments | 	// Following should become arguments | ||||||
|   NerscField header; | 	/////////////////////////////////////////// | ||||||
| 	header.sequence_number = 1; | 	header.sequence_number = 1; | ||||||
| 	header.ensemble_id     = "UKQCD"; | 	header.ensemble_id     = "UKQCD"; | ||||||
| 	header.ensemble_label  = "DWF"; | 	header.ensemble_label  = "DWF"; | ||||||
|  |  | ||||||
| 	typedef LorentzColourMatrixD fobj3D; | 	typedef LorentzColourMatrixD fobj3D; | ||||||
| 	typedef LorentzColour2x3D    fobj2D; | 	typedef LorentzColour2x3D    fobj2D; | ||||||
|   typedef LorentzColourMatrixF fobj3f; |  | ||||||
|   typedef LorentzColour2x3F    fobj2f; |  | ||||||
|    |    | ||||||
| 	GridBase *grid = Umu._grid; | 	GridBase *grid = Umu._grid; | ||||||
|  |  | ||||||
|   NerscGrid(grid,header); | 	GridMetaData(grid,header); | ||||||
|   NerscStatistics<GaugeField>(Umu,header); | 	assert(header.nd==4); | ||||||
|   NerscMachineCharacteristics(header); | 	GaugeStatistics(Umu,header); | ||||||
|  | 	MachineCharacteristics(header); | ||||||
|  |  | ||||||
|   uint32_t csum; |  | ||||||
| 	int offset; | 	int offset; | ||||||
|    |    | ||||||
| 	truncate(file); | 	truncate(file); | ||||||
|  |  | ||||||
|   if ( two_row ) {  | 	// Sod it -- always write 3x3 double | ||||||
|  |  | ||||||
|     header.floating_point = std::string("IEEE64BIG"); |  | ||||||
|     header.data_type      = std::string("4D_SU3_GAUGE"); |  | ||||||
|     Nersc3x2unmunger<fobj2D,sobj> munge; |  | ||||||
|     BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum); |  | ||||||
|     offset = writeHeader(header,file); |  | ||||||
| #ifdef PARALLEL_WRITE |  | ||||||
|     csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); |  | ||||||
| #else |  | ||||||
|     csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
|   } else {  |  | ||||||
| 	header.floating_point = std::string("IEEE64BIG"); | 	header.floating_point = std::string("IEEE64BIG"); | ||||||
| 	header.data_type      = std::string("4D_SU3_GAUGE_3x3"); | 	header.data_type      = std::string("4D_SU3_GAUGE_3x3"); | ||||||
|     NerscSimpleUnmunger<fobj3D,sobj> munge; | 	GaugeSimpleUnmunger<fobj3D,sobj> munge; | ||||||
|     BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum); |  | ||||||
| 	offset = writeHeader(header,file); | 	offset = writeHeader(header,file); | ||||||
| #ifdef PARALLEL_WRITE |  | ||||||
|     csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); |  | ||||||
| #else |  | ||||||
|     csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); |  | ||||||
| #endif |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl; | 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||||
|  | 	BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point, | ||||||
|  | 								  nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  | 	header.checksum = nersc_csum; | ||||||
|  | 	writeHeader(header,file); | ||||||
|  |  | ||||||
|  | 	std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum " | ||||||
|  | 		 <<std::hex<<header.checksum | ||||||
|  | 		 <<std::dec<<" plaq "<< header.plaquette <<std::endl; | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  |  | ||||||
|       /////////////////////////////// |       /////////////////////////////// | ||||||
|       // RNG state |       // RNG state | ||||||
|       /////////////////////////////// |       /////////////////////////////// | ||||||
| static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file) |       static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file) | ||||||
| { |       { | ||||||
| 	typedef typename GridParallelRNG::RngStateType RngStateType; | 	typedef typename GridParallelRNG::RngStateType RngStateType; | ||||||
|  |  | ||||||
| 	// Following should become arguments | 	// Following should become arguments | ||||||
|   NerscField header; | 	FieldMetaData header; | ||||||
| 	header.sequence_number = 1; | 	header.sequence_number = 1; | ||||||
| 	header.ensemble_id     = "UKQCD"; | 	header.ensemble_id     = "UKQCD"; | ||||||
| 	header.ensemble_label  = "DWF"; | 	header.ensemble_label  = "DWF"; | ||||||
|  |  | ||||||
| 	GridBase *grid = parallel._grid; | 	GridBase *grid = parallel._grid; | ||||||
|  |  | ||||||
|   NerscGrid(grid,header); | 	GridMetaData(grid,header); | ||||||
|  | 	assert(header.nd==4); | ||||||
| 	header.link_trace=0.0; | 	header.link_trace=0.0; | ||||||
| 	header.plaquette=0.0; | 	header.plaquette=0.0; | ||||||
|   NerscMachineCharacteristics(header); | 	MachineCharacteristics(header); | ||||||
|  |  | ||||||
|   uint32_t csum; |  | ||||||
| 	int offset; | 	int offset; | ||||||
|    |    | ||||||
| #ifdef RNG_RANLUX | #ifdef RNG_RANLUX | ||||||
| 	header.floating_point = std::string("UINT64"); | 	header.floating_point = std::string("UINT64"); | ||||||
| 	header.data_type      = std::string("RANLUX48"); | 	header.data_type      = std::string("RANLUX48"); | ||||||
| #else | #endif | ||||||
|  | #ifdef RNG_MT19937 | ||||||
| 	header.floating_point = std::string("UINT32"); | 	header.floating_point = std::string("UINT32"); | ||||||
| 	header.data_type      = std::string("MT19937"); | 	header.data_type      = std::string("MT19937"); | ||||||
| #endif | #endif | ||||||
|  | #ifdef RNG_SITMO | ||||||
|  | 	header.floating_point = std::string("UINT64"); | ||||||
|  | 	header.data_type      = std::string("SITMO"); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| 	truncate(file); | 	truncate(file); | ||||||
| 	offset = writeHeader(header,file); | 	offset = writeHeader(header,file); | ||||||
|   csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset); | 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||||
|   header.checksum = csum; | 	BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  | 	header.checksum = nersc_csum; | ||||||
| 	offset = writeHeader(header,file); | 	offset = writeHeader(header,file); | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl; | 	std::cout<<GridLogMessage  | ||||||
|  | 		 <<"Written NERSC RNG STATE "<<file<< " checksum " | ||||||
|  | 		 <<std::hex<<header.checksum | ||||||
|  | 		 <<std::dec<<std::endl; | ||||||
|  |  | ||||||
|       } |       } | ||||||
|      |      | ||||||
| static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file) |       static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file) | ||||||
| { |       { | ||||||
| 	typedef typename GridParallelRNG::RngStateType RngStateType; | 	typedef typename GridParallelRNG::RngStateType RngStateType; | ||||||
|  |  | ||||||
| 	GridBase *grid = parallel._grid; | 	GridBase *grid = parallel._grid; | ||||||
|  |  | ||||||
| 	int offset = readHeader(file,grid,header); | 	int offset = readHeader(file,grid,header); | ||||||
|  |  | ||||||
|   NerscField clone(header); | 	FieldMetaData clone(header); | ||||||
|  |  | ||||||
| 	std::string format(header.floating_point); | 	std::string format(header.floating_point); | ||||||
| 	std::string data_type(header.data_type); | 	std::string data_type(header.data_type); | ||||||
| @@ -522,22 +323,31 @@ static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel | |||||||
| #ifdef RNG_RANLUX | #ifdef RNG_RANLUX | ||||||
| 	assert(format == std::string("UINT64")); | 	assert(format == std::string("UINT64")); | ||||||
| 	assert(data_type == std::string("RANLUX48")); | 	assert(data_type == std::string("RANLUX48")); | ||||||
| #else | #endif | ||||||
|  | #ifdef RNG_MT19937 | ||||||
| 	assert(format == std::string("UINT32")); | 	assert(format == std::string("UINT32")); | ||||||
| 	assert(data_type == std::string("MT19937")); | 	assert(data_type == std::string("MT19937")); | ||||||
| #endif | #endif | ||||||
|  | #ifdef RNG_SITMO | ||||||
|  | 	assert(format == std::string("UINT64")); | ||||||
|  | 	assert(data_type == std::string("SITMO")); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| 	// depending on datatype, set up munger; | 	// depending on datatype, set up munger; | ||||||
| 	// munger is a function of <floating point, Real, data_type> | 	// munger is a function of <floating point, Real, data_type> | ||||||
|   uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); | 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||||
|  | 	BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); | ||||||
|  |  | ||||||
|   assert(csum == header.checksum ); | 	if ( nersc_csum != header.checksum ) {  | ||||||
|  | 	  std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl; | ||||||
|  | 	  exit(0); | ||||||
|  | 	} | ||||||
|  | 	assert(nersc_csum == header.checksum ); | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl; | 	std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl; | ||||||
| } |       } | ||||||
|  |  | ||||||
| }; |     }; | ||||||
|  |  | ||||||
|  |   }} | ||||||
| }} |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -205,13 +205,14 @@ public: | |||||||
|   void Stop(void) { |   void Stop(void) { | ||||||
|     count=0; |     count=0; | ||||||
|     cycles=0; |     cycles=0; | ||||||
|     size_t ign; |  | ||||||
| #ifdef __linux__ | #ifdef __linux__ | ||||||
|  |     ssize_t ign; | ||||||
|     if ( fd!= -1) { |     if ( fd!= -1) { | ||||||
|       ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); |       ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); | ||||||
|       ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); |       ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); | ||||||
|       ign=::read(fd, &count, sizeof(long long)); |       ign=::read(fd, &count, sizeof(long long)); | ||||||
|       ign=::read(cyclefd, &cycles, sizeof(long long)); |       ign+=::read(cyclefd, &cycles, sizeof(long long)); | ||||||
|  |       assert(ign=2*sizeof(long long)); | ||||||
|     } |     } | ||||||
|     elapsed = cyclecount() - begin; |     elapsed = cyclecount() - begin; | ||||||
| #else | #else | ||||||
|   | |||||||
							
								
								
									
										124
									
								
								lib/qcd/LatticeTheories.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								lib/qcd/LatticeTheories.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,124 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/qcd/QCD.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||||
|  | Author: neo <cossu@post.kek.jp> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_LT_H | ||||||
|  | #define GRID_LT_H | ||||||
|  | namespace Grid{ | ||||||
|  |  | ||||||
|  | // First steps in the complete generalization of the Physics part | ||||||
|  | // Design not final | ||||||
|  | namespace LatticeTheories { | ||||||
|  |  | ||||||
|  | template <int Dimensions> | ||||||
|  | struct LatticeTheory { | ||||||
|  |   static const int Nd = Dimensions; | ||||||
|  |   static const int Nds = Dimensions * 2;  // double stored field | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iSinglet = iScalar<iScalar<iScalar<vtype> > >; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template <int Dimensions, int Colours> | ||||||
|  | struct LatticeGaugeTheory : public LatticeTheory<Dimensions> { | ||||||
|  |   static const int Nds = Dimensions * 2; | ||||||
|  |   static const int Nd = Dimensions; | ||||||
|  |   static const int Nc = Colours; | ||||||
|  |  | ||||||
|  |   template <typename vtype>  | ||||||
|  |   using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template <int Dimensions, int Colours, int Spin> | ||||||
|  | struct FermionicLatticeGaugeTheory | ||||||
|  |     : public LatticeGaugeTheory<Dimensions, Colours> { | ||||||
|  |   static const int Nd = Dimensions; | ||||||
|  |   static const int Nds = Dimensions * 2; | ||||||
|  |   static const int Nc = Colours; | ||||||
|  |   static const int Ns = Spin; | ||||||
|  |  | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >; | ||||||
|  |   // These 2 only if Spin is a multiple of 2 | ||||||
|  |   static const int Nhs = Spin / 2; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >; | ||||||
|  |   template <typename vtype> | ||||||
|  |   using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >; | ||||||
|  |  | ||||||
|  |   //tests | ||||||
|  |   typedef iColourMatrix<Complex> ColourMatrix; | ||||||
|  |   typedef iColourMatrix<ComplexF> ColourMatrixF; | ||||||
|  |   typedef iColourMatrix<ComplexD> ColourMatrixD; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Examples, not complete now. | ||||||
|  | struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> { | ||||||
|  |     static const int Xp = 0; | ||||||
|  |     static const int Yp = 1; | ||||||
|  |     static const int Zp = 2; | ||||||
|  |     static const int Tp = 3; | ||||||
|  |     static const int Xm = 4; | ||||||
|  |     static const int Ym = 5; | ||||||
|  |     static const int Zm = 6; | ||||||
|  |     static const int Tm = 7; | ||||||
|  |  | ||||||
|  |     typedef FermionicLatticeGaugeTheory FLGT; | ||||||
|  |  | ||||||
|  |     typedef FLGT::iSpinMatrix<Complex  >          SpinMatrix; | ||||||
|  |     typedef FLGT::iSpinMatrix<ComplexF >          SpinMatrixF; | ||||||
|  |     typedef FLGT::iSpinMatrix<ComplexD >          SpinMatrixD; | ||||||
|  |  | ||||||
|  | }; | ||||||
|  | struct QED : public FermionicLatticeGaugeTheory<4, 1, 4> {//fill | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template <int Dimensions> | ||||||
|  | struct Scalar : public LatticeTheory<Dimensions> {}; | ||||||
|  |  | ||||||
|  | };  // LatticeTheories | ||||||
|  |  | ||||||
|  | } // Grid | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -32,9 +32,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_QCD_BASE_H | #ifndef GRID_QCD_BASE_H | ||||||
| #define GRID_QCD_BASE_H | #define GRID_QCD_BASE_H | ||||||
| namespace Grid{ | namespace Grid{ | ||||||
|  |  | ||||||
| namespace QCD { | namespace QCD { | ||||||
|  |  | ||||||
|  |     static const int Xdir = 0; | ||||||
|  |     static const int Ydir = 1; | ||||||
|  |     static const int Zdir = 2; | ||||||
|  |     static const int Tdir = 3; | ||||||
|  |  | ||||||
|     static const int Xp = 0; |     static const int Xp = 0; | ||||||
|     static const int Yp = 1; |     static const int Yp = 1; | ||||||
| @@ -492,6 +495,38 @@ namespace QCD { | |||||||
| }   //namespace QCD | }   //namespace QCD | ||||||
| } // Grid | } // Grid | ||||||
|  |  | ||||||
|  | /* | ||||||
|  | <<<<<<< HEAD | ||||||
|  | #include <Grid/qcd/utils/SpaceTimeGrid.h> | ||||||
|  | #include <Grid/qcd/spin/Dirac.h> | ||||||
|  | #include <Grid/qcd/spin/TwoSpinor.h> | ||||||
|  | #include <Grid/qcd/utils/LinalgUtils.h> | ||||||
|  | #include <Grid/qcd/utils/CovariantCshift.h> | ||||||
|  |  | ||||||
|  | // Include representations   | ||||||
|  | #include <Grid/qcd/utils/SUn.h> | ||||||
|  | #include <Grid/qcd/utils/SUnAdjoint.h> | ||||||
|  | #include <Grid/qcd/utils/SUnTwoIndex.h> | ||||||
|  | #include <Grid/qcd/representations/hmc_types.h> | ||||||
|  |  | ||||||
|  | // Scalar field | ||||||
|  | #include <Grid/qcd/utils/ScalarObjs.h> | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/action/Actions.h> | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/smearing/Smearing.h> | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/hmc/integrators/Integrator.h> | ||||||
|  | #include <Grid/qcd/hmc/integrators/Integrator_algorithm.h> | ||||||
|  | #include <Grid/qcd/observables/hmc_observable.h> | ||||||
|  | #include <Grid/qcd/hmc/HMC.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  | //#include <Grid/qcd/modules/mods.h> | ||||||
|  | ======= | ||||||
|  |  | ||||||
|  | >>>>>>> develop | ||||||
|  | */ | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -4,10 +4,11 @@ Grid physics library, www.github.com/paboyle/Grid | |||||||
|  |  | ||||||
| Source file: ./lib/qcd/action/ActionBase.h | Source file: ./lib/qcd/action/ActionBase.h | ||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015-2016 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: neo <cossu@post.kek.jp> | Author: neo <cossu@post.kek.jp> | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -27,128 +28,29 @@ See the full license in the file "LICENSE" in the top level distribution | |||||||
| directory | directory | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| #ifndef QCD_ACTION_BASE |  | ||||||
| #define QCD_ACTION_BASE | #ifndef ACTION_BASE_H | ||||||
|  | #define ACTION_BASE_H | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| namespace QCD { | namespace QCD { | ||||||
|  |  | ||||||
| template <class GaugeField> | template <class GaugeField > | ||||||
| class Action { | class Action  | ||||||
|  | { | ||||||
|  |  | ||||||
|  public: |  public: | ||||||
|   bool is_smeared = false; |   bool is_smeared = false; | ||||||
|   // Boundary conditions? // Heatbath? |   // Heatbath? | ||||||
|   virtual void refresh(const GaugeField& U, |   virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions | ||||||
|                        GridParallelRNG& pRNG) = 0;  // refresh pseudofermions |  | ||||||
|   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action |   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action | ||||||
|   virtual void deriv(const GaugeField& U, |   virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0;        // evaluate the action derivative | ||||||
|                      GaugeField& dSdU) = 0;  // evaluate the action derivative |   virtual std::string action_name()    = 0;                             // return the action name | ||||||
|   virtual ~Action(){}; |   virtual std::string LogParameters()  = 0;                             // prints action parameters | ||||||
|  |   virtual ~Action(){} | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Indexing of tuple types |  | ||||||
| template <class T, class Tuple> |  | ||||||
| struct Index; |  | ||||||
|  |  | ||||||
| template <class T, class... Types> |  | ||||||
| struct Index<T, std::tuple<T, Types...>> { |  | ||||||
|   static const std::size_t value = 0; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <class T, class U, class... Types> |  | ||||||
| struct Index<T, std::tuple<U, Types...>> { |  | ||||||
|   static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /* |  | ||||||
| template <class GaugeField> |  | ||||||
| struct ActionLevel { |  | ||||||
|  public: |  | ||||||
|   typedef Action<GaugeField>* |  | ||||||
|       ActPtr;  // now force the same colours as the rest of the code |  | ||||||
|  |  | ||||||
|   //Add supported representations here |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   unsigned int multiplier; |  | ||||||
|  |  | ||||||
|   std::vector<ActPtr> actions; |  | ||||||
|  |  | ||||||
|   ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) { |  | ||||||
|     assert(mul >= 1); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   void push_back(ActPtr ptr) { actions.push_back(ptr); } |  | ||||||
| }; |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| template <class GaugeField, class Repr = NoHirep > |  | ||||||
| struct ActionLevel { |  | ||||||
|  public: |  | ||||||
|   unsigned int multiplier;  |  | ||||||
|  |  | ||||||
|   // Fundamental repr actions separated because of the smearing |  | ||||||
|   typedef Action<GaugeField>* ActPtr; |  | ||||||
|  |  | ||||||
|   // construct a tuple of vectors of the actions for the corresponding higher |  | ||||||
|   // representation fields |  | ||||||
|   typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection; |  | ||||||
|   action_collection actions_hirep; |  | ||||||
|   typedef typename  AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types; |  | ||||||
|  |  | ||||||
|   std::vector<ActPtr>& actions; |  | ||||||
|  |  | ||||||
|   // Temporary conversion between ActionLevel and ActionLevelHirep |  | ||||||
|   //ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){} |  | ||||||
|  |  | ||||||
|   ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) { |  | ||||||
|     // initialize the hirep vectors to zero. |  | ||||||
|     //apply(this->resize, actions_hirep, 0); //need a working resize |  | ||||||
|     assert(mul >= 1); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   //void push_back(ActPtr ptr) { actions.push_back(ptr); } |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   template < class Field > |  | ||||||
|   void push_back(Action<Field>* ptr) { |  | ||||||
|     // insert only in the correct vector |  | ||||||
|     std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   |  | ||||||
|  |  | ||||||
|   template < class ActPtr> |  | ||||||
|   static void resize(ActPtr ap, unsigned int n){ |  | ||||||
|     ap->resize(n); |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //template <std::size_t I> |  | ||||||
|   //auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U)  {return std::get<I>(R).U;} |  | ||||||
|  |  | ||||||
|   // Loop on tuple for a callable function |  | ||||||
|   template <std::size_t I = 1, typename Callable, typename ...Args> |  | ||||||
|   inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply( |  | ||||||
|       Callable, Repr& R,Args&...) const {} |  | ||||||
|  |  | ||||||
|   template <std::size_t I = 1, typename Callable, typename ...Args> |  | ||||||
|   inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply( |  | ||||||
|       Callable fn, Repr& R, Args&... arguments) const { |  | ||||||
|     fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...); |  | ||||||
|     apply<I + 1>(fn, R, arguments...); |  | ||||||
|   }   |  | ||||||
|  |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| //template <class GaugeField> |  | ||||||
| //using ActionSet = std::vector<ActionLevel<GaugeField> >; |  | ||||||
|  |  | ||||||
| template <class GaugeField, class R> |  | ||||||
| using ActionSet = std::vector<ActionLevel<GaugeField, R> >; |  | ||||||
|  |  | ||||||
| } | } | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif // ACTION_BASE_H | ||||||
|   | |||||||
| @@ -31,15 +31,31 @@ directory | |||||||
| #define QCD_ACTION_CORE | #define QCD_ACTION_CORE | ||||||
|  |  | ||||||
| #include <Grid/qcd/action/ActionBase.h> | #include <Grid/qcd/action/ActionBase.h> | ||||||
|  | #include <Grid/qcd/action/ActionSet.h> | ||||||
| #include <Grid/qcd/action/ActionParams.h> | #include <Grid/qcd/action/ActionParams.h> | ||||||
|  |  | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| // Gauge Actions | // Gauge Actions | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| #include <Grid/qcd/action/gauge/Gauge.h> | #include <Grid/qcd/action/gauge/Gauge.h> | ||||||
|  |  | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| // Fermion prereqs | // Fermion prereqs | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> | #include <Grid/qcd/action/fermion/FermionCore.h> | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////// | ||||||
|  | // Scalar Actions | ||||||
|  | //////////////////////////////////////////// | ||||||
|  | #include <Grid/qcd/action/scalar/Scalar.h> | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////// | ||||||
|  | // Utility functions | ||||||
|  | //////////////////////////////////////////// | ||||||
|  | #include <Grid/qcd/utils/Metric.h> | ||||||
|  | #include <Grid/qcd/utils/CovariantLaplacian.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,31 +1,34 @@ | |||||||
|     /************************************************************************************* | /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|     Source file: ./lib/qcd/action/ActionParams.h | Source file: ./lib/qcd/action/ActionParams.h | ||||||
|  |  | ||||||
|     Copyright (C) 2015 | Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or | the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. | (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, | This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. | GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along | You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_QCD_ACTION_PARAMS_H | #ifndef GRID_QCD_ACTION_PARAMS_H | ||||||
| #define GRID_QCD_ACTION_PARAMS_H | #define GRID_QCD_ACTION_PARAMS_H | ||||||
|  |  | ||||||
| @@ -36,32 +39,54 @@ namespace QCD { | |||||||
|   struct GparityWilsonImplParams { |   struct GparityWilsonImplParams { | ||||||
|     bool overlapCommsCompute; |     bool overlapCommsCompute; | ||||||
|     std::vector<int> twists; |     std::vector<int> twists; | ||||||
|       GparityWilsonImplParams () : twists(Nd,0), overlapCommsCompute(false) {}; |     GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){}; | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   struct WilsonImplParams { |   struct WilsonImplParams { | ||||||
|     bool overlapCommsCompute; |     bool overlapCommsCompute; | ||||||
|       WilsonImplParams() : overlapCommsCompute(false) {}; |     std::vector<Complex> boundary_phases; | ||||||
|  |     WilsonImplParams() : overlapCommsCompute(false) { | ||||||
|  |       boundary_phases.resize(Nd, 1.0); | ||||||
|  |     }; | ||||||
|  |     WilsonImplParams(const std::vector<Complex> phi) | ||||||
|  |       : boundary_phases(phi), overlapCommsCompute(false) {} | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   struct StaggeredImplParams { |   struct StaggeredImplParams { | ||||||
|     StaggeredImplParams()  {}; |     StaggeredImplParams()  {}; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|     struct OneFlavourRationalParams {  |   struct OneFlavourRationalParams : Serializable { | ||||||
|       RealD  lo; |     GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,  | ||||||
|       RealD  hi; | 				    RealD, lo,  | ||||||
|       int MaxIter;   // Vector? | 				    RealD, hi,  | ||||||
|       RealD tolerance; // Vector?  | 				    int,   MaxIter,  | ||||||
|       int    degree=10; | 				    RealD, tolerance,  | ||||||
|       int precision=64; | 				    int,   degree,  | ||||||
|  | 				    int,   precision); | ||||||
|      |      | ||||||
|       OneFlavourRationalParams (RealD _lo,RealD _hi,int _maxit,RealD tol=1.0e-8,int _degree = 10,int _precision=64) : |     // MaxIter and tolerance, vectors?? | ||||||
|         lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), degree(_degree), precision(_precision) |      | ||||||
|       {}; |     // constructor  | ||||||
|  |     OneFlavourRationalParams(	RealD _lo      = 0.0,  | ||||||
|  | 				RealD _hi      = 1.0,  | ||||||
|  | 				int _maxit     = 1000, | ||||||
|  | 				RealD tol      = 1.0e-8,  | ||||||
|  |                            	int _degree    = 10, | ||||||
|  | 				int _precision = 64) | ||||||
|  |       : lo(_lo), | ||||||
|  | 	hi(_hi), | ||||||
|  | 	MaxIter(_maxit), | ||||||
|  | 	tolerance(tol), | ||||||
|  | 	degree(_degree), | ||||||
|  | 	precision(_precision){}; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
| }} |    | ||||||
|  | } | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
							
								
								
									
										116
									
								
								lib/qcd/action/ActionSet.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								lib/qcd/action/ActionSet.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/qcd/action/ActionSet.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: neo <cossu@post.kek.jp> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef ACTION_SET_H | ||||||
|  | #define ACTION_SET_H | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | // Should drop this namespace here | ||||||
|  | namespace QCD { | ||||||
|  |  | ||||||
|  | ////////////////////////////////// | ||||||
|  | // Indexing of tuple types | ||||||
|  | ////////////////////////////////// | ||||||
|  |  | ||||||
|  | template <class T, class Tuple> | ||||||
|  | struct Index; | ||||||
|  |  | ||||||
|  | template <class T, class... Types> | ||||||
|  | struct Index<T, std::tuple<T, Types...>> { | ||||||
|  |   static const std::size_t value = 0; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template <class T, class U, class... Types> | ||||||
|  | struct Index<T, std::tuple<U, Types...>> { | ||||||
|  |   static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////// | ||||||
|  | // Action Level | ||||||
|  | // Action collection  | ||||||
|  | // in an integration level | ||||||
|  | // (for multilevel integration schemes) | ||||||
|  | //////////////////////////////////////////// | ||||||
|  |  | ||||||
|  | template <class Field, class Repr = NoHirep > | ||||||
|  | struct ActionLevel { | ||||||
|  |  public: | ||||||
|  |   unsigned int multiplier; | ||||||
|  |  | ||||||
|  |   // Fundamental repr actions separated because of the smearing | ||||||
|  |   typedef Action<Field>* ActPtr; | ||||||
|  |  | ||||||
|  |   // construct a tuple of vectors of the actions for the corresponding higher | ||||||
|  |   // representation fields | ||||||
|  |   typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection; | ||||||
|  |   typedef typename  AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types; | ||||||
|  |  | ||||||
|  |   action_collection actions_hirep; | ||||||
|  |   std::vector<ActPtr>& actions; | ||||||
|  |  | ||||||
|  |   explicit ActionLevel(unsigned int mul = 1) :  | ||||||
|  |   actions(std::get<0>(actions_hirep)), multiplier(mul) { | ||||||
|  |     // initialize the hirep vectors to zero. | ||||||
|  |     // apply(this->resize, actions_hirep, 0); //need a working resize | ||||||
|  |     assert(mul >= 1); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template < class GenField > | ||||||
|  |   void push_back(Action<GenField>* ptr) { | ||||||
|  |     // insert only in the correct vector | ||||||
|  |     std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   template <class ActPtr> | ||||||
|  |   static void resize(ActPtr ap, unsigned int n) { | ||||||
|  |     ap->resize(n); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Loop on tuple for a callable function | ||||||
|  |   template <std::size_t I = 1, typename Callable, typename ...Args> | ||||||
|  |   inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(Callable, Repr& R,Args&...) const {} | ||||||
|  |  | ||||||
|  |   template <std::size_t I = 1, typename Callable, typename ...Args> | ||||||
|  |   inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(Callable fn, Repr& R, Args&... arguments) const { | ||||||
|  |     fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...); | ||||||
|  |     apply<I + 1>(fn, R, arguments...); | ||||||
|  |   }   | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Define the ActionSet | ||||||
|  | template <class GaugeField, class R> | ||||||
|  | using ActionSet = std::vector<ActionLevel<GaugeField, R> >; | ||||||
|  |  | ||||||
|  | } // QCD | ||||||
|  | } // Grid | ||||||
|  |  | ||||||
|  | #endif  // ACTION_SET_H | ||||||
| @@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
|  |  | ||||||
| #include <Grid/Eigen/Dense> | #include <Grid/Grid_Eigen_Dense.h> | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> | #include <Grid/qcd/action/fermion/FermionCore.h> | ||||||
| #include <Grid/qcd/action/fermion/CayleyFermion5D.h> | #include <Grid/qcd/action/fermion/CayleyFermion5D.h> | ||||||
|  |  | ||||||
| @@ -170,7 +170,6 @@ void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField & | |||||||
|   lower[0]   =-mass*lower[0]; |   lower[0]   =-mass*lower[0]; | ||||||
|   M5D(psi,psi,chi,lower,diag,upper); |   M5D(psi,psi,chi,lower,diag,upper); | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
| @@ -192,7 +191,7 @@ void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField & | |||||||
|       lower[s]=-cee[s-1]; |       lower[s]=-cee[s-1]; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   // Conjugate the terms ? |   // Conjugate the terms  | ||||||
|   for (int s=0;s<Ls;s++){ |   for (int s=0;s<Ls;s++){ | ||||||
|     diag[s] =conjugate(diag[s]); |     diag[s] =conjugate(diag[s]); | ||||||
|     upper[s]=conjugate(upper[s]); |     upper[s]=conjugate(upper[s]); | ||||||
| @@ -220,13 +219,21 @@ void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField | |||||||
|   std::vector<Coeff_t> diag =bs; |   std::vector<Coeff_t> diag =bs; | ||||||
|   std::vector<Coeff_t> upper=cs; |   std::vector<Coeff_t> upper=cs; | ||||||
|   std::vector<Coeff_t> lower=cs;  |   std::vector<Coeff_t> lower=cs;  | ||||||
|   upper[Ls-1]=-mass*upper[Ls-1]; |  | ||||||
|   lower[0]   =-mass*lower[0]; |  | ||||||
|   // Conjugate the terms ? |  | ||||||
|   for (int s=0;s<Ls;s++){ |   for (int s=0;s<Ls;s++){ | ||||||
|     diag[s] =conjugate(diag[s]); |     if ( s== 0 ) { | ||||||
|     upper[s]=conjugate(upper[s]); |       upper[s] = cs[s+1]; | ||||||
|     lower[s]=conjugate(lower[s]); |       lower[s] =-mass*cs[Ls-1]; | ||||||
|  |     } else if ( s==(Ls-1) ) {  | ||||||
|  |       upper[s] =-mass*cs[0]; | ||||||
|  |       lower[s] = cs[s-1]; | ||||||
|  |     } else {  | ||||||
|  |       upper[s] = cs[s+1]; | ||||||
|  |       lower[s] = cs[s-1]; | ||||||
|  |     } | ||||||
|  |     upper[s] = conjugate(upper[s]); | ||||||
|  |     lower[s] = conjugate(lower[s]); | ||||||
|  |     diag[s]  = conjugate(diag[s]); | ||||||
|   } |   } | ||||||
|   M5Ddag(psi,psi,Din,lower,diag,upper); |   M5Ddag(psi,psi,Din,lower,diag,upper); | ||||||
| } | } | ||||||
| @@ -373,6 +380,8 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | |||||||
|   /////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////// | ||||||
|   // The Cayley coeffs (unprec) |   // The Cayley coeffs (unprec) | ||||||
|   /////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////// | ||||||
|  |   assert(gamma.size()==Ls); | ||||||
|  |  | ||||||
|   omega.resize(Ls); |   omega.resize(Ls); | ||||||
|   bs.resize(Ls); |   bs.resize(Ls); | ||||||
|   cs.resize(Ls); |   cs.resize(Ls); | ||||||
| @@ -405,6 +414,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | |||||||
|   for(int i=0; i < Ls; i++){ |   for(int i=0; i < Ls; i++){ | ||||||
|     as[i] = 1.0; |     as[i] = 1.0; | ||||||
|     omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code |     omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code | ||||||
|  |     //    assert(fabs(omega[i])>0.0); | ||||||
|     bs[i] = 0.5*(bpc/omega[i] + bmc); |     bs[i] = 0.5*(bpc/omega[i] + bmc); | ||||||
|     cs[i] = 0.5*(bpc/omega[i] - bmc); |     cs[i] = 0.5*(bpc/omega[i] - bmc); | ||||||
|   } |   } | ||||||
| @@ -419,11 +429,11 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | |||||||
|    |    | ||||||
|   for(int i=0;i<Ls;i++){ |   for(int i=0;i<Ls;i++){ | ||||||
|     bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);      |     bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);      | ||||||
|  |     //    assert(fabs(bee[i])>0.0); | ||||||
|     cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); |     cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); | ||||||
|     beo[i]=as[i]*bs[i]; |     beo[i]=as[i]*bs[i]; | ||||||
|     ceo[i]=-as[i]*cs[i]; |     ceo[i]=-as[i]*cs[i]; | ||||||
|   } |   } | ||||||
|    |  | ||||||
|   aee.resize(Ls); |   aee.resize(Ls); | ||||||
|   aeo.resize(Ls); |   aeo.resize(Ls); | ||||||
|   for(int i=0;i<Ls;i++){ |   for(int i=0;i<Ls;i++){ | ||||||
| @@ -467,14 +477,16 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | |||||||
| 	 | 	 | ||||||
|   {  |   {  | ||||||
|     Coeff_t delta_d=mass*cee[Ls-1]; |     Coeff_t delta_d=mass*cee[Ls-1]; | ||||||
|     for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j]; |     for(int j=0;j<Ls-1;j++) { | ||||||
|  |       //      assert(fabs(bee[j])>0.0); | ||||||
|  |       delta_d *= cee[j]/bee[j]; | ||||||
|  |     } | ||||||
|     dee[Ls-1] += delta_d; |     dee[Ls-1] += delta_d; | ||||||
|   }   |   }   | ||||||
|  |  | ||||||
|   int inv=1; |   int inv=1; | ||||||
|   this->MooeeInternalCompute(0,inv,MatpInv,MatmInv); |   this->MooeeInternalCompute(0,inv,MatpInv,MatmInv); | ||||||
|   this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); |   this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -488,7 +500,9 @@ void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv, | |||||||
|   GridBase *grid = this->FermionRedBlackGrid(); |   GridBase *grid = this->FermionRedBlackGrid(); | ||||||
|   int LLs = grid->_rdimensions[0]; |   int LLs = grid->_rdimensions[0]; | ||||||
|  |  | ||||||
|   if ( LLs == Ls ) return; // Not vectorised in 5th direction |   if ( LLs == Ls ) { | ||||||
|  |     return; // Not vectorised in 5th direction | ||||||
|  |   } | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls); |   Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls); | ||||||
|   Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); |   Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); | ||||||
|   | |||||||
| @@ -194,7 +194,9 @@ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const Fermion | |||||||
| template void CayleyFermion5D< A >::MooeeInv    (const FermionField &psi, FermionField &chi); \ | template void CayleyFermion5D< A >::MooeeInv    (const FermionField &psi, FermionField &chi); \ | ||||||
| template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); | template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); | ||||||
|  |  | ||||||
|  | #undef  CAYLEY_DPERP_DENSE | ||||||
| #define  CAYLEY_DPERP_CACHE | #define  CAYLEY_DPERP_CACHE | ||||||
| #undef  CAYLEY_DPERP_LINALG | #undef  CAYLEY_DPERP_LINALG | ||||||
|  | #define CAYLEY_DPERP_VEC | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -181,6 +181,18 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField & | |||||||
|   assert(psi.checkerboard == psi.checkerboard); |   assert(psi.checkerboard == psi.checkerboard); | ||||||
|   chi.checkerboard=psi.checkerboard; |   chi.checkerboard=psi.checkerboard; | ||||||
|  |  | ||||||
|  |   std::vector<Coeff_t> ueec(Ls); | ||||||
|  |   std::vector<Coeff_t> deec(Ls); | ||||||
|  |   std::vector<Coeff_t> leec(Ls); | ||||||
|  |   std::vector<Coeff_t> ueemc(Ls); | ||||||
|  |   std::vector<Coeff_t> leemc(Ls); | ||||||
|  |   for(int s=0;s<ueec.size();s++){ | ||||||
|  |     ueec[s] = conjugate(uee[s]); | ||||||
|  |     deec[s] = conjugate(dee[s]); | ||||||
|  |     leec[s] = conjugate(lee[s]); | ||||||
|  |     ueemc[s]= conjugate(ueem[s]); | ||||||
|  |     leemc[s]= conjugate(leem[s]); | ||||||
|  |   } | ||||||
|   MooeeInvCalls++; |   MooeeInvCalls++; | ||||||
|   MooeeInvTime-=usecond(); |   MooeeInvTime-=usecond(); | ||||||
|  |  | ||||||
| @@ -192,25 +204,25 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField & | |||||||
|     chi[ss]=psi[ss]; |     chi[ss]=psi[ss]; | ||||||
|     for (int s=1;s<Ls;s++){ |     for (int s=1;s<Ls;s++){ | ||||||
|                             spProj5m(tmp,chi[ss+s-1]); |                             spProj5m(tmp,chi[ss+s-1]); | ||||||
|       chi[ss+s] = psi[ss+s]-uee[s-1]*tmp; |       chi[ss+s] = psi[ss+s]-ueec[s-1]*tmp; | ||||||
|     } |     } | ||||||
|     // U_m^{-\dagger}  |     // U_m^{-\dagger}  | ||||||
|     for (int s=0;s<Ls-1;s++){ |     for (int s=0;s<Ls-1;s++){ | ||||||
|                                    spProj5p(tmp,chi[ss+s]); |                                    spProj5p(tmp,chi[ss+s]); | ||||||
|       chi[ss+Ls-1] = chi[ss+Ls-1] - ueem[s]*tmp; |       chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // L_m^{-\dagger} D^{-dagger} |     // L_m^{-\dagger} D^{-dagger} | ||||||
|     for (int s=0;s<Ls-1;s++){ |     for (int s=0;s<Ls-1;s++){ | ||||||
|       spProj5m(tmp,chi[ss+Ls-1]); |       spProj5m(tmp,chi[ss+Ls-1]); | ||||||
|       chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(leem[s]/dee[Ls-1])*tmp; |       chi[ss+s] = (1.0/deec[s])*chi[ss+s]-(leemc[s]/deec[Ls-1])*tmp; | ||||||
|     }	 |     }	 | ||||||
|     chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1]; |     chi[ss+Ls-1]= (1.0/deec[Ls-1])*chi[ss+Ls-1]; | ||||||
|    |    | ||||||
|     // Apply L^{-dagger} |     // Apply L^{-dagger} | ||||||
|     for (int s=Ls-2;s>=0;s--){ |     for (int s=Ls-2;s>=0;s--){ | ||||||
|       spProj5p(tmp,chi[ss+s+1]); |       spProj5p(tmp,chi[ss+s+1]); | ||||||
|       chi[ss+s] = chi[ss+s] - lee[s]*tmp; |       chi[ss+s] = chi[ss+s] - leec[s]*tmp; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -225,6 +237,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField & | |||||||
|   INSTANTIATE_DPERP(GparityWilsonImplD); |   INSTANTIATE_DPERP(GparityWilsonImplD); | ||||||
|   INSTANTIATE_DPERP(ZWilsonImplF); |   INSTANTIATE_DPERP(ZWilsonImplF); | ||||||
|   INSTANTIATE_DPERP(ZWilsonImplD); |   INSTANTIATE_DPERP(ZWilsonImplD); | ||||||
|  |  | ||||||
|  |   INSTANTIATE_DPERP(WilsonImplFH); | ||||||
|  |   INSTANTIATE_DPERP(WilsonImplDF); | ||||||
|  |   INSTANTIATE_DPERP(GparityWilsonImplFH); | ||||||
|  |   INSTANTIATE_DPERP(GparityWilsonImplDF); | ||||||
|  |   INSTANTIATE_DPERP(ZWilsonImplFH); | ||||||
|  |   INSTANTIATE_DPERP(ZWilsonImplDF); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| }} | }} | ||||||
|   | |||||||
| @@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
|  |  | ||||||
| #include <Grid/Eigen/Dense> | #include <Grid/Grid_Eigen_Dense.h> | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> | #include <Grid/qcd/action/fermion/FermionCore.h> | ||||||
| #include <Grid/qcd/action/fermion/CayleyFermion5D.h> | #include <Grid/qcd/action/fermion/CayleyFermion5D.h> | ||||||
|  |  | ||||||
| @@ -39,20 +39,17 @@ namespace QCD { | |||||||
|   /* |   /* | ||||||
|    * Dense matrix versions of routines |    * Dense matrix versions of routines | ||||||
|    */ |    */ | ||||||
|  |  | ||||||
|   /* |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
|   this->MooeeInternal(psi,chi,DaggerYes,InverseYes); |   this->MooeeInternal(psi,chi,DaggerYes,InverseYes); | ||||||
| } | } | ||||||
|    |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
|   this->MooeeInternal(psi,chi,DaggerNo,InverseYes); |   this->MooeeInternal(psi,chi,DaggerNo,InverseYes); | ||||||
| } | } | ||||||
|   */ |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) | void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) | ||||||
| { | { | ||||||
| @@ -126,9 +123,34 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #ifdef CAYLEY_DPERP_DENSE | ||||||
|  | INSTANTIATE_DPERP(GparityWilsonImplF); | ||||||
|  | INSTANTIATE_DPERP(GparityWilsonImplD); | ||||||
|  | INSTANTIATE_DPERP(WilsonImplF); | ||||||
|  | INSTANTIATE_DPERP(WilsonImplD); | ||||||
|  | INSTANTIATE_DPERP(ZWilsonImplF); | ||||||
|  | INSTANTIATE_DPERP(ZWilsonImplD); | ||||||
|  |  | ||||||
| template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
| template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
| template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
| template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<ZWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<ZWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  |  | ||||||
|  | INSTANTIATE_DPERP(GparityWilsonImplFH); | ||||||
|  | INSTANTIATE_DPERP(GparityWilsonImplDF); | ||||||
|  | INSTANTIATE_DPERP(WilsonImplFH); | ||||||
|  | INSTANTIATE_DPERP(WilsonImplDF); | ||||||
|  | INSTANTIATE_DPERP(ZWilsonImplFH); | ||||||
|  | INSTANTIATE_DPERP(ZWilsonImplDF); | ||||||
|  |  | ||||||
|  | template void CayleyFermion5D<GparityWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<GparityWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<WilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<WilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<ZWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<ZWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| }} | }} | ||||||
|   | |||||||
| @@ -37,7 +37,6 @@ namespace Grid { | |||||||
| namespace QCD { | namespace QCD { | ||||||
|  |  | ||||||
|   // FIXME -- make a version of these routines with site loop outermost for cache reuse. |   // FIXME -- make a version of these routines with site loop outermost for cache reuse. | ||||||
|  |  | ||||||
|   // Pminus fowards |   // Pminus fowards | ||||||
|   // Pplus  backwards |   // Pplus  backwards | ||||||
| template<class Impl>   | template<class Impl>   | ||||||
| @@ -48,17 +47,18 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | |||||||
| 				std::vector<Coeff_t> &diag, | 				std::vector<Coeff_t> &diag, | ||||||
| 				std::vector<Coeff_t> &upper) | 				std::vector<Coeff_t> &upper) | ||||||
| { | { | ||||||
|  |   Coeff_t one(1.0); | ||||||
|   int Ls=this->Ls; |   int Ls=this->Ls; | ||||||
|   for(int s=0;s<Ls;s++){ |   for(int s=0;s<Ls;s++){ | ||||||
|     if ( s==0 ) { |     if ( s==0 ) { | ||||||
|       axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1); |       axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1); | ||||||
|       axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,Ls-1); |       axpby_ssp_pplus (chi,one,chi,lower[s],psi,s,Ls-1); | ||||||
|     } else if ( s==(Ls-1)) {  |     } else if ( s==(Ls-1)) {  | ||||||
|       axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,0); |       axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,0); | ||||||
|       axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,s-1); |       axpby_ssp_pplus (chi,one,chi,lower[s],psi,s,s-1); | ||||||
|     } else { |     } else { | ||||||
|       axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1); |       axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1); | ||||||
|       axpby_ssp_pplus(chi,1.0,chi,lower[s],psi,s,s-1); |       axpby_ssp_pplus(chi,one,chi,lower[s],psi,s,s-1); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
| @@ -70,17 +70,18 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, | |||||||
| 				   std::vector<Coeff_t> &diag, | 				   std::vector<Coeff_t> &diag, | ||||||
| 				   std::vector<Coeff_t> &upper) | 				   std::vector<Coeff_t> &upper) | ||||||
| { | { | ||||||
|  |   Coeff_t one(1.0); | ||||||
|   int Ls=this->Ls; |   int Ls=this->Ls; | ||||||
|   for(int s=0;s<Ls;s++){ |   for(int s=0;s<Ls;s++){ | ||||||
|     if ( s==0 ) { |     if ( s==0 ) { | ||||||
|       axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1); |       axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1); | ||||||
|       axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,Ls-1); |       axpby_ssp_pminus(chi,one,chi,lower[s],psi,s,Ls-1); | ||||||
|     } else if ( s==(Ls-1)) {  |     } else if ( s==(Ls-1)) {  | ||||||
|       axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,0); |       axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,0); | ||||||
|       axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1); |       axpby_ssp_pminus(chi,one,chi,lower[s],psi,s,s-1); | ||||||
|     } else { |     } else { | ||||||
|       axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1); |       axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1); | ||||||
|       axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1); |       axpby_ssp_pminus(chi,one,chi,lower[s],psi,s,s-1); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
| @@ -88,62 +89,75 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, | |||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
|  |   Coeff_t one(1.0); | ||||||
|  |   Coeff_t czero(0.0); | ||||||
|   chi.checkerboard=psi.checkerboard; |   chi.checkerboard=psi.checkerboard; | ||||||
|   int Ls=this->Ls; |   int Ls=this->Ls; | ||||||
|   // Apply (L^{\prime})^{-1} |   // Apply (L^{\prime})^{-1} | ||||||
|   axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0] |   axpby_ssp (chi,one,psi,     czero,psi,0,0);      // chi[0]=psi[0] | ||||||
|   for (int s=1;s<Ls;s++){ |   for (int s=1;s<Ls;s++){ | ||||||
|     axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1] |     axpby_ssp_pplus(chi,one,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1] | ||||||
|   } |   } | ||||||
|   // L_m^{-1}  |   // L_m^{-1}  | ||||||
|   for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi |   for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi | ||||||
|     axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s); |     axpby_ssp_pminus(chi,one,chi,-leem[s],chi,Ls-1,s); | ||||||
|   } |   } | ||||||
|   // U_m^{-1} D^{-1} |   // U_m^{-1} D^{-1} | ||||||
|   for (int s=0;s<Ls-1;s++){ |   for (int s=0;s<Ls-1;s++){ | ||||||
|     // Chi[s] + 1/d chi[s]  |     // Chi[s] + 1/d chi[s]  | ||||||
|     axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1); |     axpby_ssp_pplus(chi,one/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1); | ||||||
|   }	 |   }	 | ||||||
|   axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable  |   axpby_ssp(chi,one/dee[Ls-1],chi,czero,chi,Ls-1,Ls-1); // Modest avoidable  | ||||||
|    |    | ||||||
|   // Apply U^{-1} |   // Apply U^{-1} | ||||||
|   for (int s=Ls-2;s>=0;s--){ |   for (int s=Ls-2;s>=0;s--){ | ||||||
|     axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1);  // chi[Ls] |     axpby_ssp_pminus (chi,one,chi,-uee[s],chi,s,s+1);  // chi[Ls] | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
|  |   Coeff_t one(1.0); | ||||||
|  |   Coeff_t czero(0.0); | ||||||
|   chi.checkerboard=psi.checkerboard; |   chi.checkerboard=psi.checkerboard; | ||||||
|   int Ls=this->Ls; |   int Ls=this->Ls; | ||||||
|   // Apply (U^{\prime})^{-dagger} |   // Apply (U^{\prime})^{-dagger} | ||||||
|   axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0] |   axpby_ssp (chi,one,psi,     czero,psi,0,0);      // chi[0]=psi[0] | ||||||
|   for (int s=1;s<Ls;s++){ |   for (int s=1;s<Ls;s++){ | ||||||
|     axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1); |     axpby_ssp_pminus(chi,one,psi,-conjugate(uee[s-1]),chi,s,s-1); | ||||||
|   } |   } | ||||||
|   // U_m^{-\dagger}  |   // U_m^{-\dagger}  | ||||||
|   for (int s=0;s<Ls-1;s++){ |   for (int s=0;s<Ls-1;s++){ | ||||||
|     axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s); |     axpby_ssp_pplus(chi,one,chi,-conjugate(ueem[s]),chi,Ls-1,s); | ||||||
|   } |   } | ||||||
|   // L_m^{-\dagger} D^{-dagger} |   // L_m^{-\dagger} D^{-dagger} | ||||||
|   for (int s=0;s<Ls-1;s++){ |   for (int s=0;s<Ls-1;s++){ | ||||||
|     axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1); |     axpby_ssp_pminus(chi,one/conjugate(dee[s]),chi,-conjugate(leem[s]/dee[Ls-1]),chi,s,Ls-1); | ||||||
|   }	 |   }	 | ||||||
|   axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable  |   axpby_ssp(chi,one/conjugate(dee[Ls-1]),chi,czero,chi,Ls-1,Ls-1); // Modest avoidable  | ||||||
|    |    | ||||||
|   // Apply L^{-dagger} |   // Apply L^{-dagger} | ||||||
|   for (int s=Ls-2;s>=0;s--){ |   for (int s=Ls-2;s>=0;s--){ | ||||||
|     axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);  // chi[Ls] |     axpby_ssp_pplus (chi,one,chi,-conjugate(lee[s]),chi,s,s+1);  // chi[Ls] | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| #ifdef CAYLEY_DPERP_LINALG | #ifdef CAYLEY_DPERP_LINALG | ||||||
|   INSTANTIATE(WilsonImplF); |   INSTANTIATE_DPERP(WilsonImplF); | ||||||
|   INSTANTIATE(WilsonImplD); |   INSTANTIATE_DPERP(WilsonImplD); | ||||||
|   INSTANTIATE(GparityWilsonImplF); |   INSTANTIATE_DPERP(GparityWilsonImplF); | ||||||
|   INSTANTIATE(GparityWilsonImplD); |   INSTANTIATE_DPERP(GparityWilsonImplD); | ||||||
|  |   INSTANTIATE_DPERP(ZWilsonImplF); | ||||||
|  |   INSTANTIATE_DPERP(ZWilsonImplD); | ||||||
|  |  | ||||||
|  |   INSTANTIATE_DPERP(WilsonImplFH); | ||||||
|  |   INSTANTIATE_DPERP(WilsonImplDF); | ||||||
|  |   INSTANTIATE_DPERP(GparityWilsonImplFH); | ||||||
|  |   INSTANTIATE_DPERP(GparityWilsonImplDF); | ||||||
|  |   INSTANTIATE_DPERP(ZWilsonImplFH); | ||||||
|  |   INSTANTIATE_DPERP(ZWilsonImplDF); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -35,7 +35,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| namespace QCD {  /* | namespace QCD {   | ||||||
|  |   /* | ||||||
|    * Dense matrix versions of routines |    * Dense matrix versions of routines | ||||||
|    */ |    */ | ||||||
| template<class Impl> | template<class Impl> | ||||||
| @@ -807,10 +808,21 @@ INSTANTIATE_DPERP(DomainWallVec5dImplF); | |||||||
| INSTANTIATE_DPERP(ZDomainWallVec5dImplD); | INSTANTIATE_DPERP(ZDomainWallVec5dImplD); | ||||||
| INSTANTIATE_DPERP(ZDomainWallVec5dImplF); | INSTANTIATE_DPERP(ZDomainWallVec5dImplF); | ||||||
|  |  | ||||||
|  | INSTANTIATE_DPERP(DomainWallVec5dImplDF); | ||||||
|  | INSTANTIATE_DPERP(DomainWallVec5dImplFH); | ||||||
|  | INSTANTIATE_DPERP(ZDomainWallVec5dImplDF); | ||||||
|  | INSTANTIATE_DPERP(ZDomainWallVec5dImplFH); | ||||||
|  |  | ||||||
| template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
| template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
| template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
| template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  |  | ||||||
|  | template void CayleyFermion5D<DomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<DomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  | template void CayleyFermion5D<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| }} | }} | ||||||
|   | |||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user