mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-26 17:49:33 +00:00 
			
		
		
		
	Compare commits
	
		
			587 Commits
		
	
	
		
			bugfix/dmi
			...
			feature/mu
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | ad89abb018 | ||
|  | 80c5bce5bb | ||
|  | f68b5de9c8 | ||
|  | d0f3d525d5 | ||
|  | 3a58217405 | ||
|  | c289699d9a | ||
|  | c3b1263e75 | ||
| 102ea9ae66 | |||
|  | 5fa386ddc9 | ||
|  | d9cd4f0273 | ||
|  | b49bec0cec | ||
|  | ae56e556c6 | ||
|  | 1cdf999668 | ||
|  | 11062fb686 | ||
|  | 383ca7d392 | ||
|  | a446d95c33 | ||
|  | be66e7dd95 | ||
|  | 6d0d064a6c | ||
|  | bfef525ed2 | ||
|  | 0b0cf62193 | ||
|  | 7d88198387 | ||
|  | 2f619482b8 | ||
|  | d6472eda8d | ||
|  | 9e658de238 | ||
|  | bcefdd7c4e | ||
|  | fd367d8bfd | ||
|  | 8a3fe60a27 | ||
|  | 44051aecd1 | ||
|  | 06e6f8de00 | ||
|  | dbe4d7850c | ||
|  | 4fe182e5a7 | ||
|  | 175f393f9d | ||
|  | 14d53e1c9e | ||
|  | 8bd869da37 | ||
|  | c7036f6717 | ||
|  | c0485d799d | ||
|  | 7abc5613bd | ||
|  | 237cfd11ab | ||
|  | a4b7dddb67 | ||
|  | 5696781862 | ||
| c3f0889eda | |||
|  | 0f214ad427 | ||
|  | fe4912880d | ||
|  | f038c6babe | ||
|  | 169f4b2711 | ||
|  | 2d8aff36fe | ||
|  | 659d7d1a40 | ||
|  | dc6f078246 | ||
|  | 8a4714a4a6 | ||
|  | 40e119c61c | ||
|  | 7b0237b081 | ||
|  | b68ad0cc0b | ||
|  | 37263fd9b1 | ||
|  | 3d09e3e9e0 | ||
|  | 1354b46338 | ||
|  | 251a97fe1b | ||
|  | e18929eaa0 | ||
|  | f3b0a92e71 | ||
|  | a0be3f7330 | ||
|  | b5a6e4f1fd | ||
|  | 7a788db3dc | ||
|  | f20eceb6cd | ||
|  | 38325ebbc6 | ||
|  | b73bd151bb | ||
|  | 694b305cab | ||
|  | 2d3737a133 | ||
|  | ac1f1838bc | ||
|  | 09d09d0fe5 | ||
|  | bf630a6821 | ||
|  | 8859a151cc | ||
|  | 688a39cfd9 | ||
|  | 6f5a5cd9b3 | ||
|  | 0933aeefd4 | ||
|  | 322f61acee | ||
|  | 08e04b9676 | ||
| feaa2ac947 | |||
| 07de925127 | |||
|  | a9c816a268 | ||
|  | e43a8b6b8a | ||
|  | bf729766dd | ||
|  | dafb351d38 | ||
| 0b707b861c | |||
| 15e87a4607 | |||
| 7d7220cbd7 | |||
|  | 54e94360ad | ||
| 0af740dc15 | |||
| d2e8372df3 | |||
|  | 869b99ec1e | ||
| 4372d04ad4 | |||
|  | 56abbdf4c2 | ||
|  | af71c63f4c | ||
|  | 0440d4ce66 | ||
| b22eab8c8b | |||
|  | a7d56523ab | ||
|  | 9e56c65730 | ||
|  | ef4f2b8c41 | ||
|  | e8b95bd35b | ||
|  | 7e35286860 | ||
|  | 0486ff8e79 | ||
| 1e8a2e1621 | |||
| 7587df831a | |||
|  | e9cc21900f | ||
|  | 0a8faac271 | ||
|  | abc4de0fd2 | ||
|  | cfe3cd76d1 | ||
|  | 3fa5e3109f | ||
|  | 8b7049f737 | ||
|  | c85024683e | ||
|  | 1300b0b04b | ||
|  | e6d984b484 | ||
|  | 1d18d95d4f | ||
|  | ae39ec85a3 | ||
|  | b96daf53a0 | ||
|  | 46879e1658 | ||
|  | ae4de94798 | ||
|  | 0ab555b4f5 | ||
|  | 8e9be9f84f | ||
|  | d572170170 | ||
| 81b18f843a | |||
|  | a833f88c32 | ||
|  | 07b2c1b253 | ||
|  | 735cbdb983 | ||
|  | 2ad54c5a02 | ||
|  | 12ccc73cf5 | ||
|  | 3d04dc33c6 | ||
|  | e7564f8330 | ||
|  | 91199a8ea0 | ||
|  | 0494feec98 | ||
|  | a16b1e134e | ||
|  | 769ad578f5 | ||
|  | eaac0044b5 | ||
|  | 56042f002c | ||
|  | 3bfd1f13e6 | ||
|  | 70ab598c96 | ||
|  | 1d0ca65e28 | ||
|  | 2bc4d0a20e | ||
| 2490816297 | |||
| 5f55bca378 | |||
| f6aa82b7f2 | |||
| 22749699a3 | |||
| 0503c028be | |||
|  | 092dcd4e04 | ||
|  | 4a8c4ccfba | ||
|  | 9b44189d5a | ||
|  | 7da4856e8e | ||
|  | aaf1e33a77 | ||
|  | 094c3d091a | ||
|  | 4b98e524a0 | ||
|  | 1a1f6d55f9 | ||
|  | 21421656ab | ||
|  | 6f687a67cd | ||
|  | b30754e762 | ||
|  | 1e429a0d57 | ||
|  | d38a4de36c | ||
|  | ef1b7db374 | ||
|  | 53a9aeb965 | ||
|  | e30fa9f4b8 | ||
|  | 58e8d0a10d | ||
|  | 62cf9cf638 | ||
|  | 0fb458879d | ||
|  | 725c513d94 | ||
| d8648307ff | |||
| 064315c00b | |||
|  | 7c6cc85df6 | ||
|  | a6691ef87c | ||
|  | 8e0ced627a | ||
|  | 0de314870d | ||
|  | ffb91e53d2 | ||
|  | f4e8bf2858 | ||
| a74c34315c | |||
|  | 69470ccc10 | ||
|  | b8b5934193 | ||
|  | 75856f2945 | ||
|  | 3c112a7a25 | ||
|  | ab3596d4d3 | ||
|  | a8c10b1933 | ||
|  | 15e801af3f | ||
|  | 0ffc235741 | ||
|  | 8e19c99c7d | ||
|  | a0bc0ad06f | ||
|  | a8fb2835ca | ||
|  | bc862ce3ab | ||
| 22f4feee7b | |||
| 3f858d6755 | |||
|  | 3267683e22 | ||
|  | f46a67ffb3 | ||
|  | f7b8383ef5 | ||
|  | 10f2872aae | ||
| 35fa3d1dfd | |||
|  | cd73897b8d | ||
|  | c4435e6beb | ||
|  | 7a8f6af5f8 | ||
|  | 49a5d9bac7 | ||
|  | 2b3fdd4a58 | ||
|  | 34502ec471 | ||
|  | 8a43e88b4f | ||
| d1ece74137 | |||
|  | 238df20370 | ||
|  | 97a32a6145 | ||
|  | 655492a443 | ||
|  | 1cab06f6bd | ||
| 43c817cc67 | |||
|  | f8024c262b | ||
|  | 4cc5f01f4a | ||
|  | 9c12c37aaf | ||
|  | 806eaa0530 | ||
|  | 01d0e54594 | ||
|  | 5aafa335fe | ||
|  | 8ba0494485 | ||
|  | d99d98d9fd | ||
|  | 95a017a4ae | ||
|  | 92f92379e6 | ||
|  | 529e78d43f | ||
|  | 4ec746d262 | ||
|  | 51bf1501fc | ||
|  | 66d819c054 | ||
|  | 3f3686f869 | ||
|  | 26bb829f8c | ||
|  | 67cb04fc66 | ||
|  | a40bd68aed | ||
|  | 36495e0fd2 | ||
|  | 93f6c15772 | ||
|  | cb93eeff21 | ||
|  | c7cc7e6101 | ||
|  | c349aa6511 | ||
|  | 3bae0a2d5c | ||
|  | c1c7566089 | ||
|  | 2439999ec8 | ||
|  | 1d96f662e3 | ||
|  | 41d1889941 | ||
|  | 0c3981e0c3 | ||
|  | c727bd4609 | ||
|  | db23749b67 | ||
|  | 751f2b9703 | ||
|  | 741bc836f6 | ||
|  | 697c0603ce | ||
|  | 14bedebb11 | ||
|  | 8546d01a4c | ||
|  | 47b5c07ffb | ||
|  | da86a2bf54 | ||
|  | c1cb60a0b3 | ||
|  | 5ed5b4bfbf | ||
|  | de84aacdfd | ||
|  | 2888003765 | ||
|  | da06bf5b95 | ||
|  | 20999c1370 | ||
|  | 33f0ed1a33 | ||
|  | 50be56433b | ||
|  | 43924007db | ||
|  | 78ef10e60f | ||
| 679ae98b14 | |||
|  | 90f6bc16bb | ||
|  | 9b5b639546 | ||
|  | 945767c6d8 | ||
|  | 422cdf4979 | ||
|  | 38db174f3b | ||
|  | 92e364a35f | ||
| 58299b8ba2 | |||
| 124bf4d829 | |||
| e8e56b3414 | |||
| 89c430136d | |||
| ea9aef7baa | |||
| c9e9e8061d | |||
|  | 453cf2a1c6 | ||
|  | de7bbfa5f9 | ||
| dda8d77c87 | |||
| aa29f4346a | |||
|  | 86116dbed6 | ||
|  | 7bd31e3f7c | ||
|  | 74f451715f | ||
|  | 655be8ed76 | ||
|  | 4063238943 | ||
|  | 3344788fa1 | ||
|  | 99220f6531 | ||
|  | 2a6d093749 | ||
|  | c947947fad | ||
|  | f555b50547 | ||
|  | 738c1a11c2 | ||
|  | f8797e1e3e | ||
|  | fd1eb7de13 | ||
|  | 2ce898efa3 | ||
|  | ab66bac4e6 | ||
|  | 56277a11c8 | ||
|  | 916e9e1d3e | ||
|  | 5b55867a7a | ||
|  | 3accb1ef89 | ||
|  | e3d0e31525 | ||
|  | 5812eb8a8c | ||
|  | 4dd3763294 | ||
|  | c429ace748 | ||
|  | ac58565d0a | ||
|  | 3703b718aa | ||
|  | b722889234 | ||
|  | abba44a837 | ||
|  | f301be94ce | ||
|  | 1d1b225497 | ||
|  | 53a785a3dd | ||
|  | 736bf3c866 | ||
|  | b9bbe5d188 | ||
|  | 3844bcf800 | ||
|  | e1a2319d01 | ||
|  | 180c732b4c | ||
|  | 957a706d0b | ||
|  | d2312e9874 | ||
|  | fc4ab9ccd5 | ||
|  | 4a340aa5ca | ||
|  | 3b7de792d5 | ||
|  | 557c3fa109 | ||
|  | ec18e9f7f6 | ||
|  | a839d5bc55 | ||
|  | de41b84c5c | ||
|  | 8e161152e4 | ||
|  | 3141ebac10 | ||
|  | 7ede696126 | ||
|  | bf516c3b81 | ||
|  | 441a52ee5d | ||
|  | a8db024c92 | ||
|  | a9c22d5f43 | ||
|  | 3ca41458a3 | ||
|  | 9e2d29c644 | ||
|  | 951be75292 | ||
|  | b9113ed310 | ||
| 1407418755 | |||
| a6a0da873f | |||
|  | 42fb49d3fd | ||
|  | 2a54c9aaab | ||
|  | 0957378679 | ||
|  | 2ed6c76fc5 | ||
|  | d3b9a7fa14 | ||
|  | 75ea306ce9 | ||
|  | 4226c633c4 | ||
|  | 5a4eafbf7e | ||
|  | eb8e26018b | ||
|  | db5ea001a3 | ||
|  | 2846f079e5 | ||
|  | 1d502e4ed6 | ||
|  | 73cdf0fffe | ||
|  | 1c25773319 | ||
|  | c38400b26f | ||
|  | 9c3065b860 | ||
|  | 94eb829d08 | ||
|  | 68392ddb5b | ||
|  | cb6b81ae82 | ||
| 90ec6eda0c | |||
| fe8d625694 | |||
| 53e76b41d2 | |||
| 8ef4300412 | |||
| 98a24ebf31 | |||
|  | b12dc89d26 | ||
|  | d80d802f9d | ||
|  | 3d99b09dba | ||
|  | db5f6d3ae3 | ||
|  | 683550f116 | ||
|  | 5e477ec553 | ||
|  | 55d0329624 | ||
|  | 86aaa35294 | ||
|  | 172d3dc93a | ||
|  | 7b03d8d087 | ||
|  | 4b759b8f2a | ||
|  | 8c540333d5 | ||
| ff4e54ef80 | |||
|  | cd1bd921bd | ||
|  | fff5751b1a | ||
|  | 2c81696fdd | ||
|  | c9dc22efa1 | ||
|  | 0ab04a000f | ||
|  | 4c1ea8677e | ||
|  | 120fb59978 | ||
|  | fd56b3ff38 | ||
|  | 0ec6829edc | ||
|  | 18b7845b7b | ||
|  | 3d0fe15374 | ||
|  | 91886068fe | ||
|  | 6d1e9e5f92 | ||
|  | b640230b1e | ||
|  | 038b6ee9cd | ||
|  | 38806343a8 | ||
|  | 831ca4e3bf | ||
|  | b3dede4dd3 | ||
|  | 4e34132f4d | ||
|  | c07cb10247 | ||
|  | d7767a2a62 | ||
|  | ec035983fd | ||
|  | 596dcd85b2 | ||
|  | 7270c6a150 | ||
|  | f8b9ad7d50 | ||
|  | 04a1959895 | ||
|  | 93cc270016 | ||
|  | 29b60f7e1a | ||
|  | 902afcfbaf | ||
|  | 97a6b61551 | ||
|  | f011bdb869 | ||
|  | bafb101e4f | ||
|  | 08fdf05528 | ||
|  | 9e72a6b22e | ||
|  | 1c12c5612c | ||
|  | a8193c4bcb | ||
|  | c3d7ec65fa | ||
|  | 8b6a6c8236 | ||
|  | e0571c872b | ||
|  | c67f41887b | ||
|  | 84687ccf1f | ||
|  | 3274561cf8 | ||
| e08fbb3771 | |||
|  | d7464aa0fe | ||
|  | 00d29153f0 | ||
| 2ce989f220 | |||
|  | d7a1dc85be | ||
|  | fc19503673 | ||
|  | beba824136 | ||
|  | 6ebf8b12b6 | ||
|  | e5a7ed4362 | ||
|  | b9f7ea47c3 | ||
|  | 06f7ee202e | ||
|  | 2b2fc6453f | ||
|  | bdd2765461 | ||
|  | 4a45c06dd7 | ||
|  | d6a7d7d1e0 | ||
|  | 1a122a0dd8 | ||
|  | 20e20733e8 | ||
|  | b7cd1a19e3 | ||
|  | f510002a62 | ||
| eedcaf6470 | |||
|  | 1e257a1251 | ||
|  | 522f6bf91a | ||
|  | d35d87d2c2 | ||
|  | 74a5cda84b | ||
|  | 5be05d85b8 | ||
|  | 35ac85aea8 | ||
|  | fa237401ff | ||
|  | 97053adcb5 | ||
|  | f8fbe4d7a3 | ||
|  | ef31c012bf | ||
|  | 9e9f621d5d | ||
|  | 651e1a7cbc | ||
|  | c4d3672720 | ||
|  | 16be6d378c | ||
|  | f05d0565aa | ||
| b39f0d1fb6 | |||
| 9f1267dfe6 | |||
| 2e90285232 | |||
| e254de982e | |||
| 28d99b5297 | |||
|  | 9bf4108d1f | ||
|  | ee93f0218b | ||
|  | 6929a84c70 | ||
|  | 5c779a789b | ||
| 161ed102a5 | |||
|  | e863a948e3 | ||
|  | f65a585236 | ||
|  | 977f34dca6 | ||
|  | 90ad956340 | ||
|  | 7996f06335 | ||
|  | 7b40a3e3e5 | ||
|  | f7fbbaaca3 | ||
|  | 17629b8d9e | ||
|  | 0baa20d292 | ||
|  | 4571c918a4 | ||
|  | 5251ea4d30 | ||
|  | 7f456b4173 | ||
|  | ae99e99da2 | ||
|  | c291ef77b5 | ||
|  | 7dd2764bb2 | ||
|  | 244f8fb6dc | ||
| f3ca29af6c | |||
| 37988221a8 | |||
|  | 27dfe816fa | ||
|  | af29be2c90 | ||
|  | f96fac0aee | ||
| 7a327a3f28 | |||
|  | 07f2ebea1b | ||
|  | 851f2ad8ef | ||
|  | 23e0561dd6 | ||
|  | 8ae1a95ec6 | ||
|  | 82b7d4eaf0 | ||
|  | 78774fbdc0 | ||
|  | 924130833e | ||
|  | 0157274762 | ||
|  | 87e8aad5a0 | ||
|  | c6f59c2933 | ||
|  | b7f90aa011 | ||
| 92f8950a56 | |||
| 65987a8a58 | |||
| 889d828bc2 | |||
|  | f22b79da8f | ||
|  | 3855673ebf | ||
|  | 4db82da0db | ||
|  | 0cdc3d2fa5 | ||
| ad98b6193d | |||
| fc760016b3 | |||
| 2da86f7dae | |||
|  | 0dfda4bb90 | ||
|  | 1189ebc8b5 | ||
| 97843e2b58 | |||
| 82b3f54697 | |||
|  | 1bb8578173 | ||
| 673994b281 | |||
| bbc0eff078 | |||
| 4c60e31070 | |||
| afbf7d4c37 | |||
| 8c3cc32364 | |||
|  | 5214846341 | ||
| 4c3fd9fa3f | |||
| 17b3a10d46 | |||
| 149a46b92c | |||
|  | ce1a115e0b | ||
| db9c28a773 | |||
| 9ac3ac41df | |||
| 2af9ab9034 | |||
| 6f1ea96293 | |||
| 2e3c5890b6 | |||
| bc6678732f | |||
| b10ae00c8a | |||
|  | 0bd296dda4 | ||
|  | af0ccdd8e9 | ||
|  | 2fb92dbc6e | ||
|  | 5c74b6028b | ||
|  | e0be2b6e6c | ||
|  | ef72f322d2 | ||
|  | 7bc2065113 | ||
|  | 2bd4233919 | ||
|  | 143c70e29f | ||
|  | b812d5e39c | ||
|  | 01480da0a8 | ||
|  | 6ad73145bc | ||
| f7293f2ddb | |||
|  | 62749d05a6 | ||
|  | 3834feb4b7 | ||
|  | 6b8ee7bae0 | ||
|  | 739c2308b5 | ||
|  | 454302414d | ||
|  | a71b69389b | ||
|  | d49e502f53 | ||
|  | 92ec3404f8 | ||
|  | f4ebea3381 | ||
|  | cf167d0cd1 | ||
|  | 6f8b771a37 | ||
|  | 4e1ffdd17c | ||
|  | a783282b8b | ||
|  | 19b85d8486 | ||
|  | c363bdd784 | ||
|  | c30d96ea50 | ||
|  | 7ffe17ada1 | ||
| 330a9b3f4c | |||
|  | 28ff66a381 | ||
|  | 78c7bcee36 | ||
| 00a7b95631 | |||
| 94d8321d01 | |||
|  | ac24cc9f99 | ||
|  | 1d666771f9 | ||
|  | d50055cd96 | ||
|  | 3ab4c8c0bb | ||
|  | 47c7159177 | ||
|  | f415db583a | ||
|  | f55c16f984 | ||
|  | df67e013ca | ||
|  | 3e990c9d0a | ||
|  | 4b740fc8fd | ||
|  | cccd14b09e | ||
|  | e6acffdfc2 | ||
| 26d124283e | |||
| 0d889b7041 | |||
| ab31ad006a | |||
|  | 392130a537 | ||
|  | deef2673b2 | ||
|  | 977b0a6dd9 | ||
|  | 977d844394 | ||
| 6e4a06e180 | |||
|  | 590675e2ca | ||
|  | 8c65bdf6d3 | ||
|  | 74f1ed3bc5 | ||
|  | 79270ef510 | ||
|  | e250e6b7bb | ||
|  | 261342c15f | ||
|  | eda4dd622e | ||
|  | c68a2b9637 | ||
|  | 293df6cd20 | ||
|  | 65f61bb3bf | ||
|  | 26b9740d53 | ||
|  | 6eb873dd96 | ||
|  | 11b4c80b27 | ||
|  | c065e454c3 | ||
|  | d9b5fbd374 | ||
|  | cfbc1a26b8 | ||
|  | 257f69f931 | ||
|  | e415260961 | ||
|  | 446c768cd3 | 
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -92,6 +92,7 @@ build*/* | |||||||
| ##################### | ##################### | ||||||
| *.xcodeproj/* | *.xcodeproj/* | ||||||
| build.sh | build.sh | ||||||
|  | .vscode | ||||||
|  |  | ||||||
| # Eigen source # | # Eigen source # | ||||||
| ################ | ################ | ||||||
| @@ -106,6 +107,10 @@ lib/fftw/* | |||||||
| m4/lt* | m4/lt* | ||||||
| m4/libtool.m4 | m4/libtool.m4 | ||||||
|  |  | ||||||
|  | # github pages # | ||||||
|  | ################ | ||||||
|  | gh-pages/ | ||||||
|  |  | ||||||
| # Buck files # | # Buck files # | ||||||
| ############## | ############## | ||||||
| .buck* | .buck* | ||||||
| @@ -117,3 +122,4 @@ make-bin-BUCK.sh | |||||||
| ##################### | ##################### | ||||||
| lib/qcd/spin/gamma-gen/*.h | lib/qcd/spin/gamma-gen/*.h | ||||||
| lib/qcd/spin/gamma-gen/*.cc | lib/qcd/spin/gamma-gen/*.cc | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										76
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										76
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -7,64 +7,8 @@ cache: | |||||||
| matrix: | matrix: | ||||||
|   include: |   include: | ||||||
|     - os:        osx |     - os:        osx | ||||||
|       osx_image: xcode7.2 |       osx_image: xcode8.3 | ||||||
|       compiler: clang |       compiler: clang | ||||||
|     - compiler: gcc |  | ||||||
|       addons: |  | ||||||
|         apt: |  | ||||||
|           sources: |  | ||||||
|             - ubuntu-toolchain-r-test |  | ||||||
|           packages: |  | ||||||
|             - g++-4.9 |  | ||||||
|             - libmpfr-dev |  | ||||||
|             - libgmp-dev |  | ||||||
|             - libmpc-dev |  | ||||||
|             - libopenmpi-dev |  | ||||||
|             - openmpi-bin |  | ||||||
|             - binutils-dev |  | ||||||
|       env: VERSION=-4.9 |  | ||||||
|     - compiler: gcc |  | ||||||
|       addons: |  | ||||||
|         apt: |  | ||||||
|           sources: |  | ||||||
|             - ubuntu-toolchain-r-test |  | ||||||
|           packages: |  | ||||||
|             - g++-5 |  | ||||||
|             - libmpfr-dev |  | ||||||
|             - libgmp-dev |  | ||||||
|             - libmpc-dev |  | ||||||
|             - libopenmpi-dev |  | ||||||
|             - openmpi-bin |  | ||||||
|             - binutils-dev |  | ||||||
|       env: VERSION=-5 |  | ||||||
|     - compiler: clang |  | ||||||
|       addons: |  | ||||||
|         apt: |  | ||||||
|           sources: |  | ||||||
|             - ubuntu-toolchain-r-test |  | ||||||
|           packages: |  | ||||||
|             - g++-4.8 |  | ||||||
|             - libmpfr-dev |  | ||||||
|             - libgmp-dev |  | ||||||
|             - libmpc-dev |  | ||||||
|             - libopenmpi-dev |  | ||||||
|             - openmpi-bin |  | ||||||
|             - binutils-dev |  | ||||||
|       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz |  | ||||||
|     - compiler: clang |  | ||||||
|       addons: |  | ||||||
|         apt: |  | ||||||
|           sources: |  | ||||||
|             - ubuntu-toolchain-r-test |  | ||||||
|           packages: |  | ||||||
|             - g++-4.8 |  | ||||||
|             - libmpfr-dev |  | ||||||
|             - libgmp-dev |  | ||||||
|             - libmpc-dev |  | ||||||
|             - libopenmpi-dev |  | ||||||
|             - openmpi-bin |  | ||||||
|             - binutils-dev |  | ||||||
|       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz |  | ||||||
|        |        | ||||||
| before_install: | before_install: | ||||||
|     - export GRIDDIR=`pwd` |     - export GRIDDIR=`pwd` | ||||||
| @@ -73,13 +17,15 @@ before_install: | |||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi |  | ||||||
|      |      | ||||||
| install: | install: | ||||||
|     - export CC=$CC$VERSION |     - export CC=$CC$VERSION | ||||||
|     - export CXX=$CXX$VERSION |     - export CXX=$CXX$VERSION | ||||||
|     - echo $PATH |     - echo $PATH | ||||||
|  |     - which autoconf | ||||||
|  |     - autoconf  --version | ||||||
|  |     - which automake | ||||||
|  |     - automake  --version | ||||||
|     - which $CC |     - which $CC | ||||||
|     - $CC  --version |     - $CC  --version | ||||||
|     - which $CXX |     - which $CXX | ||||||
| @@ -92,15 +38,9 @@ script: | |||||||
|     - cd build |     - cd build | ||||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4  |     - make -j4  | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 |     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||||
|     - echo make clean |     - echo make clean | ||||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4 |     - make -j4 | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 |     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||||
|     - echo make clean |     - make check | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi |  | ||||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto |  | ||||||
|     - make -j4 |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							| @@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras | |||||||
|  |  | ||||||
| include $(top_srcdir)/doxygen.inc | include $(top_srcdir)/doxygen.inc | ||||||
|  |  | ||||||
| tests: all | bin_SCRIPTS=grid-config | ||||||
| 	$(MAKE) -C tests tests |  | ||||||
|  |  | ||||||
| .PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) |  | ||||||
|  | .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||||
|  |  | ||||||
|  | tests-local: all | ||||||
|  | bench-local: all | ||||||
|  | check-local: all | ||||||
|  |  | ||||||
| AM_CXXFLAGS += -I$(top_builddir)/include | AM_CXXFLAGS += -I$(top_builddir)/include | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I m4 | ACLOCAL_AMFLAGS = -I m4 | ||||||
|   | |||||||
							
								
								
									
										298
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										298
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,41 +1,13 @@ | |||||||
| # Grid | # Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid) | ||||||
| <table> |  | ||||||
| <tr> |  | ||||||
|     <td>Last stable release</td> |  | ||||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> |  | ||||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> |  | ||||||
|     </td> |  | ||||||
| </tr> |  | ||||||
| <tr> |  | ||||||
|     <td>Development branch</td> |  | ||||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> |  | ||||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> |  | ||||||
|     </td> |  | ||||||
| </tr> |  | ||||||
| </table> |  | ||||||
|  |  | ||||||
| **Data parallel C++ mathematical object library.** | **Data parallel C++ mathematical object library.** | ||||||
|  |  | ||||||
| License: GPL v2. | License: GPL v2. | ||||||
|  |  | ||||||
| Last update Nov 2016. | Last update June 2017. | ||||||
|  |  | ||||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||||
|  |  | ||||||
| ### Bug report |  | ||||||
|  |  | ||||||
| _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ |  | ||||||
|  |  | ||||||
| When you file an issue, please go though the following checklist: |  | ||||||
|  |  | ||||||
| 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  |  | ||||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler. |  | ||||||
| 3. Give the exact `configure` command used. |  | ||||||
| 4. Attach `config.log`. |  | ||||||
| 5. Attach `config.summary`. |  | ||||||
| 6. Attach the output of `make V=1`. |  | ||||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Description | ### Description | ||||||
| @@ -58,13 +30,68 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi | |||||||
| for most programmers. | for most programmers. | ||||||
|  |  | ||||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||||
| Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). | Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. | ||||||
|  |  | ||||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.  | ||||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||||
|  |  | ||||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | MPI, OpenMP, and SIMD parallelism are present in the library. | ||||||
| Please see https://arxiv.org/abs/1512.03487 for more detail. | Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ### Compilers | ||||||
|  |  | ||||||
|  | Intel ICPC v16.0.3 and later | ||||||
|  |  | ||||||
|  | Clang v3.5 and later (need 3.8 and later for OpenMP) | ||||||
|  |  | ||||||
|  | GCC   v4.9.x (recommended) | ||||||
|  |  | ||||||
|  | GCC   v6.3 and later | ||||||
|  |  | ||||||
|  | ### Important:  | ||||||
|  |  | ||||||
|  | Some versions of GCC appear to have a bug under high optimisation (-O2, -O3). | ||||||
|  |  | ||||||
|  | The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates. | ||||||
|  |  | ||||||
|  | GCC   v5.x | ||||||
|  |  | ||||||
|  | GCC   v6.1, v6.2 | ||||||
|  |  | ||||||
|  | ### Bug report | ||||||
|  |  | ||||||
|  | _To help us track and resolve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._ | ||||||
|  |  | ||||||
|  | When you file an issue, please go through the following checklist: | ||||||
|  |  | ||||||
|  | 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  | ||||||
|  | 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. | ||||||
|  | 3. Give the exact `configure` command used. | ||||||
|  | 4. Attach `config.log`. | ||||||
|  | 5. Attach `grid.config.summary`. | ||||||
|  | 6. Attach the output of `make V=1`. | ||||||
|  | 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||||
|  |  | ||||||
|  | ### Required libraries | ||||||
|  | Grid requires: | ||||||
|  |  | ||||||
|  | [GMP](https://gmplib.org/),  | ||||||
|  |  | ||||||
|  | [MPFR](http://www.mpfr.org/)  | ||||||
|  |  | ||||||
|  | Bootstrapping Grid downloads the Eigen library, which is used internally for dense matrix (non-QCD) operations. | ||||||
|  |  | ||||||
|  | Grid optionally uses: | ||||||
|  |  | ||||||
|  | [HDF5](https://support.hdfgroup.org/HDF5/)   | ||||||
|  |  | ||||||
|  | [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.  | ||||||
|  |  | ||||||
|  | [FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. | ||||||
|  |  | ||||||
|  | LAPACK either generic version or Intel MKL library. | ||||||
|  |  | ||||||
|  |  | ||||||
| ### Quick start | ### Quick start | ||||||
| First, start by cloning the repository: | First, start by cloning the repository: | ||||||
| @@ -95,10 +122,10 @@ install Grid. Other options are detailed in the next section, you can also use ` | |||||||
| `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | ||||||
| customise the build. | customise the build. | ||||||
|  |  | ||||||
| Finally, you can build and install Grid: | Finally, you can build, check, and install Grid: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| make; make install | make; make check; make install | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||||
| @@ -121,7 +148,7 @@ If you want to build all the tests at once just use `make tests`. | |||||||
| - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | ||||||
| - `--enable-precision={single|double}`: set the default precision (default: `double`). | - `--enable-precision={single|double}`: set the default precision (default: `double`). | ||||||
| - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below. | - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below. | ||||||
| - `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). | - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). | ||||||
| - `--disable-timers`: disable system dependent high-resolution timers. | - `--disable-timers`: disable system dependent high-resolution timers. | ||||||
| - `--enable-chroma`: enable Chroma regression tests. | - `--enable-chroma`: enable Chroma regression tests. | ||||||
| - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | ||||||
| @@ -135,7 +162,6 @@ The following options can be use with the `--enable-comms=` option to target dif | |||||||
| | `none`         | no communications                                             | | | `none`         | no communications                                             | | ||||||
| | `mpi[-auto]`   | MPI communications                                            | | | `mpi[-auto]`   | MPI communications                                            | | ||||||
| | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | ||||||
| | `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | |  | ||||||
| | `shmem `       | Cray SHMEM communications                                     | | | `shmem `       | Cray SHMEM communications                                     | | ||||||
|  |  | ||||||
| For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | ||||||
| @@ -153,13 +179,13 @@ The following options can be use with the `--enable-simd=` option to target diff | |||||||
| | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | ||||||
| | `AVX2`      | AVX 2 (256 bit)                        | | | `AVX2`      | AVX 2 (256 bit)                        | | ||||||
| | `AVX512`    | AVX 512 bit                            | | | `AVX512`    | AVX 512 bit                            | | ||||||
| | `QPX`       | QPX (256 bit)                          | | | `NEONv8`    | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit)                     | | ||||||
|  | | `QPX`       | IBM QPX (256 bit)                      | | ||||||
|  |  | ||||||
| Alternatively, some CPU codenames can be directly used: | Alternatively, some CPU codenames can be directly used: | ||||||
|  |  | ||||||
| | `<code>`    | Description                            | | | `<code>`    | Description                            | | ||||||
| | ----------- | -------------------------------------- | | | ----------- | -------------------------------------- | | ||||||
| | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | |  | ||||||
| | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||||
| | `BGQ`       | Blue Gene/Q                            | | | `BGQ`       | Blue Gene/Q                            | | ||||||
|  |  | ||||||
| @@ -177,20 +203,204 @@ The following configuration is recommended for the Intel Knights Landing platfor | |||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi-auto  \ |              --enable-comms=mpi-auto  \ | ||||||
|              --with-gmp=<path>        \ |  | ||||||
|              --with-mpfr=<path>       \ |  | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=icpc MPICXX=mpiicpc |              CXX=icpc MPICXX=mpiicpc | ||||||
| ``` | ``` | ||||||
|  | The MKL flag enables use of BLAS and FFTW from the Intel Math Kernel Library. | ||||||
|  |  | ||||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||||
|  |  | ||||||
| ``` bash | ``` bash | ||||||
| ../configure --enable-precision=double\ | ../configure --enable-precision=double\ | ||||||
|              --enable-simd=KNL        \ |              --enable-simd=KNL        \ | ||||||
|              --enable-comms=mpi       \ |              --enable-comms=mpi       \ | ||||||
|              --with-gmp=<path>        \ |  | ||||||
|              --with-mpfr=<path>       \ |  | ||||||
|              --enable-mkl             \ |              --enable-mkl             \ | ||||||
|              CXX=CC CC=cc |              CXX=CC CC=cc | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
|  | If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||||
|  | ``` bash | ||||||
|  |                --with-gmp=<path>        \ | ||||||
|  |                --with-mpfr=<path>       \ | ||||||
|  | ``` | ||||||
|  | where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||||
|  |  | ||||||
|  | Knights Landing nodes with two Intel Omni-Path adapters per node  | ||||||
|  | presently perform better when more than one rank per node is used, with shared memory  | ||||||
|  | for intra-node communication. This is the mpi3 communications implementation.  | ||||||
|  | We recommend four ranks per node for best performance, but the optimum depends on the local volume. | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  | ../configure --enable-precision=double\ | ||||||
|  |              --enable-simd=KNL        \ | ||||||
|  |              --enable-comms=mpi3-auto \ | ||||||
|  |              --enable-mkl             \ | ||||||
|  |              CXX=icpc MPICXX=mpiicpc  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ### Build setup for Intel Haswell Xeon platform | ||||||
|  |  | ||||||
|  | The following configuration is recommended for the Intel Haswell platform: | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  | ../configure --enable-precision=double\ | ||||||
|  |              --enable-simd=AVX2       \ | ||||||
|  |              --enable-comms=mpi3-auto \ | ||||||
|  |              --enable-mkl             \ | ||||||
|  |              CXX=icpc MPICXX=mpiicpc | ||||||
|  | ``` | ||||||
|  | The MKL flag enables use of BLAS and FFTW from the Intel Math Kernel Library. | ||||||
|  |  | ||||||
|  | If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||||
|  | ``` bash | ||||||
|  |                --with-gmp=<path>        \ | ||||||
|  |                --with-mpfr=<path>       \ | ||||||
|  | ``` | ||||||
|  | where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||||
|  |  | ||||||
|  | If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  | ../configure --enable-precision=double\ | ||||||
|  |              --enable-simd=AVX2       \ | ||||||
|  |              --enable-comms=mpi3      \ | ||||||
|  |              --enable-mkl             \ | ||||||
|  |              CXX=CC CC=cc | ||||||
|  | ``` | ||||||
|  | Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  | ||||||
|  | one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using | ||||||
|  | ``` | ||||||
|  |         export I_MPI_PIN=1 | ||||||
|  | ``` | ||||||
|  | This is the default. | ||||||
|  |  | ||||||
|  | ### Build setup for Intel Skylake Xeon platform | ||||||
|  |  | ||||||
|  | The following configuration is recommended for the Intel Skylake platform: | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  | ../configure --enable-precision=double\ | ||||||
|  |              --enable-simd=AVX512     \ | ||||||
|  |              --enable-comms=mpi3      \ | ||||||
|  |              --enable-mkl             \ | ||||||
|  |              CXX=mpiicpc | ||||||
|  | ``` | ||||||
|  | The MKL flag enables use of BLAS and FFTW from the Intel Math Kernel Library. | ||||||
|  |  | ||||||
|  | If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||||
|  | ``` bash | ||||||
|  |                --with-gmp=<path>        \ | ||||||
|  |                --with-mpfr=<path>       \ | ||||||
|  | ``` | ||||||
|  | where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||||
|  |  | ||||||
|  | If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  | ../configure --enable-precision=double\ | ||||||
|  |              --enable-simd=AVX512     \ | ||||||
|  |              --enable-comms=mpi3      \ | ||||||
|  |              --enable-mkl             \ | ||||||
|  |              CXX=CC CC=cc | ||||||
|  | ``` | ||||||
|  | Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  | ||||||
|  | one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using | ||||||
|  | ```  | ||||||
|  |         export I_MPI_PIN=1 | ||||||
|  | ``` | ||||||
|  | This is the default.  | ||||||
|  |  | ||||||
|  | #### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping:  | ||||||
|  |  | ||||||
|  | mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18  | ||||||
|  |  | ||||||
|  | TBA | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ### Build setup for AMD EPYC / RYZEN | ||||||
|  |  | ||||||
|  | The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. | ||||||
|  | So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total | ||||||
|  | are common. Each chip within the module exposes a separate NUMA domain. | ||||||
|  | There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. | ||||||
|  | MPI-3 is recommended with the use of four ranks per socket, | ||||||
|  | and 8 threads per rank.  | ||||||
|  |  | ||||||
|  | The following configuration is recommended for the AMD EPYC platform. | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  | ../configure --enable-precision=double\ | ||||||
|  |              --enable-simd=AVX2       \ | ||||||
|  |              --enable-comms=mpi3 \ | ||||||
|  |              CXX=mpicxx  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||||
|  | ``` bash | ||||||
|  |                --with-gmp=<path>        \ | ||||||
|  |                --with-mpfr=<path>       \ | ||||||
|  | ``` | ||||||
|  | where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||||
|  |  | ||||||
|  | Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. | ||||||
|  | This can be done by launching each MPI rank through a wrapper script, omp_bind.sh.  | ||||||
|  |  | ||||||
|  | It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and | ||||||
|  | shared memory to communicate within this node: | ||||||
|  |  | ||||||
|  | mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4  | ||||||
|  |  | ||||||
|  | Where omp_bind.sh does the following: | ||||||
|  | ``` | ||||||
|  | #!/bin/bash | ||||||
|  |  | ||||||
|  | numanode=` expr $PMI_RANK % 8 ` | ||||||
|  | basecore=`expr $numanode \* 16` | ||||||
|  | core0=`expr $basecore + 0 ` | ||||||
|  | core1=`expr $basecore + 2 ` | ||||||
|  | core2=`expr $basecore + 4 ` | ||||||
|  | core3=`expr $basecore + 6 ` | ||||||
|  | core4=`expr $basecore + 8 ` | ||||||
|  | core5=`expr $basecore + 10 ` | ||||||
|  | core6=`expr $basecore + 12 ` | ||||||
|  | core7=`expr $basecore + 14 ` | ||||||
|  |  | ||||||
|  | export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" | ||||||
|  | echo GOMP_CPU_AFFINITY $GOMP_CPU_AFFINITY | ||||||
|  |  | ||||||
|  | $@ | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | Performance: | ||||||
|  |  | ||||||
|  | #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping:  | ||||||
|  |  | ||||||
|  | mpirun  -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 | ||||||
|  |  | ||||||
|  | TBA | ||||||
|  |  | ||||||
|  | ### Build setup for BlueGene/Q | ||||||
|  |  | ||||||
|  | To be written... | ||||||
|  |  | ||||||
|  | ### Build setup for ARM Neon | ||||||
|  |  | ||||||
|  | To be written... | ||||||
|  |  | ||||||
|  | ### Build setup for laptops, other compilers, non-cluster builds | ||||||
|  |  | ||||||
|  | Many versions of g++ and clang++ work with Grid; using them merely involves replacing CXX (and MPICXX) | ||||||
|  | and omitting the `--enable-mkl` flag.  | ||||||
|  |  | ||||||
|  | Single node builds are enabled with  | ||||||
|  | ``` | ||||||
|  |             --enable-comms=none | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | FFTW support that is not in the default search path may then be enabled with | ||||||
|  | ``` | ||||||
|  |     --with-fftw=<installpath> | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										70
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										70
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,6 +1,35 @@ | |||||||
| TODO: | TODO: | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
|  | Large item work list: | ||||||
|  |  | ||||||
|  | 1)- BG/Q port and check | ||||||
|  | 2)- Christoph's local basis expansion Lanczos | ||||||
|  | 3)- Precision conversion and sort out localConvert      <-- partial | ||||||
|  |  | ||||||
|  |   - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | ||||||
|  | 4)- Physical propagator interface | ||||||
|  | 5)- Conserved currents | ||||||
|  | 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||||
|  | 7)- HDCR resume | ||||||
|  |  | ||||||
|  | Recent DONE  | ||||||
|  |  | ||||||
|  | -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O.  <--- DONE | ||||||
|  | -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE | ||||||
|  | -- GaugeFix into central location                      <-- DONE | ||||||
|  | -- Scidac and Ildg metadata handling                   <-- DONE | ||||||
|  | -- Binary I/O MPI2 IO                                  <-- DONE | ||||||
|  | -- Binary I/O speed up & x-strips                      <-- DONE | ||||||
|  | -- Cut down the exterior overhead                      <-- DONE | ||||||
|  | -- Interior legs from SHM comms                        <-- DONE | ||||||
|  | -- Half-precision comms                                <-- DONE | ||||||
|  | -- Merge high precision reduction into develop         <-- DONE | ||||||
|  | -- BlockCG, BCGrQ                                      <-- DONE | ||||||
|  | -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE | ||||||
|  |    -- slice* linalg routines for multiRHS, BlockCG     | ||||||
|  |  | ||||||
|  | ----- | ||||||
| * Forces; the UdSdU  term in gauge force term is half of what I think it should | * Forces; the UdSdU  term in gauge force term is half of what I think it should | ||||||
|   be. This is a consequence of taking ONLY the first term in: |   be. This is a consequence of taking ONLY the first term in: | ||||||
|  |  | ||||||
| @@ -21,16 +50,8 @@ TODO: | |||||||
|   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. |   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. | ||||||
|   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. |   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. | ||||||
|  |  | ||||||
|  |  | ||||||
| Policies: |  | ||||||
|  |  | ||||||
| * Link smearing/boundary conds; Policy class based implementation ; framework more in place |  | ||||||
|  |  | ||||||
| * Support different boundary conditions (finite temp, chem. potential ... ) | * Support different boundary conditions (finite temp, chem. potential ... ) | ||||||
|  |  | ||||||
| * Support different fermion representations?  |  | ||||||
|   - contained entirely within the integrator presently |  | ||||||
|  |  | ||||||
| - Sign of force term. | - Sign of force term. | ||||||
|  |  | ||||||
| - Reversibility test. | - Reversibility test. | ||||||
| @@ -41,11 +62,6 @@ Policies: | |||||||
|  |  | ||||||
| - Audit oIndex usage for cb behaviour | - Audit oIndex usage for cb behaviour | ||||||
|  |  | ||||||
| - Rectangle gauge actions. |  | ||||||
|   Iwasaki, |  | ||||||
|   Symanzik, |  | ||||||
|   ... etc... |  | ||||||
|  |  | ||||||
| - Prepare multigrid for HMC. - Alternate setup schemes. | - Prepare multigrid for HMC. - Alternate setup schemes. | ||||||
|  |  | ||||||
| - Support for ILDG --- ugly, not done | - Support for ILDG --- ugly, not done | ||||||
| @@ -55,9 +71,11 @@ Policies: | |||||||
| - FFTnD ? | - FFTnD ? | ||||||
|  |  | ||||||
| - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | ||||||
|  |  | ||||||
| - Gparity force term; Gparity (R)HMC. | - Gparity force term; Gparity (R)HMC. | ||||||
| - Random number state save restore |  | ||||||
| - Mobius implementation clean up to remove #if 0 stale code sequences | - Mobius implementation clean up to remove #if 0 stale code sequences | ||||||
|  |  | ||||||
| - CG -- profile carefully, kernel fusion, whole CG performance measurements. | - CG -- profile carefully, kernel fusion, whole CG performance measurements. | ||||||
|  |  | ||||||
| ================================================================ | ================================================================ | ||||||
| @@ -90,6 +108,7 @@ Insert/Extract | |||||||
| Not sure of status of this -- reverify. Things are working nicely now though. | Not sure of status of this -- reverify. Things are working nicely now though. | ||||||
|  |  | ||||||
| * Make the Tensor types and Complex etc... play more nicely. | * Make the Tensor types and Complex etc... play more nicely. | ||||||
|  |  | ||||||
|   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > |   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > | ||||||
|     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I |     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I | ||||||
|     want to introduce a syntax that does not require this. |     want to introduce a syntax that does not require this. | ||||||
| @@ -112,6 +131,8 @@ Not sure of status of this -- reverify. Things are working nicely now though. | |||||||
| RECENT | RECENT | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
|  |   - Support different fermion representations? -- DONE | ||||||
|  |   - contained entirely within the integrator presently | ||||||
|   - Clean up HMC                                                             -- DONE |   - Clean up HMC                                                             -- DONE | ||||||
|   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE |   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE | ||||||
|   - Simplified the integrators a bit.                                        -- DONE |   - Simplified the integrators a bit.                                        -- DONE | ||||||
| @@ -123,6 +144,26 @@ RECENT | |||||||
|   - Parallel io improvements                                  -- DONE |   - Parallel io improvements                                  -- DONE | ||||||
|   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE |   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE | ||||||
|  |  | ||||||
|  |  | ||||||
|  | DONE: | ||||||
|  | - MultiArray -- MultiRHS done | ||||||
|  | - ConjugateGradientMultiShift -- DONE | ||||||
|  | - MCR                         -- DONE | ||||||
|  | - Remez -- Mike or Boost?     -- DONE | ||||||
|  | - Proto (ET)                  -- DONE | ||||||
|  | - uBlas                       -- DONE ; Eigen | ||||||
|  | - Potentially Useful Boost libraries -- DONE ; Eigen | ||||||
|  | - Aligned allocator; memory pool -- DONE | ||||||
|  | - Multiprecision              -- DONE | ||||||
|  | - Serialization               -- DONE | ||||||
|  | - Regex -- Not needed | ||||||
|  | - Tokenize -- Why? | ||||||
|  |  | ||||||
|  | - Random number state save restore -- DONE | ||||||
|  | - Rectangle gauge actions. -- DONE | ||||||
|  |   Iwasaki, | ||||||
|  |   Symanzik, | ||||||
|  |   ... etc... | ||||||
| Done: Cayley, Partial , ContFrac force terms. | Done: Cayley, Partial , ContFrac force terms. | ||||||
|  |  | ||||||
| DONE | DONE | ||||||
| @@ -207,6 +248,7 @@ Done | |||||||
| FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | ||||||
| ====================================================================================================== | ====================================================================================================== | ||||||
|  |  | ||||||
|  | * Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE | ||||||
| * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | ||||||
|   user pass these? Is this a QCD specific? |   user pass these? Is this a QCD specific? | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								VERSION
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								VERSION
									
									
									
									
									
								
							| @@ -1,6 +1,5 @@ | |||||||
| Version : 0.6.0 | Version : 0.7.0 | ||||||
|  |  | ||||||
| - AVX512, AVX2, AVX, SSE good | - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | ||||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | - MPI and MPI3 comms optimisations for KNL and OPA finished | ||||||
| - MPI and MPI3 | - Half precision comms | ||||||
| - HiRep, Smearing, Generic gauge group |  | ||||||
|   | |||||||
							
								
								
									
										775
									
								
								benchmarks/Benchmark_ITT.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										775
									
								
								benchmarks/Benchmark_ITT.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,775 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./benchmarks/Benchmark_ITT.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Grid::QCD; | ||||||
|  |  | ||||||
// Shorthand names for WilsonFermion5D instantiated on the three DomainWallVec5dImpl
// variants (R, F, D suffixes).
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;


// File-scope result tables. None of these are read or written in the portion of the
// file visible here; presumably later benchmark stages fill them -- TODO confirm.
std::vector<int> L_list;
std::vector<int> Ls_list;
std::vector<double> mflop_list;

// NOTE(review): looks like a reference flop rate and its error -- confirm against users.
double mflop_ref;
double mflop_ref_err;

int NN_global;
|  |  | ||||||
// Summary statistics over a set of timing samples.
//
//   mean : arithmetic mean of the samples
//   err  : standard error of the mean, sqrt( sum_i (x_i - mean)^2 / (N*(N-1)) )
//   min  : smallest sample
//   max  : largest sample
//
// All four fields are zero until statistics() has been called with a non-empty vector.
struct time_statistics{
  double mean = 0.0;
  double err  = 0.0;
  double min  = 0.0;
  double max  = 0.0;

  // Recompute every field from the samples in v.
  //
  // An empty v resets all fields to zero (the original dereferenced an end iterator
  // and divided by zero).  A single sample yields err = 0 rather than 0/0 = NaN,
  // because the N*(N-1) denominator vanishes for N == 1.
  void statistics(const std::vector<double> &v){
    const std::size_t N = v.size();
    if ( N == 0 ) {
      mean = err = min = max = 0.0;
      return;
    }

    mean = std::accumulate(v.begin(), v.end(), 0.0) / N;

    // Sum of squared deviations from the mean, accumulated in sample order.
    double sq_sum = 0.0;
    for (double x : v) {
      const double d = x - mean;
      sq_sum += d*d;
    }
    err = ( N > 1 ) ? std::sqrt(sq_sum / (N*(N - 1))) : 0.0;

    auto extrema = std::minmax_element(v.begin(), v.end());
    min = *extrema.first;
    max = *extrema.second;
  }
};
|  |  | ||||||
|  | void comms_header(){ | ||||||
|  |   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||||
|  |             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||||
|  | }; | ||||||
|  |  | ||||||
// The four Gamma::Algebra entries GammaX, GammaY, GammaZ, GammaT, stored in that order.
// Not referenced in the portion of the file visible here.
Gamma::Algebra Gmu [] = {
  Gamma::Algebra::GammaX,
  Gamma::Algebra::GammaY,
  Gamma::Algebra::GammaZ,
  Gamma::Algebra::GammaT
};
// Plain aggregate of three benchmark settings. Nothing in the visible part of the file
// reads or writes it, so the field notes below are inferred from names only.
struct controls {
  int Opt;          // presumably selects a kernel optimisation variant -- TODO confirm
  int CommsOverlap; // presumably toggles overlap of comms and compute -- TODO confirm
  Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; // communicator policy value
  //  int HugePages;
};
|  |  | ||||||
|  | class Benchmark { | ||||||
|  | public: | ||||||
|  |   static void Decomposition (void ) { | ||||||
|  |  | ||||||
|  |     int threads = GridThread::GetThreads(); | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; | ||||||
|  |     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   static void Comms(void) | ||||||
|  |   { | ||||||
|  |     int Nloop=200; | ||||||
|  |     int nmu=0; | ||||||
|  |     int maxlat=32; | ||||||
|  |  | ||||||
|  |     std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); | ||||||
|  |     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|  |     for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||||
|  |  | ||||||
|  |     std::vector<double> t_time(Nloop); | ||||||
|  |     time_statistics timestat; | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|  |     comms_header(); | ||||||
|  |  | ||||||
|  |     for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|  |       for(int Ls=8;Ls<=8;Ls*=2){ | ||||||
|  |  | ||||||
|  | 	std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|  | 	      lat*mpi_layout[1], | ||||||
|  | 	      lat*mpi_layout[2], | ||||||
|  | 	      lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|  | 	GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  | 	RealD Nrank = Grid._Nprocessors; | ||||||
|  | 	RealD Nnode = Grid.NodeCount(); | ||||||
|  | 	RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|  | 	std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|  | 	std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
|  | 	Grid.ShmBufferFreeAll(); | ||||||
|  | 	for(int d=0;d<8;d++){ | ||||||
|  | 	  xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	  rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  | 	int ncomm; | ||||||
|  | 	double dbytes; | ||||||
|  | 	std::vector<double> times(Nloop); | ||||||
|  | 	for(int i=0;i<Nloop;i++){ | ||||||
|  |  | ||||||
|  | 	  double start=usecond(); | ||||||
|  |  | ||||||
|  | 	  dbytes=0; | ||||||
|  | 	  ncomm=0; | ||||||
|  |  | ||||||
|  | 	  parallel_for(int dir=0;dir<8;dir++){ | ||||||
|  |  | ||||||
|  | 	    double tbytes; | ||||||
|  | 	    int mu =dir % 4; | ||||||
|  |  | ||||||
|  | 	    if (mpi_layout[mu]>1 ) { | ||||||
|  | 	         | ||||||
|  | 	      int xmit_to_rank; | ||||||
|  | 	      int recv_from_rank; | ||||||
|  | 	      if ( dir == mu ) {  | ||||||
|  | 		int comm_proc=1; | ||||||
|  | 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	      } else {  | ||||||
|  | 		int comm_proc = mpi_layout[mu]-1; | ||||||
|  | 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	      } | ||||||
|  | 	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | ||||||
|  | 						 (void *)&rbuf[dir][0], recv_from_rank, | ||||||
|  | 						 bytes,dir); | ||||||
|  | 	   | ||||||
|  | #ifdef GRID_OMP | ||||||
|  | #pragma omp atomic | ||||||
|  | #endif | ||||||
|  | 	      ncomm++; | ||||||
|  |  | ||||||
|  | #ifdef GRID_OMP | ||||||
|  | #pragma omp atomic | ||||||
|  | #endif | ||||||
|  | 	      dbytes+=tbytes; | ||||||
|  | 	    } | ||||||
|  | 	  } | ||||||
|  | 	  Grid.Barrier(); | ||||||
|  | 	  double stop=usecond(); | ||||||
|  | 	  t_time[i] = stop-start; // microseconds | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	timestat.statistics(t_time); | ||||||
|  | 	//	for(int i=0;i<t_time.size();i++){ | ||||||
|  | 	//	  std::cout << i<<" "<<t_time[i]<<std::endl; | ||||||
|  | 	//	} | ||||||
|  |  | ||||||
|  | 	dbytes=dbytes*ppn; | ||||||
|  | 	double xbytes    = dbytes*0.5; | ||||||
|  | 	double rbytes    = dbytes*0.5; | ||||||
|  | 	double bidibytes = dbytes; | ||||||
|  |  | ||||||
|  | 	std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  | 		 <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  | 		 <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  | 		 <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  | 		 << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  | 		 << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|  |   | ||||||
|  | 	 | ||||||
|  | 	    } | ||||||
|  |     }     | ||||||
|  |  | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Memory-bandwidth benchmark. For local extents lat = 8, 12, ..., lmax it | ||||||
|  |   // repeatedly evaluates z = a*x - y on lattices of Nvec-component real vectors | ||||||
|  |   // and prints one row per size: lat, bytes, bytes/time, flops/time, seconds. | ||||||
|  |   // Takes no arguments and returns nothing; every result goes to std::cout. | ||||||
|  |   static void Memory(void) | ||||||
|  |   { | ||||||
|  |     const int Nvec=8; | ||||||
|  |     typedef Lattice< iVector< vReal,Nvec> > LatticeVec; | ||||||
|  |     typedef iVector<vReal,Nvec> Vec; | ||||||
|  |  | ||||||
|  |     std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); | ||||||
|  |     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |    | ||||||
|  |   // lmax is the largest local extent. NLOOP scales the repeat count as | ||||||
|  |   // 100*(lmax/lat)^4, i.e. 100 repeats at lat == lmax and more for smaller lattices. | ||||||
|  |   // The macro is left defined afterwards, exactly as before, in case later code uses it. | ||||||
|  |   uint64_t lmax=48; | ||||||
|  | #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) | ||||||
|  |  | ||||||
|  |     GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |     const int lat_max = static_cast<int>(lmax); // compare int with int in the loop below | ||||||
|  |     for(int lat=8;lat<=lat_max;lat+=4){ | ||||||
|  |  | ||||||
|  |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|  |       // Accumulate the global volume in 64 bits: multiplying the four int extents | ||||||
|  |       // directly is done in int and overflows for large rank layouts. | ||||||
|  |       int64_t vol = 1; | ||||||
|  |       for(std::size_t d=0;d<latt_size.size();d++){ | ||||||
|  |         vol *= static_cast<int64_t>(latt_size[d]); | ||||||
|  |       } | ||||||
|  |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|  |       // One random Vec is drawn and assigned to all three lattices. | ||||||
|  |       Vec rn ; random(sRNG,rn); | ||||||
|  |  | ||||||
|  |       LatticeVec z(&Grid); z=rn; | ||||||
|  |       LatticeVec x(&Grid); x=rn; | ||||||
|  |       LatticeVec y(&Grid); y=rn; | ||||||
|  |       double a=2.0; | ||||||
|  |  | ||||||
|  |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|  |       double start=usecond(); | ||||||
|  |       for(uint64_t i=0;i<Nloop;i++){ | ||||||
|  |         z=a*x-y; | ||||||
|  |         x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away | ||||||
|  |         y._odata[4]=z._odata[4]; | ||||||
|  |       } | ||||||
|  |       double stop=usecond(); | ||||||
|  |       // Per-iteration time scaled by 1000; with usecond() in microseconds this is | ||||||
|  |       // nanoseconds, which makes bytes/time GB/s -- NOTE(review): confirm usecond() units. | ||||||
|  |       double time = (stop-start)/Nloop*1000; | ||||||
|  |       | ||||||
|  |       double flops=vol*Nvec*2;// mul,add | ||||||
|  |       double bytes=3.0*vol*Nvec*sizeof(Real); // three lattices (x, y, z) touched per iteration | ||||||
|  |       std::cout<<GridLogMessage<<std::setprecision(3)  | ||||||
|  |                << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   static double DWF5(int Ls,int L) /* Benchmark sDw.DhopEO (a WilsonFermion5DR) with fifth-dimension extent Ls; latt4 is L times the node count in each dimension. */ | ||||||
|  |   { /* Returns mflops_best, the largest per-case value of flops/timestat.mean over the cases timed below. */ | ||||||
|  |     RealD mass=0.1; /* not referenced again in this function */ | ||||||
|  |     RealD M5  =1.8; | ||||||
|  |  | ||||||
|  |     double mflops; | ||||||
|  |     double mflops_best = 0; /* 0 also means "not set yet"; see the ==0 tests after each timing */ | ||||||
|  |     double mflops_worst= 0; | ||||||
|  |     std::vector<double> mflops_all; | ||||||
|  |  | ||||||
|  |     /////////////////////////////////////////////////////// | ||||||
|  |     // Set/Get the layout & grid size | ||||||
|  |     /////////////////////////////////////////////////////// | ||||||
|  |     int threads = GridThread::GetThreads(); | ||||||
|  |     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||||
|  |     std::vector<int> local({L,L,L,L}); | ||||||
|  |     /* TmpGrid is only queried for the rank and node counts below. */ | ||||||
|  |     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  | ||||||
|  | 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|  |     uint64_t NP = TmpGrid->RankCount(); | ||||||
|  |     uint64_t NN = TmpGrid->NodeCount(); | ||||||
|  |     NN_global=NN; | ||||||
|  |     uint64_t SHM=NP/NN; /* ranks per node */ | ||||||
|  |     /* Per-dimension intranode split chosen from SHM; only 1, 2, 4 and 8 are handled, any other value reaches assert(0). */ | ||||||
|  |     std::vector<int> internal; | ||||||
|  |     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); | ||||||
|  |     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); | ||||||
|  |     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); | ||||||
|  |     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); | ||||||
|  |     else assert(0); | ||||||
|  |     /* nodes = MPI ranks per dimension divided by the intranode split; latt4 = local extent times nodes. */ | ||||||
|  |     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); | ||||||
|  |     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); | ||||||
|  |  | ||||||
|  |     ///////// Welcome message //////////// | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |     ///////// Lattice Init //////////// | ||||||
|  |     GridCartesian         * UGrid    = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|  |     GridRedBlackCartesian * UrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |     GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); | ||||||
|  |     GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); | ||||||
|  |     GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); | ||||||
|  |     GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |     ///////// RNG Init //////////// | ||||||
|  |     std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |     std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |     GridParallelRNG          RNG5(sFGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||||
|  |  | ||||||
|  |     ///////// Source preparation //////////// | ||||||
|  |     LatticeFermion src   (sFGrid); random(RNG5,src); | ||||||
|  |     LatticeFermion tmp   (sFGrid); | ||||||
|  |     /* Rescale src by 1/sqrt(norm2(src)). */ | ||||||
|  |     RealD N2 = 1.0/::sqrt(norm2(src)); | ||||||
|  |     src = src*N2; | ||||||
|  |      | ||||||
|  |     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  | ||||||
|  |  | ||||||
|  |     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); | ||||||
|  |     LatticeFermion src_e (sFrbGrid); | ||||||
|  |     LatticeFermion src_o (sFrbGrid); | ||||||
|  |     LatticeFermion r_e   (sFrbGrid); | ||||||
|  |     LatticeFermion r_o   (sFrbGrid); | ||||||
|  |     LatticeFermion r_eo  (sFGrid); | ||||||
|  |     LatticeFermion err   (sFGrid); | ||||||
|  |     { | ||||||
|  |       /* Split src into its Even and Odd checkerboards; only src_o is fed to the timed kernel. */ | ||||||
|  |       pickCheckerboard(Even,src_e,src); | ||||||
|  |       pickCheckerboard(Odd,src_o,src); | ||||||
|  |       /* One controls entry per kernel Opt / Comms mode / communicator policy; AVX512 builds add two OptInlineAsm cases. fmt labels the per-case columns printed at the end. */ | ||||||
|  | #if defined(AVX512)  | ||||||
|  |       const int num_cases = 6; | ||||||
|  |       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); | ||||||
|  | #else | ||||||
|  |       const int num_cases = 4; | ||||||
|  |       std::string fmt("U/S ; U/O ; G/S ; G/O "); | ||||||
|  | #endif | ||||||
|  |       controls Cases [] = { | ||||||
|  | #ifdef AVX512 | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | #endif | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } | ||||||
|  |       };  | ||||||
|  |  | ||||||
|  |       for(int c=0;c<num_cases;c++) { | ||||||
|  | 	/* Install this case's settings in WilsonKernelsStatic and CartesianCommunicator before timing. */ | ||||||
|  | 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||||
|  | 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; | ||||||
|  | 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||||
|  |  | ||||||
|  | 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|  | 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  | 	/* Warm-up: nwarm calls between two barriers; t0/t1 taken here only feed the commented-out ncall estimate. */ | ||||||
|  | 	int nwarm = 100; | ||||||
|  | 	double t0=usecond(); | ||||||
|  | 	sFGrid->Barrier(); | ||||||
|  | 	for(int i=0;i<nwarm;i++){ | ||||||
|  | 	  sDw.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  | 	} | ||||||
|  | 	sFGrid->Barrier(); | ||||||
|  | 	double t1=usecond(); | ||||||
|  | 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); | ||||||
|  | 	//	if (ncall < 500) ncall = 500; | ||||||
|  | 	uint64_t ncall = 500; /* fixed number of timed calls */ | ||||||
|  |  | ||||||
|  | 	sFGrid->Broadcast(0,&ncall,sizeof(ncall)); /* presumably keeps ncall identical on every rank -- confirm Broadcast semantics */ | ||||||
|  |  | ||||||
|  | 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; | ||||||
|  | 	sDw.ZeroCounters(); | ||||||
|  | 	/* Time every DhopEO call separately; t_time[i] holds the duration of call i. */ | ||||||
|  | 	time_statistics timestat; | ||||||
|  | 	std::vector<double> t_time(ncall); | ||||||
|  | 	for(uint64_t i=0;i<ncall;i++){ | ||||||
|  | 	  t0=usecond(); | ||||||
|  | 	  sDw.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  | 	  t1=usecond(); | ||||||
|  | 	  t_time[i] = t1-t0; | ||||||
|  | 	} | ||||||
|  | 	sFGrid->Barrier(); | ||||||
|  | 	/* volume = Ls times the product of latt4. NOTE(review): 1344 looks like a per-site flop count and /2 the single checkerboard -- confirm. */ | ||||||
|  | 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||||
|  | 	double flops=(1344.0*volume)/2; | ||||||
|  | 	double mf_hi, mf_lo, mf_err; | ||||||
|  |  | ||||||
|  | 	timestat.statistics(t_time); | ||||||
|  | 	mf_hi = flops/timestat.min; | ||||||
|  | 	mf_lo = flops/timestat.max; | ||||||
|  | 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||||
|  | 	/* flops per unit of usecond(): Mflop/s if usecond() returns microseconds -- TODO confirm units. */ | ||||||
|  | 	mflops = flops/timestat.mean; | ||||||
|  | 	mflops_all.push_back(mflops); | ||||||
|  | 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||||
|  | 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||||
|  | 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||||
|  | 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||||
|  |  | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank   "<< mflops/NP<<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node   "<< mflops/NN<<std::endl; | ||||||
|  |  | ||||||
|  | 	sDw.Report(); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< mflops_worst/mflops_best <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage <<fmt << std::endl; | ||||||
|  |       std::cout<<GridLogMessage ; | ||||||
|  |       /* Per-case mean rates divided by the node count NN, in the order the cases were run. */ | ||||||
|  |       for(int i=0;i<mflops_all.size();i++){ | ||||||
|  | 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||||
|  |       } | ||||||
|  |       std::cout<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |     return mflops_best; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   static double DWF(int Ls,int L) /* Benchmark Dw.DhopEO (a DomainWallFermionR) with fifth-dimension extent Ls, and check each case against a reference built from Cshift and Gamma. */ | ||||||
|  |   { /* Returns mflops_best, the largest per-case value of flops/timestat.mean over the cases timed below. */ | ||||||
|  |     RealD mass=0.1; | ||||||
|  |     RealD M5  =1.8; | ||||||
|  |  | ||||||
|  |     double mflops; | ||||||
|  |     double mflops_best = 0; /* 0 also means "not set yet"; see the ==0 tests after each timing */ | ||||||
|  |     double mflops_worst= 0; | ||||||
|  |     std::vector<double> mflops_all; | ||||||
|  |  | ||||||
|  |     /////////////////////////////////////////////////////// | ||||||
|  |     // Set/Get the layout & grid size | ||||||
|  |     /////////////////////////////////////////////////////// | ||||||
|  |     int threads = GridThread::GetThreads(); | ||||||
|  |     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||||
|  |     std::vector<int> local({L,L,L,L}); | ||||||
|  |     /* TmpGrid is only queried for the rank and node counts below. */ | ||||||
|  |     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  | ||||||
|  | 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|  |     uint64_t NP = TmpGrid->RankCount(); | ||||||
|  |     uint64_t NN = TmpGrid->NodeCount(); | ||||||
|  |     NN_global=NN; | ||||||
|  |     uint64_t SHM=NP/NN; /* ranks per node */ | ||||||
|  |     /* Per-dimension intranode split chosen from SHM; only 1, 2, 4 and 8 are handled, any other value reaches assert(0). */ | ||||||
|  |     std::vector<int> internal; | ||||||
|  |     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); | ||||||
|  |     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); | ||||||
|  |     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); | ||||||
|  |     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); | ||||||
|  |     else assert(0); | ||||||
|  |     /* nodes = MPI ranks per dimension divided by the intranode split; latt4 = local extent times nodes. */ | ||||||
|  |     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); | ||||||
|  |     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); | ||||||
|  |  | ||||||
|  |     ///////// Welcome message //////////// | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     ///////// Lattice Init //////////// | ||||||
|  |     GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|  |     GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
|  |     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |      | ||||||
|  |     ///////// RNG Init //////////// | ||||||
|  |     std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |     std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||||
|  |  | ||||||
|  |     ///////// Source preparation //////////// | ||||||
|  |     LatticeFermion src   (FGrid); random(RNG5,src); | ||||||
|  |     LatticeFermion ref   (FGrid); | ||||||
|  |     LatticeFermion tmp   (FGrid); | ||||||
|  |     /* Rescale src by 1/sqrt(norm2(src)). */ | ||||||
|  |     RealD N2 = 1.0/::sqrt(norm2(src)); | ||||||
|  |     src = src*N2; | ||||||
|  |      | ||||||
|  |     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  | ||||||
|  |  | ||||||
|  |     DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |  | ||||||
|  |     //////////////////////////////////// | ||||||
|  |     // Naive wilson implementation | ||||||
|  |     //////////////////////////////////// | ||||||
|  |     { /* Builds ref, which the kernel output is compared against after every timed case. */ | ||||||
|  |       LatticeGaugeField Umu5d(FGrid);  | ||||||
|  |       std::vector<LatticeColourMatrix> U(4,FGrid); | ||||||
|  |       for(int ss=0;ss<Umu._grid->oSites();ss++){ /* copy each Umu site into Ls consecutive Umu5d sites */ | ||||||
|  | 	for(int s=0;s<Ls;s++){ | ||||||
|  | 	  Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; | ||||||
|  | 	} | ||||||
|  |       } | ||||||
|  |       ref = zero; | ||||||
|  |       for(int mu=0;mu<Nd;mu++){ | ||||||
|  | 	U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); | ||||||
|  |       } | ||||||
|  |       for(int mu=0;mu<Nd;mu++){ | ||||||
|  | 	/* Forward term: U[mu] times src shifted by +1 along dimension mu+1 (presumably dimension 0 of FGrid is the Ls direction -- confirm). */ | ||||||
|  | 	tmp = U[mu]*Cshift(src,mu+1,1); | ||||||
|  | 	ref=ref + tmp - Gamma(Gmu[mu])*tmp; | ||||||
|  | 	/* Backward term: adj(U[mu])*src shifted by -1 along the same dimension. */ | ||||||
|  | 	tmp =adj(U[mu])*src; | ||||||
|  | 	tmp =Cshift(tmp,mu+1,-1); | ||||||
|  | 	ref=ref + tmp + Gamma(Gmu[mu])*tmp; | ||||||
|  |       } | ||||||
|  |       ref = -0.5*ref; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     LatticeFermion src_e (FrbGrid); | ||||||
|  |     LatticeFermion src_o (FrbGrid); | ||||||
|  |     LatticeFermion r_e   (FrbGrid); | ||||||
|  |     LatticeFermion r_o   (FrbGrid); | ||||||
|  |     LatticeFermion r_eo  (FGrid); | ||||||
|  |     LatticeFermion err   (FGrid); | ||||||
|  |     { | ||||||
|  |       /* Split src into its Even and Odd checkerboards. */ | ||||||
|  |       pickCheckerboard(Even,src_e,src); | ||||||
|  |       pickCheckerboard(Odd,src_o,src); | ||||||
|  |       /* One controls entry per kernel Opt / Comms mode / communicator policy; AVX512 builds add two OptInlineAsm cases. fmt labels the per-case columns printed at the end. */ | ||||||
|  | #if defined(AVX512)  | ||||||
|  |       const int num_cases = 6; | ||||||
|  |       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); | ||||||
|  | #else | ||||||
|  |       const int num_cases = 4; | ||||||
|  |       std::string fmt("U/S ; U/O ; G/S ; G/O "); | ||||||
|  | #endif | ||||||
|  |       controls Cases [] = { | ||||||
|  | #ifdef AVX512 | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | #endif | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||||
|  | 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } | ||||||
|  |       };  | ||||||
|  |  | ||||||
|  |       for(int c=0;c<num_cases;c++) { | ||||||
|  | 	/* Install this case's settings in WilsonKernelsStatic and CartesianCommunicator before timing. */ | ||||||
|  | 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||||
|  | 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; | ||||||
|  | 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||||
|  |  | ||||||
|  | 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  | 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|  | 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  | 	/* Warm-up: nwarm calls between two barriers; t0/t1 taken here only feed the commented-out ncall estimate. */ | ||||||
|  | 	int nwarm = 200; | ||||||
|  | 	double t0=usecond(); | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  | 	for(int i=0;i<nwarm;i++){ | ||||||
|  | 	  Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  | 	} | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  | 	double t1=usecond(); | ||||||
|  | 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); | ||||||
|  | 	//	if (ncall < 500) ncall = 500; | ||||||
|  | 	uint64_t ncall = 1000; /* fixed number of timed calls */ | ||||||
|  |  | ||||||
|  | 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); /* presumably keeps ncall identical on every rank -- confirm Broadcast semantics */ | ||||||
|  |  | ||||||
|  | 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; | ||||||
|  | 	Dw.ZeroCounters(); | ||||||
|  | 	/* Time every DhopEO call separately; t_time[i] holds the duration of call i. */ | ||||||
|  | 	time_statistics timestat; | ||||||
|  | 	std::vector<double> t_time(ncall); | ||||||
|  | 	for(uint64_t i=0;i<ncall;i++){ | ||||||
|  | 	  t0=usecond(); | ||||||
|  | 	  Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  | 	  t1=usecond(); | ||||||
|  | 	  t_time[i] = t1-t0; | ||||||
|  | 	} | ||||||
|  | 	FGrid->Barrier(); | ||||||
|  | 	/* volume = Ls times the product of latt4. NOTE(review): 1344 looks like a per-site flop count and /2 the single checkerboard -- confirm. */ | ||||||
|  | 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||||
|  | 	double flops=(1344.0*volume)/2; | ||||||
|  | 	double mf_hi, mf_lo, mf_err; | ||||||
|  |  | ||||||
|  | 	timestat.statistics(t_time); | ||||||
|  | 	mf_hi = flops/timestat.min; | ||||||
|  | 	mf_lo = flops/timestat.max; | ||||||
|  | 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||||
|  | 	/* flops per unit of usecond(): Mflop/s if usecond() returns microseconds -- TODO confirm units. */ | ||||||
|  | 	mflops = flops/timestat.mean; | ||||||
|  | 	mflops_all.push_back(mflops); | ||||||
|  | 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||||
|  | 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||||
|  | 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||||
|  | 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||||
|  |  | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl; | ||||||
|  | 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl; | ||||||
|  |  | ||||||
|  | 	Dw.Report(); | ||||||
|  | 	/* Correctness check: apply DhopEO and DhopOE, merge both checkerboards into r_eo and require norm2(r_eo-ref) < 1.0e-4. */ | ||||||
|  | 	Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
|  | 	Dw.DhopOE(src_e,r_o,DaggerNo); | ||||||
|  | 	setCheckerboard(r_eo,r_o); | ||||||
|  | 	setCheckerboard(r_eo,r_e); | ||||||
|  | 	err = r_eo-ref;  | ||||||
|  | 	std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  | 	assert((norm2(err)<1.0e-4)); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< mflops_worst/mflops_best <<std::endl; | ||||||
|  |       std::cout<<GridLogMessage <<fmt << std::endl; | ||||||
|  |       std::cout<<GridLogMessage ; | ||||||
|  |       /* Per-case mean rates divided by the node count NN, in the order the cases were run. */ | ||||||
|  |       for(int i=0;i<mflops_all.size();i++){ | ||||||
|  | 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||||
|  |       } | ||||||
|  |       std::cout<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |     return mflops_best; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); | ||||||
|  | #ifdef KNL | ||||||
|  |   LebesgueOrder::Block = std::vector<int>({8,2,2,2}); | ||||||
|  | #else | ||||||
|  |   LebesgueOrder::Block = std::vector<int>({2,2,2,2}); | ||||||
|  | #endif | ||||||
|  |   Benchmark::Decomposition(); | ||||||
|  |  | ||||||
|  |   int do_memory=1; | ||||||
|  |   int do_comms =1; | ||||||
|  |   int do_su3   =0; | ||||||
|  |   int do_wilson=1; | ||||||
|  |   int do_dwf   =1; | ||||||
|  |  | ||||||
|  |   if ( do_memory ) { | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     Benchmark::Memory(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if ( do_comms ) { | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     Benchmark::Comms(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if ( do_su3 ) { | ||||||
|  |     // empty for now | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   int sel=2; | ||||||
|  |   std::vector<int> L_list({8,12,16,24}); | ||||||
|  |   std::vector<double> wilson; | ||||||
|  |   std::vector<double> dwf4; | ||||||
|  |   std::vector<double> dwf5; | ||||||
|  |  | ||||||
|  |   if ( do_wilson ) { | ||||||
|  |     int Ls=1; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     for(int l=0;l<L_list.size();l++){ | ||||||
|  |       wilson.push_back(Benchmark::DWF(1,L_list[l])); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   int Ls=16; | ||||||
|  |   if ( do_dwf ) { | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     for(int l=0;l<L_list.size();l++){ | ||||||
|  |       dwf4.push_back(Benchmark::DWF(Ls,L_list[l])); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if ( do_dwf ) { | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |     for(int l=0;l<L_list.size();l++){ | ||||||
|  |       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l])); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl; | ||||||
|  |   for(int l=0;l<L_list.size();l++){ | ||||||
|  |     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl; | ||||||
|  |   } | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   int NN=NN_global; | ||||||
|  |   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl; | ||||||
|  |   for(int l=0;l<L_list.size();l++){ | ||||||
|  |     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl; | ||||||
|  |   } | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << " Comparison point result: "  << dwf4[sel]/NN <<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
| @@ -31,6 +31,32 @@ using namespace std; | |||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace Grid::QCD; | using namespace Grid::QCD; | ||||||
|  |  | ||||||
// Summary statistics over a set of timing samples.
//
//   mean : arithmetic mean of the samples
//   err  : standard error of the mean, sqrt( sum (x-mean)^2 / (N*(N-1)) )
//   min  : smallest sample
//   max  : largest sample
//
// All fields are zero until statistics() has been called with a non-empty
// sample set.
struct time_statistics{
  double mean = 0.0;
  double err  = 0.0;
  double min  = 0.0;
  double max  = 0.0;

  // Recompute every field from the samples in v.
  //
  // Edge cases:
  //   * empty v      -> all fields are reset to 0 (no division by zero and no
  //                     dereference of an end() iterator).
  //   * single value -> err is reported as 0, because the N*(N-1) denominator
  //                     vanishes and would otherwise yield NaN.
  void statistics(const std::vector<double> &v){
    mean = 0.0;
    err  = 0.0;
    min  = 0.0;
    max  = 0.0;

    if ( v.empty() ) return;

    const double n = static_cast<double>(v.size());
    mean = std::accumulate(v.begin(), v.end(), 0.0) / n;

    // The standard error needs at least two samples.
    if ( v.size() > 1 ) {
      double sq_sum = 0.0;
      for (double x : v) {
        const double d = x - mean;
        sq_sum += d*d;
      }
      err = std::sqrt(sq_sum / (n*(n - 1.0)));
    }

    auto extremes = std::minmax_element(v.begin(), v.end());
    min = *extremes.first;
    max = *extremes.second;
  }
};
|  |  | ||||||
|  | void header(){ | ||||||
|  |   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||||
|  |             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||||
|  | }; | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
| @@ -40,17 +66,21 @@ int main (int argc, char ** argv) | |||||||
|   int threads = GridThread::GetThreads(); |   int threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   int Nloop=10; |   int Nloop=100; | ||||||
|   int nmu=0; |   int nmu=0; | ||||||
|  |   int maxlat=32; | ||||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; |   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; | ||||||
|  |   std::vector<double> t_time(Nloop); | ||||||
|  |   time_statistics timestat; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|   int maxlat=24; |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=8;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -58,15 +88,23 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	 | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |       for(int mu=0;mu<8;mu++){ | ||||||
|  | 	xbuf[mu].resize(lat*lat*lat*Ls); | ||||||
|  | 	rbuf[mu].resize(lat*lat*lat*Ls); | ||||||
|  | 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; | ||||||
|  |       } | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  |       double start=usecond(); | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| @@ -79,7 +117,6 @@ int main (int argc, char ** argv) | |||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
| 	    Grid.SendToRecvFromBegin(requests, | 	    Grid.SendToRecvFromBegin(requests, | ||||||
| 				   (void *)&xbuf[mu][0], | 				   (void *)&xbuf[mu][0], | ||||||
| @@ -102,18 +139,24 @@ int main (int argc, char ** argv) | |||||||
| 	} | 	} | ||||||
| 	Grid.SendToRecvFromComplete(requests); | 	Grid.SendToRecvFromComplete(requests); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|       } |  | ||||||
| 	double stop=usecond(); | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
|  |       } | ||||||
|  |  | ||||||
|       double dbytes    = bytes; |       timestat.statistics(t_time); | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |  | ||||||
|  |       double dbytes    = bytes*ppn; | ||||||
|  |       double xbytes    = dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       double time = stop-start; // microseconds |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
| @@ -121,25 +164,32 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|  |  | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=8;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); |       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8); | ||||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); |       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); | ||||||
|  |  | ||||||
|  |       for(int mu=0;mu<8;mu++){ | ||||||
|  | 	xbuf[mu].resize(lat*lat*lat*Ls); | ||||||
|  | 	rbuf[mu].resize(lat*lat*lat*Ls); | ||||||
|  | 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; | ||||||
|  |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  |       double start=usecond(); | ||||||
|      |      | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| @@ -178,30 +228,37 @@ int main (int argc, char ** argv) | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       double stop=usecond(); |       timestat.statistics(t_time); | ||||||
|        |        | ||||||
|       double dbytes    = bytes; |       double dbytes    = bytes*ppn; | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |       double xbytes    = dbytes*2.0*ncomm; | ||||||
|       double rbytes    = xbytes; |       double rbytes    = xbytes; | ||||||
|       double bidibytes = xbytes+rbytes; |       double bidibytes = xbytes+rbytes; | ||||||
|  |  | ||||||
|       double time = stop-start; |     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|        |        | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }   |   }   | ||||||
|  |  | ||||||
|  |  | ||||||
|   Nloop=10; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=8;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -209,6 +266,9 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -216,73 +276,86 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double dbytes; | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  | 	double start=usecond(); | ||||||
|  |  | ||||||
|  | 	dbytes=0; | ||||||
|  | 	ncomm=0; | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  |  | ||||||
| 	ncomm=0; |  | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
|  |  | ||||||
| 	  if (mpi_layout[mu]>1 ) { | 	  if (mpi_layout[mu]>1 ) { | ||||||
| 	   | 	   | ||||||
| 	    ncomm++; | 	    ncomm++; | ||||||
| 	    int comm_proc=1; | 	    int comm_proc=1; | ||||||
| 	    int xmit_to_rank; | 	    int xmit_to_rank; | ||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     |  | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu][0], | 					      (void *)&xbuf[mu][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| 					      (void *)&rbuf[mu][0], | 					      (void *)&rbuf[mu][0], | ||||||
| 					      recv_from_rank, | 					      recv_from_rank, | ||||||
| 					    bytes); | 					      bytes,mu); | ||||||
| 	 | 	 | ||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu+4][0], | 					      (void *)&xbuf[mu+4][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| 					      (void *)&rbuf[mu+4][0], | 					      (void *)&rbuf[mu+4][0], | ||||||
| 					      recv_from_rank, | 					      recv_from_rank, | ||||||
| 					    bytes); | 					      bytes,mu+4); | ||||||
| 	   | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.StencilSendToRecvFromComplete(requests); | 	Grid.StencilSendToRecvFromComplete(requests,0); | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|       } |  | ||||||
| 	double stop=usecond(); | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
| 	 | 	 | ||||||
|       double dbytes    = bytes; |       } | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |       timestat.statistics(t_time); | ||||||
|       double bidibytes = xbytes+rbytes; |  | ||||||
|  |       dbytes=dbytes*ppn; | ||||||
|  |       double xbytes    = dbytes*0.5; | ||||||
|  |       double rbytes    = dbytes*0.5; | ||||||
|  |       double bidibytes = dbytes; | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |  | ||||||
|       double time = stop-start; // microseconds |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   Nloop=100; |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; |   header(); | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=maxlat;lat+=4){ |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|     for(int Ls=8;Ls<=32;Ls*=2){ |     for(int Ls=8;Ls<=8;Ls*=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|       				    lat*mpi_layout[1], |       				    lat*mpi_layout[1], | ||||||
| @@ -290,6 +363,9 @@ int main (int argc, char ** argv) | |||||||
|       				    lat*mpi_layout[3]}); |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
| @@ -297,16 +373,18 @@ int main (int argc, char ** argv) | |||||||
|       for(int d=0;d<8;d++){ |       for(int d=0;d<8;d++){ | ||||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       int ncomm; |       int ncomm; | ||||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |       double dbytes; | ||||||
|       double start=usecond(); |  | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int i=0;i<Nloop;i++){ | ||||||
|  | 	double start=usecond(); | ||||||
|  |  | ||||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  | 	dbytes=0; | ||||||
| 	ncomm=0; | 	ncomm=0; | ||||||
| 	for(int mu=0;mu<4;mu++){ | 	for(int mu=0;mu<4;mu++){ | ||||||
| 	 | 	 | ||||||
| @@ -318,44 +396,146 @@ int main (int argc, char ** argv) | |||||||
| 	    int recv_from_rank; | 	    int recv_from_rank; | ||||||
| 	     | 	     | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu][0], | 					      (void *)&xbuf[mu][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| 					      (void *)&rbuf[mu][0], | 					      (void *)&rbuf[mu][0], | ||||||
| 					      recv_from_rank, | 					      recv_from_rank, | ||||||
| 					    bytes); | 					      bytes,mu); | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | 	    Grid.StencilSendToRecvFromComplete(requests,mu); | ||||||
| 	    requests.resize(0); | 	    requests.resize(0); | ||||||
|  |  | ||||||
| 	    comm_proc = mpi_layout[mu]-1; | 	    comm_proc = mpi_layout[mu]-1; | ||||||
| 	   | 	   | ||||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    dbytes+= | ||||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | 	      Grid.StencilSendToRecvFromBegin(requests, | ||||||
| 					      (void *)&xbuf[mu+4][0], | 					      (void *)&xbuf[mu+4][0], | ||||||
| 					      xmit_to_rank, | 					      xmit_to_rank, | ||||||
| 					      (void *)&rbuf[mu+4][0], | 					      (void *)&rbuf[mu+4][0], | ||||||
| 					      recv_from_rank, | 					      recv_from_rank, | ||||||
| 					    bytes); | 					      bytes,mu+4); | ||||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | 	    Grid.StencilSendToRecvFromComplete(requests,mu+4); | ||||||
| 	    requests.resize(0); | 	    requests.resize(0); | ||||||
| 	   | 	   | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	Grid.Barrier(); | 	Grid.Barrier(); | ||||||
|  |  | ||||||
|       } |  | ||||||
| 	double stop=usecond(); | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
| 	 | 	 | ||||||
|       double dbytes    = bytes; |       } | ||||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; |  | ||||||
|       double rbytes    = xbytes; |  | ||||||
|       double bidibytes = xbytes+rbytes; |  | ||||||
|  |  | ||||||
|       double time = stop-start; // microseconds |       timestat.statistics(t_time); | ||||||
|  |  | ||||||
|  |       dbytes=dbytes*ppn; | ||||||
|  |       double xbytes    = dbytes*0.5; | ||||||
|  |       double rbytes    = dbytes*0.5; | ||||||
|  |       double bidibytes = dbytes; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|   |   | ||||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; |  | ||||||
|     } |     } | ||||||
|   }     |   }     | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|  |   header(); | ||||||
|  |  | ||||||
|  |   for(int lat=4;lat<=maxlat;lat+=4){ | ||||||
|  |     for(int Ls=8;Ls<=8;Ls*=2){ | ||||||
|  |  | ||||||
|  |       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||||
|  |       				    lat*mpi_layout[1], | ||||||
|  |       				    lat*mpi_layout[2], | ||||||
|  |       				    lat*mpi_layout[3]}); | ||||||
|  |  | ||||||
|  |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |       RealD Nrank = Grid._Nprocessors; | ||||||
|  |       RealD Nnode = Grid.NodeCount(); | ||||||
|  |       RealD ppn = Nrank/Nnode; | ||||||
|  |  | ||||||
|  |       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||||
|  |       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||||
|  |       Grid.ShmBufferFreeAll(); | ||||||
|  |       for(int d=0;d<8;d++){ | ||||||
|  | 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  | 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       int ncomm; | ||||||
|  |       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||||
|  |       double dbytes; | ||||||
|  |       for(int i=0;i<Nloop;i++){ | ||||||
|  | 	double start=usecond(); | ||||||
|  |  | ||||||
|  | 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||||
|  | 	dbytes=0; | ||||||
|  | 	ncomm=0; | ||||||
|  |  | ||||||
|  | 	parallel_for(int dir=0;dir<8;dir++){ | ||||||
|  |  | ||||||
|  | 	  double tbytes; | ||||||
|  | 	  int mu =dir % 4; | ||||||
|  |  | ||||||
|  | 	  if (mpi_layout[mu]>1 ) { | ||||||
|  | 	   | ||||||
|  | 	    ncomm++; | ||||||
|  | 	    int xmit_to_rank; | ||||||
|  | 	    int recv_from_rank; | ||||||
|  | 	    if ( dir == mu ) {  | ||||||
|  | 	      int comm_proc=1; | ||||||
|  | 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    } else {  | ||||||
|  | 	      int comm_proc = mpi_layout[mu]-1; | ||||||
|  | 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||||
|  | 	    } | ||||||
|  |  | ||||||
|  | 	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | ||||||
|  | 					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); | ||||||
|  |  | ||||||
|  | #pragma omp atomic | ||||||
|  | 	    dbytes+=tbytes; | ||||||
|  | 	  } | ||||||
|  | 	} | ||||||
|  | 	Grid.Barrier(); | ||||||
|  | 	double stop=usecond(); | ||||||
|  | 	t_time[i] = stop-start; // microseconds | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       timestat.statistics(t_time); | ||||||
|  |  | ||||||
|  |       dbytes=dbytes*ppn; | ||||||
|  |       double xbytes    = dbytes*0.5; | ||||||
|  |       double rbytes    = dbytes*0.5; | ||||||
|  |       double bidibytes = dbytes; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||||
|  |                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||||
|  |                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||||
|  |                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||||
|  |                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||||
|  |                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||||
|  |   | ||||||
|  |     } | ||||||
|  |   }     | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; | ||||||
|  |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,28 +1,22 @@ | |||||||
|  /************************************************************************************* |  /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./benchmarks/Benchmark_dwf.cc |     Source file: ./benchmarks/Benchmark_dwf.cc | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
|     the Free Software Foundation; either version 2 of the License, or |     the Free Software Foundation; either version 2 of the License, or | ||||||
|     (at your option) any later version. |     (at your option) any later version. | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |     This program is distributed in the hope that it will be useful, | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|     GNU General Public License for more details. |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |     You should have received a copy of the GNU General Public License along | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| @@ -151,9 +145,7 @@ int main (int argc, char ** argv) | |||||||
|   RealD M5  =1.8; |   RealD M5  =1.8; | ||||||
|  |  | ||||||
|   RealD NP = UGrid->_Nprocessors; |   RealD NP = UGrid->_Nprocessors; | ||||||
|  |   RealD NN = UGrid->NodeCount(); | ||||||
|   std::cout << GridLogMessage << "Creating action operator " << std::endl; |  | ||||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; |   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||||
| @@ -163,16 +155,22 @@ int main (int argc, char ** argv) | |||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||||
|  |  | ||||||
|   int ncall =1000; |   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |   int ncall =500; | ||||||
|   if (1) { |   if (1) { | ||||||
|     FGrid->Barrier(); |     FGrid->Barrier(); | ||||||
|     Dw.ZeroCounters(); |     Dw.ZeroCounters(); | ||||||
|     Dw.Dhop(src,result,0); |     Dw.Dhop(src,result,0); | ||||||
|  |     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; | ||||||
|     double t0=usecond(); |     double t0=usecond(); | ||||||
|     for(int i=0;i<ncall;i++){ |     for(int i=0;i<ncall;i++){ | ||||||
|       __SSC_START; |       __SSC_START; | ||||||
| @@ -190,6 +188,7 @@ int main (int argc, char ** argv) | |||||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; |     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|     err = ref-result;  |     err = ref-result;  | ||||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
| @@ -206,6 +205,34 @@ int main (int argc, char ** argv) | |||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |   if (1) { | ||||||
|  |     FGrid->Barrier(); | ||||||
|  |     DwH.ZeroCounters(); | ||||||
|  |     DwH.Dhop(src,result,0); | ||||||
|  |     double t0=usecond(); | ||||||
|  |     for(int i=0;i<ncall;i++){ | ||||||
|  |       __SSC_START; | ||||||
|  |       DwH.Dhop(src,result,0); | ||||||
|  |       __SSC_STOP; | ||||||
|  |     } | ||||||
|  |     double t1=usecond(); | ||||||
|  |     FGrid->Barrier(); | ||||||
|  |      | ||||||
|  |     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||||
|  |     double flops=1344*volume*ncall; | ||||||
|  |  | ||||||
|  |     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|  |     err = ref-result;  | ||||||
|  |     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||||
|  |  | ||||||
|  |     assert (norm2(err)< 1.0e-3 ); | ||||||
|  |     DwH.Report(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { |   { | ||||||
|  |  | ||||||
| @@ -214,6 +241,10 @@ int main (int argc, char ** argv) | |||||||
|     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -245,6 +276,7 @@ int main (int argc, char ** argv) | |||||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; |     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; |     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||||
|     sDw.Report(); |     sDw.Report(); | ||||||
|     RealD sum=0; |     RealD sum=0; | ||||||
| @@ -271,12 +303,17 @@ int main (int argc, char ** argv) | |||||||
|     } |     } | ||||||
|     assert(sum < 1.0e-4); |     assert(sum < 1.0e-4); | ||||||
|  |  | ||||||
|  |      | ||||||
|     if(1){ |     if(1){ | ||||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|       std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl; |       std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl; | ||||||
|       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; |       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||||
|       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  | ||||||
| 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  |       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  | ||||||
| @@ -316,6 +353,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|       sDw.Report(); |       sDw.Report(); | ||||||
|  |  | ||||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); |       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||||
| @@ -344,7 +382,22 @@ int main (int argc, char ** argv) | |||||||
|       } |       } | ||||||
|       assert(error<1.0e-4); |       assert(error<1.0e-4); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |   if(0){ | ||||||
|  |     std::cout << "Single cache warm call to sDw.Dhop " <<std::endl; | ||||||
|  |     for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ | ||||||
|  |       sDw.Dhop(ssrc,sresult,0); | ||||||
|  |       PerformanceCounter Counter(i); | ||||||
|  |       Counter.Start(); | ||||||
|  |       sDw.Dhop(ssrc,sresult,0); | ||||||
|  |       Counter.Stop(); | ||||||
|  |       Counter.Report(); | ||||||
|     } |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   if (1) |   if (1) | ||||||
|   { // Naive wilson dag implementation |   { // Naive wilson dag implementation | ||||||
| @@ -394,14 +447,15 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|  |  | ||||||
|   // S-direction is INNERMOST and takes no part in the parity. |   // S-direction is INNERMOST and takes no part in the parity. | ||||||
|   static int Opt;  // these are a temporary hack |  | ||||||
|   static int Comms;  // these are a temporary hack |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; |   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; |   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; | ||||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; |   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; |   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; |   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||||
|  | #endif | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; |   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||||
| @@ -422,6 +476,7 @@ int main (int argc, char ** argv) | |||||||
|  |  | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||||
|     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; |     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; | ||||||
|     Dw.Report(); |     Dw.Report(); | ||||||
|   } |   } | ||||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); |   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||||
| @@ -448,8 +503,9 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; |   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; |   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||||
|  |  | ||||||
|   //assert(norm2(src_e)<1.0e-4); |   assert(norm2(src_e)<1.0e-4); | ||||||
|   //assert(norm2(src_o)<1.0e-4); |   assert(norm2(src_o)<1.0e-4); | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
|  |   exit(0); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -55,21 +55,21 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|   uint64_t lmax=44; |   uint64_t lmax=96; | ||||||
| #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | #define NLOOP (10*lmax*lmax*lmax*lmax/vol) | ||||||
|   for(int lat=4;lat<=lmax;lat+=4){ |   for(int lat=8;lat<=lmax;lat+=8){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid);// random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid);// random(pRNG,x); | ||||||
|       LatticeVec y(&Grid); //random(pRNG,y); |       LatticeVec y(&Grid);// random(pRNG,y); | ||||||
|       double a=2.0; |       double a=2.0; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -83,7 +83,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3*vol*Nvec*sizeof(Real); |       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -94,17 +94,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|    |    | ||||||
|   for(int lat=4;lat<=lmax;lat+=4){ |   for(int lat=8;lat<=lmax;lat+=8){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid);// random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid);// random(pRNG,x); | ||||||
|       LatticeVec y(&Grid); //random(pRNG,y); |       LatticeVec y(&Grid);// random(pRNG,y); | ||||||
|       double a=2.0; |       double a=2.0; | ||||||
|  |  | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
| @@ -119,7 +119,7 @@ int main (int argc, char ** argv) | |||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|       |       | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       double bytes=3*vol*Nvec*sizeof(Real); |       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
| @@ -129,20 +129,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=lmax;lat+=4){ |   for(int lat=8;lat<=lmax;lat+=8){ | ||||||
|  |  | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid);// random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid);// random(pRNG,x); | ||||||
|       LatticeVec y(&Grid); //random(pRNG,y); |       LatticeVec y(&Grid);// random(pRNG,y); | ||||||
|       RealD a=2.0; |       RealD a=2.0; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -154,7 +154,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=2*vol*Nvec*sizeof(Real); |       double bytes=2.0*vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*1;// mul |       double flops=vol*Nvec*1;// mul | ||||||
|       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; |       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||||
|  |  | ||||||
| @@ -166,17 +166,17 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=4;lat<=lmax;lat+=4){ |   for(int lat=8;lat<=lmax;lat+=8){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       uint64_t Nloop=NLOOP; |       uint64_t Nloop=NLOOP; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|       LatticeVec z(&Grid); //random(pRNG,z); |       LatticeVec z(&Grid);// random(pRNG,z); | ||||||
|       LatticeVec x(&Grid); //random(pRNG,x); |       LatticeVec x(&Grid);// random(pRNG,x); | ||||||
|       LatticeVec y(&Grid); //random(pRNG,y); |       LatticeVec y(&Grid);// random(pRNG,y); | ||||||
|       RealD a=2.0; |       RealD a=2.0; | ||||||
|       Real nn;       |       Real nn;       | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
| @@ -187,7 +187,7 @@ int main (int argc, char ** argv) | |||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|       double time = (stop-start)/Nloop*1000; |       double time = (stop-start)/Nloop*1000; | ||||||
|        |        | ||||||
|       double bytes=vol*Nvec*sizeof(Real); |       double bytes=1.0*vol*Nvec*sizeof(Real); | ||||||
|       double flops=vol*Nvec*2;// mul,add |       double flops=vol*Nvec*2;// mul,add | ||||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; |       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -35,13 +35,14 @@ using namespace Grid::QCD; | |||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
|   Grid_init(&argc,&argv); |   Grid_init(&argc,&argv); | ||||||
|  | #define LMAX (64) | ||||||
|  |  | ||||||
|   int Nloop=1000; |   int64_t Nloop=20; | ||||||
|  |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|   int threads = GridThread::GetThreads(); |   int64_t threads = GridThread::GetThreads(); | ||||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
| @@ -50,19 +51,19 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid);// random(pRNG,z); |       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid);// random(pRNG,x); |       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid);// random(pRNG,y); |       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int64_t i=0;i<Nloop;i++){ | ||||||
| 	x=x*y; | 	x=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -82,20 +83,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); |       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); |       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); |       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int64_t i=0;i<Nloop;i++){ | ||||||
| 	z=x*y; | 	z=x*y; | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -113,20 +114,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); |       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); |       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); |       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int64_t i=0;i<Nloop;i++){ | ||||||
| 	mult(z,x,y); | 	mult(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
| @@ -144,20 +145,20 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=2;lat<=32;lat+=2){ |   for(int lat=2;lat<=LMAX;lat+=2){ | ||||||
|  |  | ||||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); |       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; |       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||||
|  |  | ||||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); |       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); |       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||||
|  |  | ||||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); |       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); |       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); |       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||||
|  |  | ||||||
|       double start=usecond(); |       double start=usecond(); | ||||||
|       for(int i=0;i<Nloop;i++){ |       for(int64_t i=0;i<Nloop;i++){ | ||||||
| 	mac(z,x,y); | 	mac(z,x,y); | ||||||
|       } |       } | ||||||
|       double stop=usecond(); |       double stop=usecond(); | ||||||
|   | |||||||
| @@ -1,11 +1,7 @@ | |||||||
| include Make.inc | include Make.inc | ||||||
|  |  | ||||||
| simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o | bench-local: all | ||||||
|  | 	./Benchmark_su3 | ||||||
| EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a | 	./Benchmark_memory_bandwidth | ||||||
|  | 	./Benchmark_wilson | ||||||
| libsimple_su3_test_a_SOURCES = simple_su3_test.cc | 	./Benchmark_dwf --dslash-unroll | ||||||
|  |  | ||||||
| libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc |  | ||||||
|  |  | ||||||
| libsimple_simd_test_a_SOURCES = simple_simd_test.cc |  | ||||||
| @@ -1,6 +1,6 @@ | |||||||
| #!/usr/bin/env bash | #!/usr/bin/env bash | ||||||
|  |  | ||||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | ||||||
|  |  | ||||||
| echo "-- deploying Eigen source..." | echo "-- deploying Eigen source..." | ||||||
| wget ${EIGEN_URL} --no-check-certificate | wget ${EIGEN_URL} --no-check-certificate | ||||||
|   | |||||||
							
								
								
									
										149
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										149
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,16 +1,23 @@ | |||||||
| AC_PREREQ([2.63]) | AC_PREREQ([2.63]) | ||||||
| AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) | AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) | ||||||
| AC_CANONICAL_BUILD | AC_CANONICAL_BUILD | ||||||
| AC_CANONICAL_HOST | AC_CANONICAL_HOST | ||||||
| AC_CANONICAL_TARGET | AC_CANONICAL_TARGET | ||||||
| AM_INIT_AUTOMAKE(subdir-objects) | AM_INIT_AUTOMAKE([subdir-objects 1.13]) | ||||||
|  | AM_EXTRA_RECURSIVE_TARGETS([tests bench]) | ||||||
| AC_CONFIG_MACRO_DIR([m4]) | AC_CONFIG_MACRO_DIR([m4]) | ||||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||||
| AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | ||||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||||
|  |  | ||||||
| ############### Checks for programs | ################ Get git info | ||||||
|  | #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) | ||||||
|  |  | ||||||
|  | ################ Set flags | ||||||
|  | # do not move! | ||||||
| CXXFLAGS="-O3 $CXXFLAGS" | CXXFLAGS="-O3 $CXXFLAGS" | ||||||
|  |  | ||||||
|  | ############### Checks for programs | ||||||
| AC_PROG_CXX | AC_PROG_CXX | ||||||
| AC_PROG_RANLIB | AC_PROG_RANLIB | ||||||
|  |  | ||||||
| @@ -24,6 +31,8 @@ AX_GXX_VERSION | |||||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||||
|       [version of g++ that will compile the code]) |       [version of g++ that will compile the code]) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ############### Checks for typedefs, structures, and compiler characteristics | ############### Checks for typedefs, structures, and compiler characteristics | ||||||
| AC_TYPE_SIZE_T | AC_TYPE_SIZE_T | ||||||
| AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||||
| @@ -45,9 +54,14 @@ AC_CHECK_HEADERS(malloc/malloc.h) | |||||||
| AC_CHECK_HEADERS(malloc.h) | AC_CHECK_HEADERS(malloc.h) | ||||||
| AC_CHECK_HEADERS(endian.h) | AC_CHECK_HEADERS(endian.h) | ||||||
| AC_CHECK_HEADERS(execinfo.h) | AC_CHECK_HEADERS(execinfo.h) | ||||||
|  | AC_CHECK_HEADERS(numaif.h) | ||||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||||
|  |  | ||||||
|  | ############## Standard libraries | ||||||
|  | AC_CHECK_LIB([m],[cos]) | ||||||
|  | AC_CHECK_LIB([stdc++],[abort]) | ||||||
|  |  | ||||||
| ############### GMP and MPFR | ############### GMP and MPFR | ||||||
| AC_ARG_WITH([gmp], | AC_ARG_WITH([gmp], | ||||||
|     [AS_HELP_STRING([--with-gmp=prefix], |     [AS_HELP_STRING([--with-gmp=prefix], | ||||||
| @@ -67,6 +81,13 @@ AC_ARG_WITH([fftw], | |||||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] |             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) |             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
|  | ############### LIME | ||||||
|  | AC_ARG_WITH([lime], | ||||||
|  |             [AS_HELP_STRING([--with-lime=prefix], | ||||||
|  |             [try this for a non-standard install prefix of the LIME library])], | ||||||
|  |             [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"] | ||||||
|  |             [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"]) | ||||||
|  |  | ||||||
| ############### lapack | ############### lapack | ||||||
| AC_ARG_ENABLE([lapack], | AC_ARG_ENABLE([lapack], | ||||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], |     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], | ||||||
| @@ -83,6 +104,18 @@ case ${ac_LAPACK} in | |||||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; |         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; | ||||||
| esac | esac | ||||||
|  |  | ||||||
|  | ############### FP16 conversions | ||||||
|  | AC_ARG_ENABLE([sfw-fp16], | ||||||
|  |     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], | ||||||
|  |     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) | ||||||
|  | case ${ac_SFW_FP16} in | ||||||
|  |     yes) | ||||||
|  |       AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);; | ||||||
|  |     no);; | ||||||
|  |     *) | ||||||
|  |       AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);; | ||||||
|  | esac | ||||||
|  |  | ||||||
| ############### MKL | ############### MKL | ||||||
| AC_ARG_ENABLE([mkl], | AC_ARG_ENABLE([mkl], | ||||||
|     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], |     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], | ||||||
| @@ -152,6 +185,23 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], | |||||||
|                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] |                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] | ||||||
|                [have_fftw=true]) |                [have_fftw=true]) | ||||||
|  |  | ||||||
|  | AC_SEARCH_LIBS([limeCreateReader], [lime], | ||||||
|  |                [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])] | ||||||
|  |                [have_lime=true], | ||||||
|  | 	       [AC_MSG_WARN(C-LIME library was not found in your system. | ||||||
|  | In order to use ILGG file format please install or provide the correct path to your installation | ||||||
|  | Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) | ||||||
|  |  | ||||||
|  | AC_SEARCH_LIBS([crc32], [z], | ||||||
|  |                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] | ||||||
|  |                [have_zlib=true] [LIBS="${LIBS} -lz"], | ||||||
|  | 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) | ||||||
|  |  | ||||||
|  | AC_SEARCH_LIBS([move_pages], [numa], | ||||||
|  |                [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] | ||||||
|  |                [have_libnuma=true] [LIBS="${LIBS} -lnuma"], | ||||||
|  | 	       [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)]) | ||||||
|  |  | ||||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] |                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||||
|                [have_hdf5=true] |                [have_hdf5=true] | ||||||
| @@ -176,19 +226,26 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|     case ${ac_SIMD} in |     case ${ac_SIMD} in | ||||||
|       SSE4) |       SSE4) | ||||||
|         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) |         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) | ||||||
|  | 	case ${ac_SFW_FP16} in | ||||||
|  | 	  yes) | ||||||
| 	  SIMD_FLAGS='-msse4.2';; | 	  SIMD_FLAGS='-msse4.2';; | ||||||
|  | 	  no) | ||||||
|  | 	  SIMD_FLAGS='-msse4.2 -mf16c';; | ||||||
|  | 	  *) | ||||||
|  |           AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);; | ||||||
|  | 	esac;; | ||||||
|       AVX) |       AVX) | ||||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) |         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx';; |         SIMD_FLAGS='-mavx -mf16c';; | ||||||
|       AVXFMA4) |       AVXFMA4) | ||||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) |         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||||
|         SIMD_FLAGS='-mavx -mfma4';; |         SIMD_FLAGS='-mavx -mfma4 -mf16c';; | ||||||
|       AVXFMA) |       AVXFMA) | ||||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) |         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||||
|         SIMD_FLAGS='-mavx -mfma';; |         SIMD_FLAGS='-mavx -mfma -mf16c';; | ||||||
|       AVX2) |       AVX2) | ||||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) |         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx2 -mfma';; |         SIMD_FLAGS='-mavx2 -mfma -mf16c';; | ||||||
|       AVX512) |       AVX512) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; |         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; | ||||||
| @@ -197,6 +254,7 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|  |         AC_DEFINE([KNL],[1],[Knights landing processor]) | ||||||
|         SIMD_FLAGS='-march=knl';; |         SIMD_FLAGS='-march=knl';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -204,6 +262,9 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|                            [generic SIMD vector width (in bytes)]) |                            [generic SIMD vector width (in bytes)]) | ||||||
|         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" |         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|  |       NEONv8) | ||||||
|  |         AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) | ||||||
|  |         SIMD_FLAGS='-march=armv8-a';; | ||||||
|       QPX|BGQ) |       QPX|BGQ) | ||||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) |         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
| @@ -232,6 +293,7 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='';; |         SIMD_FLAGS='';; | ||||||
|       KNL) |       KNL) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||||
|  |         AC_DEFINE([KNL],[1],[Knights landing processor]) | ||||||
|         SIMD_FLAGS='-xmic-avx512';; |         SIMD_FLAGS='-xmic-avx512';; | ||||||
|       GEN) |       GEN) | ||||||
|         AC_DEFINE([GEN],[1],[generic vector code]) |         AC_DEFINE([GEN],[1],[generic vector code]) | ||||||
| @@ -280,14 +342,14 @@ case ${ac_COMMS} in | |||||||
|         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) |         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||||
|         comms_type='none' |         comms_type='none' | ||||||
|      ;; |      ;; | ||||||
|      mpi3l*) |  | ||||||
|        AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) |  | ||||||
|        comms_type='mpi3l' |  | ||||||
|      ;; |  | ||||||
|      mpi3*) |      mpi3*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) |         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) | ||||||
|         comms_type='mpi3' |         comms_type='mpi3' | ||||||
|      ;; |      ;; | ||||||
|  |      mpit) | ||||||
|  |         AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) | ||||||
|  |         comms_type='mpit' | ||||||
|  |      ;; | ||||||
|      mpi*) |      mpi*) | ||||||
|         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) |         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||||
|         comms_type='mpi' |         comms_type='mpi' | ||||||
| @@ -315,7 +377,7 @@ esac | |||||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) | AM_CONDITIONAL(BUILD_COMMS_MPIT,  [ test "${comms_type}X" == "mpitX" ] ) | ||||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | ||||||
|  |  | ||||||
| ############### RNG selection | ############### RNG selection | ||||||
| @@ -384,33 +446,31 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg]) | |||||||
|  |  | ||||||
| ############### Ouput | ############### Ouput | ||||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||||
|  | GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" | ||||||
|  | GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS" | ||||||
|  | GRID_LIBS=$LIBS | ||||||
|  | GRID_SHORT_SHA=`git rev-parse --short HEAD` | ||||||
|  | GRID_SHA=`git rev-parse HEAD` | ||||||
|  | GRID_BRANCH=`git rev-parse --abbrev-ref HEAD` | ||||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||||
| AC_SUBST([AM_CFLAGS]) | AC_SUBST([AM_CFLAGS]) | ||||||
| AC_SUBST([AM_CXXFLAGS]) | AC_SUBST([AM_CXXFLAGS]) | ||||||
| AC_SUBST([AM_LDFLAGS]) | AC_SUBST([AM_LDFLAGS]) | ||||||
| AC_CONFIG_FILES(Makefile) | AC_SUBST([GRID_CXXFLAGS]) | ||||||
| AC_CONFIG_FILES(lib/Makefile) | AC_SUBST([GRID_LDFLAGS]) | ||||||
| AC_CONFIG_FILES(tests/Makefile) | AC_SUBST([GRID_LIBS]) | ||||||
| AC_CONFIG_FILES(tests/IO/Makefile) | AC_SUBST([GRID_SHA]) | ||||||
| AC_CONFIG_FILES(tests/core/Makefile) | AC_SUBST([GRID_BRANCH]) | ||||||
| AC_CONFIG_FILES(tests/debug/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/forces/Makefile) | git_commit=`cd $srcdir && ./scripts/configure.commit` | ||||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/hmc/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/solver/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) |  | ||||||
| AC_CONFIG_FILES(tests/testu01/Makefile) |  | ||||||
| AC_CONFIG_FILES(benchmarks/Makefile) |  | ||||||
| AC_CONFIG_FILES(extras/Makefile) |  | ||||||
| AC_CONFIG_FILES(extras/Hadrons/Makefile) |  | ||||||
| AC_OUTPUT |  | ||||||
|  |  | ||||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
| Summary of configuration for $PACKAGE v$VERSION | Summary of configuration for $PACKAGE v$VERSION | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | ----- GIT VERSION ------------------------------------- | ||||||
|  | $git_commit | ||||||
| ----- PLATFORM ---------------------------------------- | ----- PLATFORM ---------------------------------------- | ||||||
| architecture (build)        : $build_cpu | architecture (build)        : $build_cpu | ||||||
| os (build)                  : $build_os | os (build)                  : $build_os | ||||||
| @@ -423,10 +483,12 @@ SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | |||||||
| Threading                   : ${ac_openmp} | Threading                   : ${ac_openmp} | ||||||
| Communications type         : ${comms_type} | Communications type         : ${comms_type} | ||||||
| Default precision           : ${ac_PRECISION} | Default precision           : ${ac_PRECISION} | ||||||
|  | Software FP16 conversion    : ${ac_SFW_FP16} | ||||||
| RNG choice                  : ${ac_RNG} | RNG choice                  : ${ac_RNG} | ||||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||||
| LAPACK                      : ${ac_LAPACK} | LAPACK                      : ${ac_LAPACK} | ||||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||||
|  | LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi` | ||||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||||
| ----- BUILD FLAGS ------------------------------------- | ----- BUILD FLAGS ------------------------------------- | ||||||
| @@ -436,7 +498,32 @@ LDFLAGS: | |||||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
| LIBS: | LIBS: | ||||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||||
| -------------------------------------------------------" > config.summary | -------------------------------------------------------" > grid.configure.summary | ||||||
|  |  | ||||||
|  | GRID_SUMMARY="`cat grid.configure.summary`" | ||||||
|  | AM_SUBST_NOTMAKE([GRID_SUMMARY]) | ||||||
|  | AC_SUBST([GRID_SUMMARY]) | ||||||
|  |  | ||||||
|  | AC_CONFIG_FILES([grid-config], [chmod +x grid-config]) | ||||||
|  | AC_CONFIG_FILES(Makefile) | ||||||
|  | AC_CONFIG_FILES(lib/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/IO/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/core/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/debug/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/forces/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/hmc/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/solver/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/smearing/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||||
|  | AC_CONFIG_FILES(tests/testu01/Makefile) | ||||||
|  | AC_CONFIG_FILES(benchmarks/Makefile) | ||||||
|  | AC_CONFIG_FILES(extras/Makefile) | ||||||
|  | AC_CONFIG_FILES(extras/Hadrons/Makefile) | ||||||
|  | AC_OUTPUT | ||||||
|  |  | ||||||
| echo "" | echo "" | ||||||
| cat config.summary | cat grid.configure.summary | ||||||
| echo "" | echo "" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName) | |||||||
| sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | ||||||
|  |  | ||||||
| #define DEFINE_MEMPEAK \ | #define DEFINE_MEMPEAK \ | ||||||
| auto memPeak = [this](const std::vector<unsigned int> &program)\ | GeneticScheduler<unsigned int>::ObjFunc memPeak = \ | ||||||
|  | [this](const std::vector<unsigned int> &program)\ | ||||||
| {\ | {\ | ||||||
|     unsigned int memPeak;\ |     unsigned int memPeak;\ | ||||||
|     bool         msg;\ |     bool         msg;\ | ||||||
|   | |||||||
| @@ -41,9 +41,10 @@ using namespace Hadrons; | |||||||
| // constructor ///////////////////////////////////////////////////////////////// | // constructor ///////////////////////////////////////////////////////////////// | ||||||
| Environment::Environment(void) | Environment::Environment(void) | ||||||
| { | { | ||||||
|     nd_ = GridDefaultLatt().size(); |     dim_ = GridDefaultLatt(); | ||||||
|  |     nd_  = dim_.size(); | ||||||
|     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( |     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( | ||||||
|         GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), |         dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||||
|         GridDefaultMpi())); |         GridDefaultMpi())); | ||||||
|     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); |     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); | ||||||
|     auto loc = getGrid()->LocalDimensions(); |     auto loc = getGrid()->LocalDimensions(); | ||||||
| @@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const | |||||||
|     return nd_; |     return nd_; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::vector<int> Environment::getDim(void) const | ||||||
|  | { | ||||||
|  |     return dim_; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | int Environment::getDim(const unsigned int mu) const | ||||||
|  | { | ||||||
|  |     return dim_[mu]; | ||||||
|  | } | ||||||
|  |  | ||||||
| // random number generator ///////////////////////////////////////////////////// | // random number generator ///////////////////////////////////////////////////// | ||||||
| void Environment::setSeed(const std::vector<int> &seed) | void Environment::setSeed(const std::vector<int> &seed) | ||||||
| { | { | ||||||
| @@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const | |||||||
|     return getModuleType(getModuleAddress(name)); |     return getModuleType(getModuleAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::string Environment::getModuleNamespace(const unsigned int address) const | ||||||
|  | { | ||||||
|  |     std::string type = getModuleType(address), ns; | ||||||
|  |      | ||||||
|  |     auto pos2 = type.rfind("::"); | ||||||
|  |     auto pos1 = type.rfind("::", pos2 - 2); | ||||||
|  |      | ||||||
|  |     return type.substr(pos1 + 2, pos2 - pos1 - 2); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string Environment::getModuleNamespace(const std::string name) const | ||||||
|  | { | ||||||
|  |     return getModuleNamespace(getModuleAddress(name)); | ||||||
|  | } | ||||||
|  |  | ||||||
| bool Environment::hasModule(const unsigned int address) const | bool Environment::hasModule(const unsigned int address) const | ||||||
| { | { | ||||||
|     return (address < module_.size()); |     return (address < module_.size()); | ||||||
| @@ -491,9 +517,16 @@ std::string Environment::getObjectName(const unsigned int address) const | |||||||
| std::string Environment::getObjectType(const unsigned int address) const | std::string Environment::getObjectType(const unsigned int address) const | ||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|  |     { | ||||||
|  |         if (object_[address].type) | ||||||
|         { |         { | ||||||
|             return typeName(object_[address].type); |             return typeName(object_[address].type); | ||||||
|         } |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |             return "<no type>"; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     else if (hasObject(address)) |     else if (hasObject(address)) | ||||||
|     { |     { | ||||||
|         HADRON_ERROR("object with address " + std::to_string(address) |         HADRON_ERROR("object with address " + std::to_string(address) | ||||||
| @@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const | |||||||
|     return getObjectSize(getObjectAddress(name)); |     return getObjectSize(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | unsigned int Environment::getObjectModule(const unsigned int address) const | ||||||
|  | { | ||||||
|  |     if (hasObject(address)) | ||||||
|  |     { | ||||||
|  |         return object_[address].module; | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         HADRON_ERROR("no object with address " + std::to_string(address)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | unsigned int Environment::getObjectModule(const std::string name) const | ||||||
|  | { | ||||||
|  |     return getObjectModule(getObjectAddress(name)); | ||||||
|  | } | ||||||
|  |  | ||||||
| unsigned int Environment::getObjectLs(const unsigned int address) const | unsigned int Environment::getObjectLs(const unsigned int address) const | ||||||
| { | { | ||||||
|     if (hasRegisteredObject(address)) |     if (hasRegisteredObject(address)) | ||||||
|   | |||||||
| @@ -106,6 +106,8 @@ public: | |||||||
|     void                    createGrid(const unsigned int Ls); |     void                    createGrid(const unsigned int Ls); | ||||||
|     GridCartesian *         getGrid(const unsigned int Ls = 1) const; |     GridCartesian *         getGrid(const unsigned int Ls = 1) const; | ||||||
|     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; |     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; | ||||||
|  |     std::vector<int>        getDim(void) const; | ||||||
|  |     int                     getDim(const unsigned int mu) const; | ||||||
|     unsigned int            getNd(void) const; |     unsigned int            getNd(void) const; | ||||||
|     // random number generator |     // random number generator | ||||||
|     void                    setSeed(const std::vector<int> &seed); |     void                    setSeed(const std::vector<int> &seed); | ||||||
| @@ -131,6 +133,8 @@ public: | |||||||
|     std::string             getModuleName(const unsigned int address) const; |     std::string             getModuleName(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const unsigned int address) const; |     std::string             getModuleType(const unsigned int address) const; | ||||||
|     std::string             getModuleType(const std::string name) const; |     std::string             getModuleType(const std::string name) const; | ||||||
|  |     std::string             getModuleNamespace(const unsigned int address) const; | ||||||
|  |     std::string             getModuleNamespace(const std::string name) const; | ||||||
|     bool                    hasModule(const unsigned int address) const; |     bool                    hasModule(const unsigned int address) const; | ||||||
|     bool                    hasModule(const std::string name) const; |     bool                    hasModule(const std::string name) const; | ||||||
|     Graph<unsigned int>     makeModuleGraph(void) const; |     Graph<unsigned int>     makeModuleGraph(void) const; | ||||||
| @@ -171,6 +175,8 @@ public: | |||||||
|     std::string             getObjectType(const std::string name) const; |     std::string             getObjectType(const std::string name) const; | ||||||
|     Size                    getObjectSize(const unsigned int address) const; |     Size                    getObjectSize(const unsigned int address) const; | ||||||
|     Size                    getObjectSize(const std::string name) const; |     Size                    getObjectSize(const std::string name) const; | ||||||
|  |     unsigned int            getObjectModule(const unsigned int address) const; | ||||||
|  |     unsigned int            getObjectModule(const std::string name) const; | ||||||
|     unsigned int            getObjectLs(const unsigned int address) const; |     unsigned int            getObjectLs(const unsigned int address) const; | ||||||
|     unsigned int            getObjectLs(const std::string name) const; |     unsigned int            getObjectLs(const std::string name) const; | ||||||
|     bool                    hasObject(const unsigned int address) const; |     bool                    hasObject(const unsigned int address) const; | ||||||
| @@ -181,6 +187,10 @@ public: | |||||||
|     bool                    hasCreatedObject(const std::string name) const; |     bool                    hasCreatedObject(const std::string name) const; | ||||||
|     bool                    isObject5d(const unsigned int address) const; |     bool                    isObject5d(const unsigned int address) const; | ||||||
|     bool                    isObject5d(const std::string name) const; |     bool                    isObject5d(const std::string name) const; | ||||||
|  |     template <typename T> | ||||||
|  |     bool                    isObjectOfType(const unsigned int address) const; | ||||||
|  |     template <typename T> | ||||||
|  |     bool                    isObjectOfType(const std::string name) const; | ||||||
|     Environment::Size       getTotalSize(void) const; |     Environment::Size       getTotalSize(void) const; | ||||||
|     void                    addOwnership(const unsigned int owner, |     void                    addOwnership(const unsigned int owner, | ||||||
|                                          const unsigned int property); |                                          const unsigned int property); | ||||||
| @@ -197,6 +207,7 @@ private: | |||||||
|     bool                                   dryRun_{false}; |     bool                                   dryRun_{false}; | ||||||
|     unsigned int                           traj_, locVol_; |     unsigned int                           traj_, locVol_; | ||||||
|     // grids |     // grids | ||||||
|  |     std::vector<int>                       dim_; | ||||||
|     GridPt                                 grid4d_; |     GridPt                                 grid4d_; | ||||||
|     std::map<unsigned int, GridPt>         grid5d_; |     std::map<unsigned int, GridPt>         grid5d_; | ||||||
|     GridRbPt                               gridRb4d_; |     GridRbPt                               gridRb4d_; | ||||||
| @@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const | |||||||
|         else |         else | ||||||
|         { |         { | ||||||
|             HADRON_ERROR("object with address " + std::to_string(address) + |             HADRON_ERROR("object with address " + std::to_string(address) + | ||||||
|                          " does not have type '" + typeid(T).name() + |                          " does not have type '" + typeName(&typeid(T)) + | ||||||
|                          "' (has type '" + getObjectType(address) + "')"); |                          "' (has type '" + getObjectType(address) + "')"); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name) | |||||||
|     return createLattice<T>(getObjectAddress(name)); |     return createLattice<T>(getObjectAddress(name)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template <typename T> | ||||||
|  | bool Environment::isObjectOfType(const unsigned int address) const | ||||||
|  | { | ||||||
|  |     if (hasRegisteredObject(address)) | ||||||
|  |     { | ||||||
|  |         if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get())) | ||||||
|  |         { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     else if (hasObject(address)) | ||||||
|  |     { | ||||||
|  |         HADRON_ERROR("object with address " + std::to_string(address) + | ||||||
|  |                      " exists but is not registered"); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         HADRON_ERROR("no object with address " + std::to_string(address)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <typename T> | ||||||
|  | bool Environment::isObjectOfType(const std::string name) const | ||||||
|  | { | ||||||
|  |     return isObjectOfType<T>(getObjectAddress(name)); | ||||||
|  | } | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Environment_hpp_ | #endif // Hadrons_Environment_hpp_ | ||||||
|   | |||||||
| @@ -51,23 +51,43 @@ using Grid::operator<<; | |||||||
|  * error with GCC 5 (clang & GCC 6 compile fine without it). |  * error with GCC 5 (clang & GCC 6 compile fine without it). | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| // FIXME: find a way to do that in a more general fashion |  | ||||||
| #ifndef FIMPL | #ifndef FIMPL | ||||||
| #define FIMPL WilsonImplR | #define FIMPL WilsonImplR | ||||||
| #endif | #endif | ||||||
|  | #ifndef SIMPL | ||||||
|  | #define SIMPL ScalarImplCR | ||||||
|  | #endif | ||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| // type aliases | // type aliases | ||||||
| #define TYPE_ALIASES(FImpl, suffix)\ | #define FERM_TYPE_ALIASES(FImpl, suffix)\ | ||||||
| typedef FermionOperator<FImpl>                       FMat##suffix;             \ | typedef FermionOperator<FImpl>                       FMat##suffix;             \ | ||||||
| typedef typename FImpl::FermionField                 FermionField##suffix;     \ | typedef typename FImpl::FermionField                 FermionField##suffix;     \ | ||||||
| typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | ||||||
| typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | ||||||
| typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\ | typedef std::vector<typename FImpl::SitePropagator::scalar_object>             \ | ||||||
| typedef std::function<void(FermionField##suffix &,                             \ |                                                      SlicedPropagator##suffix; | ||||||
|  |  | ||||||
|  | #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ | ||||||
|  | typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; | ||||||
|  |  | ||||||
|  | #define SCALAR_TYPE_ALIASES(SImpl, suffix)\ | ||||||
|  | typedef typename SImpl::Field ScalarField##suffix;\ | ||||||
|  | typedef typename SImpl::Field PropagatorField##suffix; | ||||||
|  |  | ||||||
|  | #define SOLVER_TYPE_ALIASES(FImpl, suffix)\ | ||||||
|  | typedef std::function<void(FermionField##suffix &,\ | ||||||
|                       const FermionField##suffix &)> SolverFn##suffix; |                       const FermionField##suffix &)> SolverFn##suffix; | ||||||
|  |  | ||||||
|  | #define SINK_TYPE_ALIASES(suffix)\ | ||||||
|  | typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix; | ||||||
|  |  | ||||||
|  | #define FGS_TYPE_ALIASES(FImpl, suffix)\ | ||||||
|  | FERM_TYPE_ALIASES(FImpl, suffix)\ | ||||||
|  | GAUGE_TYPE_ALIASES(FImpl, suffix)\ | ||||||
|  | SOLVER_TYPE_ALIASES(FImpl, suffix) | ||||||
|  |  | ||||||
| // logger | // logger | ||||||
| class HadronsLogger: public Logger | class HadronsLogger: public Logger | ||||||
| { | { | ||||||
| @@ -145,6 +165,15 @@ std::string typeName(void) | |||||||
|     return typeName(typeIdPt<T>()); |     return typeName(typeIdPt<T>()); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // default writers/readers | ||||||
|  | #ifdef HAVE_HDF5 | ||||||
|  | typedef Hdf5Reader CorrReader; | ||||||
|  | typedef Hdf5Writer CorrWriter; | ||||||
|  | #else | ||||||
|  | typedef XmlReader CorrReader; | ||||||
|  | typedef XmlWriter CorrWriter; | ||||||
|  | #endif | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Global_hpp_ | #endif // Hadrons_Global_hpp_ | ||||||
|   | |||||||
| @@ -1,40 +1,25 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
| Source file: extras/Hadrons/Modules.hpp |  | ||||||
|  |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MSink/Point.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MSource/Wall.hpp> | ||||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||||
| #include <Grid/Hadrons/Modules/Quark.hpp> |  | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_DWF_hpp_ | #ifndef Hadrons_MAction_DWF_hpp_ | ||||||
| #define Hadrons_DWF_hpp_ | #define Hadrons_MAction_DWF_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -48,14 +48,15 @@ public: | |||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     unsigned int, Ls, |                                     unsigned int, Ls, | ||||||
|                                     double      , mass, |                                     double      , mass, | ||||||
|                                     double      , M5); |                                     double      , M5, | ||||||
|  |                                     std::string , boundary); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TDWF: public Module<DWFPar> | class TDWF: public Module<DWFPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FGS_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TDWF(const std::string name); |     TDWF(const std::string name); | ||||||
| @@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void) | |||||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " |                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" |                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|  |     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||||
|  |                  << std::endl; | ||||||
|     env().createGrid(par().Ls); |     env().createGrid(par().Ls); | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &g4     = *env().getGrid(); |     auto &g4     = *env().getGrid(); | ||||||
|     auto &grb4   = *env().getRbGrid(); |     auto &grb4   = *env().getRbGrid(); | ||||||
|     auto &g5     = *env().getGrid(par().Ls); |     auto &g5     = *env().getGrid(par().Ls); | ||||||
|     auto &grb5   = *env().getRbGrid(par().Ls); |     auto &grb5   = *env().getRbGrid(par().Ls); | ||||||
|  |     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||||
|  |     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); | ||||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, |     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||||
|                                                 par().mass, par().M5); |                                                 par().mass, par().M5, | ||||||
|  |                                                 implParams); | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -131,4 +137,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_DWF_hpp_ | #endif // Hadrons_MAction_DWF_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Wilson_hpp_ | #ifndef Hadrons_MAction_Wilson_hpp_ | ||||||
| #define Hadrons_Wilson_hpp_ | #define Hadrons_MAction_Wilson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -46,14 +46,15 @@ class WilsonPar: Serializable | |||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||||
|                                     std::string, gauge, |                                     std::string, gauge, | ||||||
|                                     double     , mass); |                                     double     , mass, | ||||||
|  |                                     std::string, boundary); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TWilson: public Module<WilsonPar> | class TWilson: public Module<WilsonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FGS_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TWilson(const std::string name); |     TWilson(const std::string name); | ||||||
| @@ -112,10 +113,15 @@ void TWilson<FImpl>::execute() | |||||||
| { | { | ||||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass |     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; |                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||||
|  |     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||||
|  |                  << std::endl; | ||||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); |     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||||
|     auto &grid   = *env().getGrid(); |     auto &grid   = *env().getGrid(); | ||||||
|     auto &gridRb = *env().getRbGrid(); |     auto &gridRb = *env().getRbGrid(); | ||||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); |     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||||
|  |     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); | ||||||
|  |     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, | ||||||
|  |                                             implParams); | ||||||
|     env().setObject(getName(), fMatPt); |     env().setObject(getName(), fMatPt); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Baryon_hpp_ | #ifndef Hadrons_MContraction_Baryon_hpp_ | ||||||
| #define Hadrons_Baryon_hpp_ | #define Hadrons_MContraction_Baryon_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3> | |||||||
| class TBaryon: public Module<BaryonPar> | class TBaryon: public Module<BaryonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl1, 1); |     FERM_TYPE_ALIASES(FImpl1, 1); | ||||||
|     TYPE_ALIASES(FImpl2, 2); |     FERM_TYPE_ALIASES(FImpl2, 2); | ||||||
|     TYPE_ALIASES(FImpl3, 3); |     FERM_TYPE_ALIASES(FImpl3, 3); | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
| @@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" |                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" | ||||||
|                  << par().q3 << "'" << std::endl; |                  << par().q3 << "'" << std::endl; | ||||||
|      |      | ||||||
|     XmlWriter             writer(par().output); |     CorrWriter             writer(par().output); | ||||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); |     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); |     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); |     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); | ||||||
| @@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | |||||||
|      |      | ||||||
|     // FIXME: do contractions |     // FIXME: do contractions | ||||||
|      |      | ||||||
|     write(writer, "meson", result); |     // write(writer, "meson", result); | ||||||
| } | } | ||||||
|  |  | ||||||
| END_MODULE_NAMESPACE | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Baryon_hpp_ | #endif // Hadrons_MContraction_Baryon_hpp_ | ||||||
|   | |||||||
							
								
								
									
										144
									
								
								extras/Hadrons/Modules/MContraction/DiscLoop.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								extras/Hadrons/Modules/MContraction/DiscLoop.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MContraction_DiscLoop_hpp_ | ||||||
|  | #define Hadrons_MContraction_DiscLoop_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                                DiscLoop                                    * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Parameters of the DiscLoop module: | ||||||
|  | //  - q_loop: name of the propagator object that execute() traces | ||||||
|  | //  - gamma:  gamma matrix inserted in front of the propagator in the trace | ||||||
|  | //  - output: argument handed to the CorrWriter that stores the result | ||||||
|  | class DiscLoopPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar, | ||||||
|  |                                     std::string,    q_loop, | ||||||
|  |                                     Gamma::Algebra, gamma, | ||||||
|  |                                     std::string,    output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing trace(gamma*q_loop), reduced with sliceSum along Tp and | ||||||
|  | // written out through a CorrWriter (see execute()). | ||||||
|  | // FImpl: implementation type handed to FERM_TYPE_ALIASES to define the field | ||||||
|  | //        type aliases (e.g. PropagatorField) used by the module. | ||||||
|  | template <typename FImpl> | ||||||
|  | class TDiscLoop: public Module<DiscLoopPar> | ||||||
|  | { | ||||||
|  |     FERM_TYPE_ALIASES(FImpl,); | ||||||
|  |     // serialised output: the inserted gamma and one complex number per entry | ||||||
|  |     // of the sliceSum buffer | ||||||
|  |     class Result: Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||||
|  |                                         Gamma::Algebra, gamma, | ||||||
|  |                                         std::vector<Complex>, corr); | ||||||
|  |     }; | ||||||
|  | public: | ||||||
|  |     // constructor | ||||||
|  |     TDiscLoop(const std::string name); | ||||||
|  |     // destructor | ||||||
|  |     virtual ~TDiscLoop(void) = default; | ||||||
|  |     // dependency relation: names of the objects consumed / produced | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     // setup (empty for this module) | ||||||
|  |     virtual void setup(void); | ||||||
|  |     // execution: performs the contraction and writes the result | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Registers TDiscLoop<FIMPL> under the name DiscLoop in the MContraction | ||||||
|  | // namespace (presumably with the module factory -- TODO confirm against the | ||||||
|  | // definition of MODULE_REGISTER_NS). | ||||||
|  | MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                       TDiscLoop implementation                             * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Forwards the module name to the Module<DiscLoopPar> base class. | ||||||
|  | template <typename FImpl> | ||||||
|  | TDiscLoop<FImpl>::TDiscLoop(const std::string name): | ||||||
|  |     Module<DiscLoopPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // The only dependency is the propagator named by the q_loop parameter. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TDiscLoop<FImpl>::getInput(void) | ||||||
|  | { | ||||||
|  |     return std::vector<std::string>{par().q_loop}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The module advertises a single product, named after the module itself. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TDiscLoop<FImpl>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return std::vector<std::string>{getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | template <typename FImpl> | ||||||
|  | void TDiscLoop<FImpl>::setup(void) | ||||||
|  | { | ||||||
|  |     // intentionally empty: this module performs no work at setup time | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Traces gamma*q_loop, reduces it with sliceSum along Tp and writes the | ||||||
|  | // resulting values under the tag "disc". | ||||||
|  | template <typename FImpl> | ||||||
|  | void TDiscLoop<FImpl>::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing disconnected loop contraction '" | ||||||
|  |                  << getName() << "' using '" << par().q_loop | ||||||
|  |                  << "' with " << par().gamma << " insertion." | ||||||
|  |                  << std::endl; | ||||||
|  |  | ||||||
|  |     // objects are created in the same order as before (writer first) | ||||||
|  |     CorrWriter            corrFile(par().output); | ||||||
|  |     PropagatorField       &loop = *env().template getObject<PropagatorField>(par().q_loop); | ||||||
|  |     LatticeComplex        traced(env().getGrid()); | ||||||
|  |     Gamma                 insertion(par().gamma); | ||||||
|  |     std::vector<TComplex> sliced; | ||||||
|  |     Result                out; | ||||||
|  |  | ||||||
|  |     // gamma-inserted trace of the loop, then reduction along Tp | ||||||
|  |     traced = trace(insertion*loop); | ||||||
|  |     sliceSum(traced, sliced, Tp); | ||||||
|  |  | ||||||
|  |     // unwrap every reduced entry into the serialisable result | ||||||
|  |     out.gamma = par().gamma; | ||||||
|  |     out.corr.resize(sliced.size()); | ||||||
|  |     unsigned int t = 0; | ||||||
|  |     while (t < sliced.size()) | ||||||
|  |     { | ||||||
|  |         out.corr[t] = TensorRemove(sliced[t]); | ||||||
|  |         ++t; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     write(corrFile, "disc", out); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MContraction_DiscLoop_hpp_ | ||||||
							
								
								
									
										170
									
								
								extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MContraction_Gamma3pt_hpp_ | ||||||
|  | #define Hadrons_MContraction_Gamma3pt_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * 3pt contraction with gamma matrix insertion. | ||||||
|  |  * | ||||||
|  |  * Schematic: | ||||||
|  |  * | ||||||
|  |  *             q2           q3 | ||||||
|  |  *        /----<------*------<----¬ | ||||||
|  |  *       /          gamma          \ | ||||||
|  |  *      /                           \ | ||||||
|  |  *   i *                            * f | ||||||
|  |  *      \                          / | ||||||
|  |  *       \                        / | ||||||
|  |  *        \----------->----------/ | ||||||
|  |  *                   q1 | ||||||
|  |  * | ||||||
|  |  *      trace(g5*q1*adj(q2)*g5*gamma*q3) | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                               Gamma3pt                                     * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Parameters of the Gamma3pt module: | ||||||
|  | //  - q1, q2, q3: names of the three propagator objects entering the trace | ||||||
|  | //  - gamma:      gamma matrix inserted between adj(q2) and q3 | ||||||
|  | //  - output:     argument handed to the CorrWriter that stores the result | ||||||
|  | class Gamma3ptPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar, | ||||||
|  |                                     std::string,    q1, | ||||||
|  |                                     std::string,    q2, | ||||||
|  |                                     std::string,    q3, | ||||||
|  |                                     Gamma::Algebra, gamma, | ||||||
|  |                                     std::string,    output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing trace(g5*q1*adj(q2)*(g5*gamma)*q3), reduced with sliceSum | ||||||
|  | // along Tp and written out through a CorrWriter (see execute()). | ||||||
|  | // FImpl1..3: implementation types handed to FERM_TYPE_ALIASES; they define | ||||||
|  | //            the suffixed aliases (PropagatorField1, ...) used for q1..q3. | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | class TGamma3pt: public Module<Gamma3ptPar> | ||||||
|  | { | ||||||
|  |     FERM_TYPE_ALIASES(FImpl1, 1); | ||||||
|  |     FERM_TYPE_ALIASES(FImpl2, 2); | ||||||
|  |     FERM_TYPE_ALIASES(FImpl3, 3); | ||||||
|  |     // serialised output: the inserted gamma and one complex number per entry | ||||||
|  |     // of the sliceSum buffer | ||||||
|  |     class Result: Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||||
|  |                                         Gamma::Algebra, gamma, | ||||||
|  |                                         std::vector<Complex>, corr); | ||||||
|  |     }; | ||||||
|  | public: | ||||||
|  |     // constructor | ||||||
|  |     TGamma3pt(const std::string name); | ||||||
|  |     // destructor | ||||||
|  |     virtual ~TGamma3pt(void) = default; | ||||||
|  |     // dependency relation: names of the objects consumed / produced | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     // setup (empty for this module) | ||||||
|  |     virtual void setup(void); | ||||||
|  |     // execution: performs the contraction and writes the result | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Registers TGamma3pt<FIMPL, FIMPL, FIMPL> under the name Gamma3pt in the | ||||||
|  | // MContraction namespace; ARG() keeps the comma-separated template argument | ||||||
|  | // list together as a single macro argument. | ||||||
|  | MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                       TGamma3pt implementation                             * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Forwards the module name to the Module<Gamma3ptPar> base class. | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name): | ||||||
|  |     Module<Gamma3ptPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // The three propagators named in the parameters are the dependencies. | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void) | ||||||
|  | { | ||||||
|  |     return std::vector<std::string>{par().q1, par().q2, par().q3}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The module advertises a single product, named after the module itself. | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return std::vector<std::string>{getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void) | ||||||
|  | { | ||||||
|  |     // intentionally empty: this module performs no work at setup time | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Traces g5*q1*adj(q2)*(g5*gamma)*q3, reduces it with sliceSum along Tp and | ||||||
|  | // writes the resulting values under the tag "gamma3pt". | ||||||
|  | template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||||
|  | void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing 3pt contractions '" << getName() | ||||||
|  |                  << "' using" << " quarks '" << par().q1 << "', '" | ||||||
|  |                  << par().q2 << "' and '" << par().q3 << "', with " | ||||||
|  |                  << par().gamma << " insertion." << std::endl; | ||||||
|  |  | ||||||
|  |     // objects are created in the same order as before (writer first) | ||||||
|  |     CorrWriter            corrFile(par().output); | ||||||
|  |     PropagatorField1      &prop1 = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|  |     PropagatorField2      &prop2 = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|  |     PropagatorField3      &prop3 = *env().template getObject<PropagatorField3>(par().q3); | ||||||
|  |     LatticeComplex        traced(env().getGrid()); | ||||||
|  |     Gamma                 g5(Gamma::Algebra::Gamma5); | ||||||
|  |     Gamma                 insertion(par().gamma); | ||||||
|  |     std::vector<TComplex> sliced; | ||||||
|  |     Result                out; | ||||||
|  |  | ||||||
|  |     // three-point trace with the gamma insertion, then reduction along Tp | ||||||
|  |     traced = trace(g5*prop1*adj(prop2)*(g5*insertion)*prop3); | ||||||
|  |     sliceSum(traced, sliced, Tp); | ||||||
|  |  | ||||||
|  |     // unwrap every reduced entry into the serialisable result | ||||||
|  |     out.gamma = par().gamma; | ||||||
|  |     out.corr.resize(sliced.size()); | ||||||
|  |     unsigned int t = 0; | ||||||
|  |     while (t < sliced.size()) | ||||||
|  |     { | ||||||
|  |         out.corr[t] = TensorRemove(sliced[t]); | ||||||
|  |         ++t; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     write(corrFile, "gamma3pt", out); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MContraction_Gamma3pt_hpp_ | ||||||
| @@ -6,8 +6,10 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |         Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -27,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Meson_hpp_ | #ifndef Hadrons_MContraction_Meson_hpp_ | ||||||
| #define Hadrons_Meson_hpp_ | #define Hadrons_MContraction_Meson_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -36,32 +38,56 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   | ||||||
|  |  Meson contractions | ||||||
|  |  ----------------------------- | ||||||
|  |   | ||||||
|  |  * options: | ||||||
|  |  - q1: input propagator 1 (string) | ||||||
|  |  - q2: input propagator 2 (string) | ||||||
|  |  - gammas: gamma products to insert at sink & source, pairs of gamma matrices  | ||||||
|  |            (space-separated strings) in angled brackets (i.e. <g_sink g_src>), | ||||||
|  |            in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>"). | ||||||
|  |  | ||||||
|  |            Special values: "all" - perform all possible contractions. | ||||||
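|  |  - sink: name of the sink object (string); either a field produced by a | ||||||
|  |          module of the MSource namespace or a sink function produced by a | ||||||
|  |          module of the MSink namespace. | ||||||
|  |  - output: output file name handed to the correlator writer (string). | ||||||
|  |  | ||||||
|  |  NOTE: MesonPar has no 'mom' option. A momentum insertion (space-separated | ||||||
|  |        float sequence, e.g. ".1 .2 1. 0.", given as multiples of | ||||||
|  |        (2*pi) / L) is presumably configured on the object named by 'sink' | ||||||
|  |        (to be confirmed). | ||||||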
|  | */ | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                                TMeson                                       * |  *                                TMeson                                       * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| BEGIN_MODULE_NAMESPACE(MContraction) | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair; | ||||||
|  |  | ||||||
| class MesonPar: Serializable | class MesonPar: Serializable | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, | ||||||
|                                     std::string, q1, |                                     std::string, q1, | ||||||
|                                     std::string, q2, |                                     std::string, q2, | ||||||
|                                     std::string,    output, |                                     std::string, gammas, | ||||||
|                                     Gamma::Algebra, gammaSource, |                                     std::string, sink, | ||||||
|                                     Gamma::Algebra, gammaSink); |                                     std::string, output); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| class TMeson: public Module<MesonPar> | class TMeson: public Module<MesonPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl1, 1); |     FERM_TYPE_ALIASES(FImpl1, 1); | ||||||
|     TYPE_ALIASES(FImpl2, 2); |     FERM_TYPE_ALIASES(FImpl2, 2); | ||||||
|  |     FERM_TYPE_ALIASES(ScalarImplCR, Scalar); | ||||||
|  |     SINK_TYPE_ALIASES(Scalar); | ||||||
|     class Result: Serializable |     class Result: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||||
|  |                                         Gamma::Algebra, gamma_snk, | ||||||
|  |                                         Gamma::Algebra, gamma_src, | ||||||
|  |                                         std::vector<Complex>, corr); | ||||||
|     }; |     }; | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
| @@ -71,6 +97,7 @@ public: | |||||||
|     // dependencies/products |     // dependencies/products | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     virtual void parseGammaString(std::vector<GammaPair> &gammaList); | ||||||
|     // execution |     // execution | ||||||
|     virtual void execute(void); |     virtual void execute(void); | ||||||
| }; | }; | ||||||
| @@ -90,7 +117,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name) | |||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> input = {par().q1, par().q2}; |     std::vector<std::string> input = {par().q1, par().q2, par().sink}; | ||||||
|      |      | ||||||
|     return input; |     return input; | ||||||
| } | } | ||||||
| @@ -103,7 +130,35 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | |||||||
|     return output; |     return output; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Fills gammaList with the (sink, source) gamma pairs requested by the | ||||||
|  | // 'gammas' parameter. The special value "all" expands to every pair of | ||||||
|  | // odd-indexed Gamma::Algebra values below Gamma::nGamma; any other string is | ||||||
|  | // parsed with strToVec<GammaPair>. | ||||||
|  | template <typename FImpl1, typename FImpl2> | ||||||
|  | void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) | ||||||
|  | { | ||||||
|  |     gammaList.clear(); | ||||||
|  |     if (par().gammas != "all") | ||||||
|  |     { | ||||||
|  |         // explicit list of contractions given in the input string | ||||||
|  |         gammaList = strToVec<GammaPair>(par().gammas); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     // "all": enumerate every combination, sink index outermost | ||||||
|  |     for (unsigned int snk = 1; snk < Gamma::nGamma; snk += 2) | ||||||
|  |     { | ||||||
|  |         for (unsigned int src = 1; src < Gamma::nGamma; src += 2) | ||||||
|  |         { | ||||||
|  |             gammaList.emplace_back(static_cast<Gamma::Algebra>(snk), | ||||||
|  |                                    static_cast<Gamma::Algebra>(src)); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Spin structure of the connected meson contraction, before the trace is | ||||||
|  | // taken. Relies on a Gamma object named 'g5' being in scope at the point of | ||||||
|  | // expansion; every argument is parenthesised except q2, which is only used | ||||||
|  | // as the argument of adj(). | ||||||
|  | #define mesonConnected(q1, q2, gSnk, gSrc) \ | ||||||
|  | (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) | ||||||
|  |  | ||||||
| template <typename FImpl1, typename FImpl2> | template <typename FImpl1, typename FImpl2> | ||||||
| void TMeson<FImpl1, FImpl2>::execute(void) | void TMeson<FImpl1, FImpl2>::execute(void) | ||||||
| { | { | ||||||
| @@ -111,21 +166,73 @@ void TMeson<FImpl1, FImpl2>::execute(void) | |||||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" |                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||||
|                  << std::endl; |                  << std::endl; | ||||||
|      |      | ||||||
|     XmlWriter             writer(par().output); |     CorrWriter             writer(par().output); | ||||||
|  |     std::vector<TComplex>  buf; | ||||||
|  |     std::vector<Result>    result; | ||||||
|  |     Gamma                  g5(Gamma::Algebra::Gamma5); | ||||||
|  |     std::vector<GammaPair> gammaList; | ||||||
|  |     int                    nt = env().getDim(Tp); | ||||||
|  |      | ||||||
|  |     parseGammaString(gammaList); | ||||||
|  |     result.resize(gammaList.size()); | ||||||
|  |     for (unsigned int i = 0; i < result.size(); ++i) | ||||||
|  |     { | ||||||
|  |         result[i].gamma_snk = gammaList[i].first; | ||||||
|  |         result[i].gamma_src = gammaList[i].second; | ||||||
|  |         result[i].corr.resize(nt); | ||||||
|  |     } | ||||||
|  |     // Both propagators are already sliced (one entry per time slice): the | ||||||
|  |     // contraction is done slice by slice without any further sink. | ||||||
|  |     if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and | ||||||
|  |         env().template isObjectOfType<SlicedPropagator2>(par().q2)) | ||||||
|  |     { | ||||||
|  |         SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1); | ||||||
|  |         SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2); | ||||||
|  |          | ||||||
|  |         LOG(Message) << "(propagator already sinked)" << std::endl; | ||||||
|  |         for (unsigned int i = 0; i < result.size(); ++i) | ||||||
|  |         { | ||||||
|  |             Gamma gSnk(gammaList[i].first); | ||||||
|  |             Gamma gSrc(gammaList[i].second); | ||||||
|  |              | ||||||
|  |             // Loop over the nt time slices that result[i].corr was resized | ||||||
|  |             // to. 'buf' is never filled on this path, so bounding the loop | ||||||
|  |             // by buf.size() skipped every slice and left corr untouched. | ||||||
|  |             // NOTE(review): assumes q1 and q2 hold at least nt entries. | ||||||
|  |             for (int t = 0; t < nt; ++t) | ||||||
|  |             { | ||||||
|  |                 result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|         PropagatorField1 &q1   = *env().template getObject<PropagatorField1>(par().q1); |         PropagatorField1 &q1   = *env().template getObject<PropagatorField1>(par().q1); | ||||||
|         PropagatorField2 &q2   = *env().template getObject<PropagatorField2>(par().q2); |         PropagatorField2 &q2   = *env().template getObject<PropagatorField2>(par().q2); | ||||||
|         LatticeComplex   c(env().getGrid()); |         LatticeComplex   c(env().getGrid()); | ||||||
|     Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink); |  | ||||||
|     Gamma                 g5(Gamma::Algebra::Gamma5); |  | ||||||
|     std::vector<TComplex> buf; |  | ||||||
|     Result                result; |  | ||||||
|          |          | ||||||
|     c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5); |         LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; | ||||||
|  |         for (unsigned int i = 0; i < result.size(); ++i) | ||||||
|  |         { | ||||||
|  |             Gamma       gSnk(gammaList[i].first); | ||||||
|  |             Gamma       gSrc(gammaList[i].second); | ||||||
|  |             std::string ns; | ||||||
|  |                  | ||||||
|  |             ns = env().getModuleNamespace(env().getObjectModule(par().sink)); | ||||||
|  |             if (ns == "MSource") | ||||||
|  |             { | ||||||
|  |                 PropagatorField1 &sink = | ||||||
|  |                     *env().template getObject<PropagatorField1>(par().sink); | ||||||
|  |                  | ||||||
|  |                 c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); | ||||||
|                 sliceSum(c, buf, Tp); |                 sliceSum(c, buf, Tp); | ||||||
|     result.corr.resize(buf.size()); |             } | ||||||
|  |             else if (ns == "MSink") | ||||||
|  |             { | ||||||
|  |                 SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink); | ||||||
|  |                  | ||||||
|  |                 c   = trace(mesonConnected(q1, q2, gSnk, gSrc)); | ||||||
|  |                 buf = sink(c); | ||||||
|  |             } | ||||||
|             for (unsigned int t = 0; t < buf.size(); ++t) |             for (unsigned int t = 0; t < buf.size(); ++t) | ||||||
|             { |             { | ||||||
|         result.corr[t] = TensorRemove(buf[t]); |                 result[i].corr[t] = TensorRemove(buf[t]); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     write(writer, "meson", result); |     write(writer, "meson", result); | ||||||
| } | } | ||||||
| @@ -134,4 +241,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Meson_hpp_ | #endif // Hadrons_MContraction_Meson_hpp_ | ||||||
|   | |||||||
							
								
								
									
										114
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||||
|  | #define Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakHamiltonian                                    * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | /******************************************************************************* | ||||||
|  |  * Utilities for contractions involving the Weak Hamiltonian. | ||||||
|  |  ******************************************************************************/ | ||||||
|  | //// Sum and store correlator. | ||||||
|  | // MAKE_DIAG(exp, buf, res, n): reduces 'exp' into 'buf' with sliceSum along | ||||||
|  | // Tp, sets res.name to 'n' and copies every entry of 'buf' into res.corr | ||||||
|  | // through TensorRemove. | ||||||
|  | // NOTE(review): expands to several statements without an enclosing block, so | ||||||
|  | // it must not be used as the unbraced body of an if/for/while. | ||||||
|  | #define MAKE_DIAG(exp, buf, res, n)\ | ||||||
|  | sliceSum(exp, buf, Tp);\ | ||||||
|  | res.name = (n);\ | ||||||
|  | res.corr.resize(buf.size());\ | ||||||
|  | for (unsigned int t = 0; t < buf.size(); ++t)\ | ||||||
|  | {\ | ||||||
|  |     res.corr[t] = TensorRemove(buf[t]);\ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | //// Contraction of mu index: use 'mu' variable in exp. | ||||||
|  | // SUM_MU(buf, exp): resets 'buf' to zero, then accumulates 'exp' for | ||||||
|  | // mu = 0 .. ndim-1. Both 'zero' and 'ndim' must be visible where the macro | ||||||
|  | // is expanded; 'exp' is re-evaluated at every iteration and may refer to the | ||||||
|  | // loop variable 'mu'. | ||||||
|  | // NOTE(review): expands to two statements without an enclosing block. | ||||||
|  | #define SUM_MU(buf,exp)\ | ||||||
|  | buf = zero;\ | ||||||
|  | for (unsigned int mu = 0; mu < ndim; ++mu)\ | ||||||
|  | {\ | ||||||
|  |     buf += exp;\ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Index constants; presumably the vector (V) and axial (A) components, in | ||||||
|  | // the same order as the {"V", "A"} labels of the weak modules -- TODO | ||||||
|  | // confirm. n_i is the number of components. | ||||||
|  | enum  | ||||||
|  | { | ||||||
|  |   i_V = 0, | ||||||
|  |   i_A = 1, | ||||||
|  |   n_i = 2 | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Parameters shared by the modules declared with MAKE_WEAK_MODULE: four | ||||||
|  | // string members q1..q4 (presumably names of propagator objects -- TODO | ||||||
|  | // confirm in the module implementations) and an output string. | ||||||
|  | class WeakHamiltonianPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar, | ||||||
|  |                                     std::string, q1, | ||||||
|  |                                     std::string, q2, | ||||||
|  |                                     std::string, q3, | ||||||
|  |                                     std::string, q4, | ||||||
|  |                                     std::string, output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // MAKE_WEAK_MODULE(modname): declares the class T<modname>, a | ||||||
|  | // Module<WeakHamiltonianPar> with a serialisable Result (name + correlator), | ||||||
|  | // the usual getInput/getOutput/setup/execute interface and a VA_label member | ||||||
|  | // initialised to {"V", "A"}, then registers it in the MContraction namespace. | ||||||
|  | // The member functions are only declared here; their definitions must be | ||||||
|  | // provided elsewhere. The expansion already ends with a ';'. | ||||||
|  | #define MAKE_WEAK_MODULE(modname)\ | ||||||
|  | class T##modname: public Module<WeakHamiltonianPar>\ | ||||||
|  | {\ | ||||||
|  | public:\ | ||||||
|  |     FERM_TYPE_ALIASES(FIMPL,)\ | ||||||
|  |     class Result: Serializable\ | ||||||
|  |     {\ | ||||||
|  |     public:\ | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\ | ||||||
|  |                                         std::string, name,\ | ||||||
|  |                                         std::vector<Complex>, corr);\ | ||||||
|  |     };\ | ||||||
|  | public:\ | ||||||
|  |     /* constructor */ \ | ||||||
|  |     T##modname(const std::string name);\ | ||||||
|  |     /* destructor */ \ | ||||||
|  |     virtual ~T##modname(void) = default;\ | ||||||
|  |     /* dependency relation */ \ | ||||||
|  |     virtual std::vector<std::string> getInput(void);\ | ||||||
|  |     virtual std::vector<std::string> getOutput(void);\ | ||||||
|  |     /* setup */ \ | ||||||
|  |     virtual void setup(void);\ | ||||||
|  |     /* execution */ \ | ||||||
|  |     virtual void execute(void);\ | ||||||
|  |     std::vector<std::string> VA_label = {"V", "A"};\ | ||||||
|  | };\ | ||||||
|  | MODULE_REGISTER_NS(modname, T##modname, MContraction); | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||||
							
								
								
									
										137
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MContraction; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Weak Hamiltonian current-current contractions, Eye-type. | ||||||
|  |  *  | ||||||
|  |  * These contractions are generated by the Q1 and Q2 operators in the physical | ||||||
|  |  * basis (see e.g. Fig 3 of arXiv:1507.03094). | ||||||
|  |  *  | ||||||
|  |  * Schematics:        q4                 |                   | ||||||
|  |  *                  /-<-¬                |                              | ||||||
|  |  *                 /     \               |             q2           q3 | ||||||
|  |  *                 \     /               |        /----<------*------<----¬                         | ||||||
|  |  *            q2    \   /    q3          |       /          /-*-¬          \ | ||||||
|  |  *       /-----<-----* *-----<----¬      |      /          /     \          \ | ||||||
|  |  *    i *            H_W           * f   |   i *           \     /  q4      * f | ||||||
|  |  *       \                        /      |      \           \->-/          /    | ||||||
|  |  *        \                      /       |       \                        /        | ||||||
|  |  *         \---------->---------/        |        \----------->----------/         | ||||||
|  |  *                   q1                  |                   q1                   | ||||||
|  |  *                                       | | ||||||
|  |  *                Saucer (S)             |                  Eye (E) | ||||||
|  |  *  | ||||||
|  |  * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) | ||||||
|  |  * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                  TWeakHamiltonianEye implementation                        * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Passes the module name on to the Module<WeakHamiltonianPar> base. | ||||||
|  | TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name) | ||||||
|  |     : Module<WeakHamiltonianPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Inputs are the four propagators named in the parameters, in q1..q4 order. | ||||||
|  | std::vector<std::string> TWeakHamiltonianEye::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3, par().q4}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared output is the module's own name. | ||||||
|  | std::vector<std::string> TWeakHamiltonianEye::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | void TWeakHamiltonianEye::setup(void) | ||||||
|  | { | ||||||
|  |     // Intentionally empty: every temporary is created inside execute(). | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Computes the Saucer (S) and Eye (E) contractions from the propagators named | ||||||
|  | // q1..q4 in the parameters and writes both correlators, labelled "HW_S" and | ||||||
|  | // "HW_E", under the key "HW_Eye" through a CorrWriter opened on par().output. | ||||||
|  | void TWeakHamiltonianEye::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '" | ||||||
|  |                  << getName() << "' using quarks '" << par().q1 << "', '" | ||||||
|  |                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4 | ||||||
|  |                  << "'." << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter             writer(par().output); | ||||||
|  |     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||||
|  |     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||||
|  |     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||||
|  |     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||||
|  |     // g5 is picked up by name inside the MAKE_SE_BODY expansion. | ||||||
|  |     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||||
|  |     LatticeComplex        expbuf(env().getGrid()); | ||||||
|  |     std::vector<TComplex> corrbuf; | ||||||
|  |     std::vector<Result>   result(n_eye_diag); | ||||||
|  |     // ndim is picked up by name inside the SUM_MU expansion. | ||||||
|  |     unsigned int ndim   = env().getNd(); | ||||||
|  |  | ||||||
|  |     PropagatorField              tmp1(env().getGrid()); | ||||||
|  |     LatticeComplex               tmp2(env().getGrid()); | ||||||
|  |     std::vector<PropagatorField> S_body(ndim, tmp1); | ||||||
|  |     std::vector<PropagatorField> S_loop(ndim, tmp1); | ||||||
|  |     std::vector<LatticeComplex>  E_body(ndim, tmp2); | ||||||
|  |     std::vector<LatticeComplex>  E_loop(ndim, tmp2); | ||||||
|  |  | ||||||
|  |     // Setup for S-type contractions (unsigned index to match ndim). | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); | ||||||
|  |         S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform S-type contractions: one trace over body*loop, summed over mu. | ||||||
|  |     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") | ||||||
|  |  | ||||||
|  |     // Recycle sub-expressions for E-type contractions. | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         E_body[mu] = trace(S_body[mu]); | ||||||
|  |         E_loop[mu] = trace(S_loop[mu]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform E-type contractions: product of the two traces, summed over mu. | ||||||
|  |     SUM_MU(expbuf, E_body[mu]*E_loop[mu]) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E") | ||||||
|  |  | ||||||
|  |     write(writer, "HW_Eye", result); | ||||||
|  | } | ||||||
							
								
								
									
										58
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||||
|  | #define Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakHamiltonianEye                                 * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Positions of the Saucer (S) and Eye (E) correlators in the result vector; | ||||||
|  | // n_eye_diag is the number of diagrams. | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |     S_diag, | ||||||
|  |     E_diag, | ||||||
|  |     n_eye_diag | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Saucer and Eye subdiagram contractions. | ||||||
|  | // Each macro expands to one parenthesised expression. MAKE_SE_BODY also uses | ||||||
|  | // a variable named g5 that must be in scope where the macro is expanded. | ||||||
|  | // Arguments are parenthesised so compound expressions keep their precedence. | ||||||
|  | #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) ((Q_3)*g5*(Q_1)*adj(Q_2)*g5*(gamma)) | ||||||
|  | #define MAKE_SE_LOOP(Q_loop, gamma) ((Q_loop)*(gamma)) | ||||||
|  |  | ||||||
|  | MAKE_WEAK_MODULE(WeakHamiltonianEye) | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||||
							
								
								
									
										139
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,139 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MContraction; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Weak Hamiltonian current-current contractions, Non-Eye-type. | ||||||
|  |  *  | ||||||
|  |  * These contractions are generated by the Q1 and Q2 operators in the physical | ||||||
|  |  * basis (see e.g. Fig 3 of arXiv:1507.03094). | ||||||
|  |  *  | ||||||
|  |  * Schematic:      | ||||||
|  |  *            q2             q3          |           q2              q3 | ||||||
|  |  *          /--<--¬       /--<--¬        |        /--<--¬         /--<--¬        | ||||||
|  |  *         /       \     /       \       |       /       \       /       \       | ||||||
|  |  *        /         \   /         \      |      /         \     /         \      | ||||||
|  |  *       /           \ /           \     |     /           \   /           \     | ||||||
|  |  *    i *             * H_W         *  f |  i *             * * H_W         * f  | ||||||
|  |  *      \             *             |    |     \           /   \           / | ||||||
|  |  *       \           / \           /     |      \         /     \         /     | ||||||
|  |  *        \         /   \         /      |       \       /       \       /   | ||||||
|  |  *         \       /     \       /       |        \-->--/         \-->--/       | ||||||
|  |  *          \-->--/       \-->--/        |          q1               q4  | ||||||
|  |  *            q1             q4          | | ||||||
|  |  *                Connected (C)          |                 Wing (W) | ||||||
|  |  * | ||||||
|  |  * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu]) | ||||||
|  |  * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu]) | ||||||
|  |  *  | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                  TWeakHamiltonianNonEye implementation                     * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Passes the module name on to the Module<WeakHamiltonianPar> base. | ||||||
|  | TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name) | ||||||
|  |     : Module<WeakHamiltonianPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Inputs are the four propagators named in the parameters, in q1..q4 order. | ||||||
|  | std::vector<std::string> TWeakHamiltonianNonEye::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3, par().q4}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared output is the module's own name. | ||||||
|  | std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | void TWeakHamiltonianNonEye::setup(void) | ||||||
|  | { | ||||||
|  |     // Intentionally empty: every temporary is created inside execute(). | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Computes the Connected (C) and Wing (W) contractions from the propagators | ||||||
|  | // named q1..q4 in the parameters and writes both correlators, labelled "HW_C" | ||||||
|  | // and "HW_W", under the key "HW_NonEye" through a CorrWriter opened on | ||||||
|  | // par().output. | ||||||
|  | void TWeakHamiltonianNonEye::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '" | ||||||
|  |                  << getName() << "' using quarks '" << par().q1 << "', '" | ||||||
|  |                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4 | ||||||
|  |                  << "'." << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter             writer(par().output); | ||||||
|  |     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||||
|  |     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||||
|  |     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||||
|  |     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||||
|  |     // g5 is picked up by name inside the MAKE_CW_SUBDIAG expansion. | ||||||
|  |     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||||
|  |     LatticeComplex        expbuf(env().getGrid()); | ||||||
|  |     std::vector<TComplex> corrbuf; | ||||||
|  |     std::vector<Result>   result(n_noneye_diag); | ||||||
|  |     // ndim is picked up by name inside the SUM_MU expansion. | ||||||
|  |     unsigned int ndim   = env().getNd(); | ||||||
|  |  | ||||||
|  |     PropagatorField              tmp1(env().getGrid()); | ||||||
|  |     LatticeComplex               tmp2(env().getGrid()); | ||||||
|  |     std::vector<PropagatorField> C_i_side_loop(ndim, tmp1); | ||||||
|  |     std::vector<PropagatorField> C_f_side_loop(ndim, tmp1); | ||||||
|  |     std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2); | ||||||
|  |     std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2); | ||||||
|  |  | ||||||
|  |     // Setup for C-type contractions (unsigned index to match ndim). | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu])); | ||||||
|  |         C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu])); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform C-type contractions: one trace over both sides, summed over mu. | ||||||
|  |     SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu])) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C") | ||||||
|  |  | ||||||
|  |     // Recycle sub-expressions for W-type contractions. | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         W_i_side_loop[mu] = trace(C_i_side_loop[mu]); | ||||||
|  |         W_f_side_loop[mu] = trace(C_f_side_loop[mu]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Perform W-type contractions: product of the two traces, summed over mu. | ||||||
|  |     SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu]) | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W") | ||||||
|  |  | ||||||
|  |     write(writer, "HW_NonEye", result); | ||||||
|  | } | ||||||
| @@ -0,0 +1,57 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||||
|  | #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakHamiltonianNonEye                              * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Positions of the Wing (W) and Connected (C) correlators in the result | ||||||
|  | // vector; n_noneye_diag is the number of diagrams. | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |     W_diag, | ||||||
|  |     C_diag, | ||||||
|  |     n_noneye_diag | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Wing and Connected subdiagram contractions | ||||||
|  | // Expands to one parenthesised expression and uses a variable named g5 that | ||||||
|  | // must be in scope where the macro is expanded. Arguments are parenthesised | ||||||
|  | // so compound expressions keep their precedence. | ||||||
|  | #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma)) | ||||||
|  |  | ||||||
|  | MAKE_WEAK_MODULE(WeakHamiltonianNonEye) | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||||
							
								
								
									
										135
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MContraction; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Weak Hamiltonian + current contractions, disconnected topology for neutral  | ||||||
|  |  * mesons. | ||||||
|  |  *  | ||||||
|  |  * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak | ||||||
|  |  * Hamiltonian in the physical basis and an additional current J (see e.g.  | ||||||
|  |  * Fig 11 of arXiv:1507.03094). | ||||||
|  |  *  | ||||||
|  |  * Schematic: | ||||||
|  |  *                         | ||||||
|  |  *           q2          q4             q3 | ||||||
|  |  *       /--<--¬     /---<--¬       /---<--¬ | ||||||
|  |  *     /         \ /         \     /        \ | ||||||
|  |  *  i *           * H_W      |  J *          * f | ||||||
|  |  *     \         / \         /     \        / | ||||||
|  |  *      \--->---/   \-------/       \------/ | ||||||
|  |  *          q1  | ||||||
|  |  *  | ||||||
|  |  * options | ||||||
|  |  * - q1: input propagator 1 (string) | ||||||
|  |  * - q2: input propagator 2 (string) | ||||||
|  |  * - q3: input propagator 3 (string), assumed to be sequential propagator  | ||||||
|  |  * - q4: input propagator 4 (string), assumed to be a loop | ||||||
|  |  *  | ||||||
|  |  * type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5) | ||||||
|  |  * type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5) | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /******************************************************************************* | ||||||
|  |  *                  TWeakNeutral4ptDisc implementation                         * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Passes the module name on to the Module<WeakHamiltonianPar> base. | ||||||
|  | TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name) | ||||||
|  |     : Module<WeakHamiltonianPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Inputs are the four propagators named in the parameters, in q1..q4 order. | ||||||
|  | std::vector<std::string> TWeakNeutral4ptDisc::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q1, par().q2, par().q3, par().q4}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The only declared output is the module's own name. | ||||||
|  | std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | void TWeakNeutral4ptDisc::setup(void) | ||||||
|  | { | ||||||
|  |     // Intentionally empty: every temporary is created inside execute(). | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Computes the two neutral disconnected contractions from the propagators | ||||||
|  | // named q1..q4 in the parameters and writes both correlators, labelled | ||||||
|  | // "HW_disc0_1" and "HW_disc0_2", under the key "HW_disc0" through a | ||||||
|  | // CorrWriter opened on par().output. | ||||||
|  | void TWeakNeutral4ptDisc::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '" | ||||||
|  |                  << getName() << "' using quarks '" << par().q1 << "', '" | ||||||
|  |                  << par().q2 << "', '" << par().q3 << "' and '" << par().q4 | ||||||
|  |                  << "'." << std::endl; | ||||||
|  |  | ||||||
|  |     CorrWriter             writer(par().output); | ||||||
|  |     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||||
|  |     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||||
|  |     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||||
|  |     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||||
|  |     // g5 is picked up by name inside the MAKE_DISC_MESON expansion. | ||||||
|  |     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||||
|  |     LatticeComplex        expbuf(env().getGrid()); | ||||||
|  |     std::vector<TComplex> corrbuf; | ||||||
|  |     std::vector<Result>   result(n_neut_disc_diag); | ||||||
|  |     // ndim is picked up by name inside the SUM_MU expansion. | ||||||
|  |     unsigned int ndim   = env().getNd(); | ||||||
|  |  | ||||||
|  |     PropagatorField              tmp(env().getGrid()); | ||||||
|  |     std::vector<PropagatorField> meson(ndim, tmp); | ||||||
|  |     std::vector<PropagatorField> loop(ndim, tmp); | ||||||
|  |     LatticeComplex               curr(env().getGrid()); | ||||||
|  |  | ||||||
|  |     // Setup for type 1 contractions (unsigned index to match ndim). | ||||||
|  |     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||||
|  |     { | ||||||
|  |         meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu])); | ||||||
|  |         loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu])); | ||||||
|  |     } | ||||||
|  |     // NOTE(review): the header comment of this file writes the current factor | ||||||
|  |     // as trace(q3*g5), whereas GammaL(Gamma5) is inserted here - confirm which | ||||||
|  |     // is intended. | ||||||
|  |     curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5)); | ||||||
|  |  | ||||||
|  |     // Perform type 1 contractions. | ||||||
|  |     // NOTE(review): the header comment of this file gives the product-of-traces | ||||||
|  |     // form for type 1 and the single-trace form for type 2, the reverse of | ||||||
|  |     // what is computed below - confirm which labelling is intended. | ||||||
|  |     SUM_MU(expbuf, trace(meson[mu]*loop[mu])) | ||||||
|  |     expbuf *= curr; | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1") | ||||||
|  |  | ||||||
|  |     // Perform type 2 contractions. | ||||||
|  |     SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu])) | ||||||
|  |     expbuf *= curr; | ||||||
|  |     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2") | ||||||
|  |  | ||||||
|  |     write(writer, "HW_disc0", result); | ||||||
|  | } | ||||||
							
								
								
									
										59
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||||
|  | #define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         WeakNeutral4ptDisc                                 * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MContraction) | ||||||
|  |  | ||||||
|  | // Indices of the two neutral disconnected diagram types (type 1 and type 2). | ||||||
|  | // n_neut_disc_diag equals the number of diagram indices declared here | ||||||
|  | // (presumably used to size the result container -- confirm in the .cc file). | ||||||
|  | enum | ||||||
|  | { | ||||||
|  |     neut_disc_1_diag = 0, | ||||||
|  |     neut_disc_2_diag = 1, | ||||||
|  |     n_neut_disc_diag = 2 | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Neutral 4pt disconnected subdiagram contractions. | ||||||
|  | // NOTE: macro arguments are pasted without individual parentheses, so callers | ||||||
|  | // should pass simple expressions (identifiers or indexed objects). | ||||||
|  | // MAKE_DISC_MESON expands to Q_1*adj(Q_2)*g5*gamma; a symbol named g5 must be | ||||||
|  | // visible at the point of expansion. | ||||||
|  | #define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma) | ||||||
|  | // MAKE_DISC_LOOP expands to the product Q_LOOP*gamma. | ||||||
|  | #define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma) | ||||||
|  | // MAKE_DISC_CURR expands to trace(Q_c*gamma). | ||||||
|  | #define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma)) | ||||||
|  |  | ||||||
|  | // NOTE(review): MAKE_WEAK_MODULE is not defined in this header; presumably it | ||||||
|  | // comes from WeakHamiltonian.hpp and declares the module class -- confirm. | ||||||
|  | MAKE_WEAK_MODULE(WeakNeutral4ptDisc) | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||||
| @@ -1,34 +1,5 @@ | |||||||
| /*************************************************************************************
 | #ifndef Hadrons_MFermion_GaugeProp_hpp_ | ||||||
| 
 | #define Hadrons_MFermion_GaugeProp_hpp_ | ||||||
| Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
| 
 |  | ||||||
| Source file: extras/Hadrons/Modules/Quark.hpp |  | ||||||
| 
 |  | ||||||
| Copyright (C) 2015 |  | ||||||
| Copyright (C) 2016 |  | ||||||
| 
 |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> |  | ||||||
| 
 |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
| 
 |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
| 
 |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
| 
 |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| 
 |  | ||||||
| #ifndef Hadrons_Quark_hpp_ |  | ||||||
| #define Hadrons_Quark_hpp_ |  | ||||||
| 
 | 
 | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                               TQuark                                       * |  *                                GaugeProp                                   * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| class QuarkPar: Serializable | BEGIN_MODULE_NAMESPACE(MFermion) | ||||||
|  | 
 | ||||||
|  | class GaugePropPar: Serializable | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, |     GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, | ||||||
|                                     std::string, source, |                                     std::string, source, | ||||||
|                                     std::string, solver); |                                     std::string, solver); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| class TQuark: public Module<QuarkPar> | class TGaugeProp: public Module<GaugePropPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FGS_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor
 |     // constructor
 | ||||||
|     TQuark(const std::string name); |     TGaugeProp(const std::string name); | ||||||
|     // destructor
 |     // destructor
 | ||||||
|     virtual ~TQuark(void) = default; |     virtual ~TGaugeProp(void) = default; | ||||||
|     // dependencies/products
 |     // dependency relation
 | ||||||
|     virtual std::vector<std::string> getInput(void); |     virtual std::vector<std::string> getInput(void); | ||||||
|     virtual std::vector<std::string> getOutput(void); |     virtual std::vector<std::string> getOutput(void); | ||||||
|     // setup
 |     // setup
 | ||||||
| @@ -69,20 +42,20 @@ private: | |||||||
|     SolverFn     *solver_{nullptr}; |     SolverFn     *solver_{nullptr}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| MODULE_REGISTER(Quark, TQuark<FIMPL>); | MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion); | ||||||
| 
 | 
 | ||||||
| /******************************************************************************
 | /******************************************************************************
 | ||||||
|  *                          TQuark implementation                             * |  *                      TGaugeProp implementation                             * | ||||||
|  ******************************************************************************/ |  ******************************************************************************/ | ||||||
| // constructor /////////////////////////////////////////////////////////////////
 | // constructor /////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| TQuark<FImpl>::TQuark(const std::string name) | TGaugeProp<FImpl>::TGaugeProp(const std::string name) | ||||||
| : Module(name) | : Module<GaugePropPar>(name) | ||||||
| {} | {} | ||||||
| 
 | 
 | ||||||
| // dependencies/products ///////////////////////////////////////////////////////
 | // dependencies/products ///////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TQuark<FImpl>::getInput(void) | std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> in = {par().source, par().solver}; |     std::vector<std::string> in = {par().source, par().solver}; | ||||||
|      |      | ||||||
| @@ -90,7 +63,7 @@ std::vector<std::string> TQuark<FImpl>::getInput(void) | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| std::vector<std::string> TQuark<FImpl>::getOutput(void) | std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | ||||||
| { | { | ||||||
|     std::vector<std::string> out = {getName(), getName() + "_5d"}; |     std::vector<std::string> out = {getName(), getName() + "_5d"}; | ||||||
|      |      | ||||||
| @@ -99,7 +72,7 @@ std::vector<std::string> TQuark<FImpl>::getOutput(void) | |||||||
| 
 | 
 | ||||||
| // setup ///////////////////////////////////////////////////////////////////////
 | // setup ///////////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TQuark<FImpl>::setup(void) | void TGaugeProp<FImpl>::setup(void) | ||||||
| { | { | ||||||
|     Ls_ = env().getObjectLs(par().solver); |     Ls_ = env().getObjectLs(par().solver); | ||||||
|     env().template registerLattice<PropagatorField>(getName()); |     env().template registerLattice<PropagatorField>(getName()); | ||||||
| @@ -111,7 +84,7 @@ void TQuark<FImpl>::setup(void) | |||||||
| 
 | 
 | ||||||
| // execution ///////////////////////////////////////////////////////////////////
 | // execution ///////////////////////////////////////////////////////////////////
 | ||||||
| template <typename FImpl> | template <typename FImpl> | ||||||
| void TQuark<FImpl>::execute(void) | void TGaugeProp<FImpl>::execute(void) | ||||||
| { | { | ||||||
|     LOG(Message) << "Computing quark propagator '" << getName() << "'" |     LOG(Message) << "Computing quark propagator '" << getName() << "'" | ||||||
|     << std::endl; |     << std::endl; | ||||||
| @@ -173,13 +146,15 @@ void TQuark<FImpl>::execute(void) | |||||||
|             *env().template getObject<PropagatorField>(getName()); |             *env().template getObject<PropagatorField>(getName()); | ||||||
|              |              | ||||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); |             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||||
|             axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); |             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); | ||||||
|             ExtractSlice(tmp, sol, 0, 0); |             ExtractSlice(tmp, sol, 0, 0); | ||||||
|             FermToProp(p4d, tmp, s, c); |             FermToProp(p4d, tmp, s, c); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  | 
 | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
| 
 | 
 | ||||||
| #endif // Hadrons_Quark_hpp_
 | #endif // Hadrons_MFermion_GaugeProp_hpp_
 | ||||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | |||||||
| // execution /////////////////////////////////////////////////////////////////// | // execution /////////////////////////////////////////////////////////////////// | ||||||
| void TLoad::execute(void) | void TLoad::execute(void) | ||||||
| { | { | ||||||
|     NerscField  header; |     FieldMetaData  header; | ||||||
|     std::string fileName = par().file + "." |     std::string fileName = par().file + "." | ||||||
|                            + std::to_string(env().getTrajectory()); |                            + std::to_string(env().getTrajectory()); | ||||||
|      |      | ||||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | |||||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); |     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||||
|     NerscIO::readConfiguration(U, header, fileName); |     NerscIO::readConfiguration(U, header, fileName); | ||||||
|     LOG(Message) << "NERSC header:" << std::endl; |     LOG(Message) << "NERSC header:" << std::endl; | ||||||
|     dump_nersc_header(header, LOG(Message)); |     dump_meta_data(header, LOG(Message)); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Load_hpp_ | #ifndef Hadrons_MGauge_Load_hpp_ | ||||||
| #define Hadrons_Load_hpp_ | #define Hadrons_MGauge_Load_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Load_hpp_ | #endif // Hadrons_MGauge_Load_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Random_hpp_ | #ifndef Hadrons_MGauge_Random_hpp_ | ||||||
| #define Hadrons_Random_hpp_ | #define Hadrons_MGauge_Random_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Random_hpp_ | #endif // Hadrons_MGauge_Random_hpp_ | ||||||
|   | |||||||
							
								
								
									
										88
									
								
								extras/Hadrons/Modules/MGauge/StochEm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								extras/Hadrons/Modules/MGauge/StochEm.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MGauge/StochEm.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MGauge; | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  | *                  TStochEm implementation                             * | ||||||
|  | ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Hands the module name to the Module<StochEmPar> base; nothing else to set up. | ||||||
|  | TStochEm::TStochEm(const std::string name) : Module<StochEmPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Returns the list of input object names: always empty for this module. | ||||||
|  | std::vector<std::string> TStochEm::getInput(void) | ||||||
|  | { | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Returns the list of output object names: a single entry, the module name. | ||||||
|  | std::vector<std::string> TStochEm::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Registers the weight lattice "_<name>_weight" (only if it is not registered | ||||||
|  | // yet) and the output EM field lattice named after the module. | ||||||
|  | void TStochEm::setup(void) | ||||||
|  | { | ||||||
|  |     const std::string weightName = "_" + getName() + "_weight"; | ||||||
|  |  | ||||||
|  |     if (!env().hasRegisteredObject(weightName)) | ||||||
|  |     { | ||||||
|  |         env().registerLattice<EmComp>(weightName); | ||||||
|  |     } | ||||||
|  |     env().registerLattice<EmField>(getName()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Creates the output EM field and fills it through PhotonR::StochasticField. | ||||||
|  | // The weight lattice "_<name>_weight" is computed with | ||||||
|  | // PhotonR::StochasticWeight the first time it is needed and fetched from the | ||||||
|  | // environment on later calls. | ||||||
|  | void TStochEm::execute(void) | ||||||
|  | { | ||||||
|  |     // single definition of the cached weight object name | ||||||
|  |     const std::string weightName = "_" + getName() + "_weight"; | ||||||
|  |     PhotonR photon(par().gauge, par().zmScheme); | ||||||
|  |     EmField &a = *env().createLattice<EmField>(getName()); | ||||||
|  |     EmComp  *w; | ||||||
|  |      | ||||||
|  |     if (!env().hasCreatedObject(weightName)) | ||||||
|  |     { | ||||||
|  |         LOG(Message) << "Caching stochastic EM potential weight (gauge: " | ||||||
|  |                      << par().gauge << ", zero-mode scheme: " | ||||||
|  |                      << par().zmScheme << ")..." << std::endl; | ||||||
|  |         w = env().createLattice<EmComp>(weightName); | ||||||
|  |         photon.StochasticWeight(*w); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         // weight already created earlier: reuse it | ||||||
|  |         w = env().getObject<EmComp>(weightName); | ||||||
|  |     } | ||||||
|  |     LOG(Message) << "Generating stochastic EM potential..." << std::endl; | ||||||
|  |     photon.StochasticField(a, *env().get4dRng(), *w); | ||||||
|  | } | ||||||
							
								
								
									
										75
									
								
								extras/Hadrons/Modules/MGauge/StochEm.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								extras/Hadrons/Modules/MGauge/StochEm.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2015 | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef Hadrons_MGauge_StochEm_hpp_ | ||||||
|  | #define Hadrons_MGauge_StochEm_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         StochEm                                 * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MGauge) | ||||||
|  |  | ||||||
|  | // Serializable parameter set of the StochEm module. | ||||||
|  | //  - gauge    : PhotonR::Gauge value passed to the PhotonR constructor | ||||||
|  | //  - zmScheme : PhotonR::ZmScheme value passed to the PhotonR constructor | ||||||
|  | class StochEmPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, | ||||||
|  |                                     PhotonR::Gauge,    gauge, | ||||||
|  |                                     PhotonR::ZmScheme, zmScheme); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module declaration for the stochastic EM field; parameters are StochEmPar. | ||||||
|  | class TStochEm: public Module<StochEmPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     // aliases of the PhotonR field types used by this module | ||||||
|  |     using EmField = PhotonR::GaugeField; | ||||||
|  |     using EmComp  = PhotonR::GaugeLinkField; | ||||||
|  |  | ||||||
|  |     // constructor | ||||||
|  |     TStochEm(const std::string name); | ||||||
|  |     // destructor | ||||||
|  |     virtual ~TStochEm(void) = default; | ||||||
|  |     // dependency relation | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     // setup | ||||||
|  |     virtual void setup(void); | ||||||
|  |     // execution | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MGauge_StochEm_hpp_ | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Unit_hpp_ | #ifndef Hadrons_MGauge_Unit_hpp_ | ||||||
| #define Hadrons_Unit_hpp_ | #define Hadrons_MGauge_Unit_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Unit_hpp_ | #endif // Hadrons_MGauge_Unit_hpp_ | ||||||
|   | |||||||
							
								
								
									
										132
									
								
								extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,132 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MLoop_NoiseLoop_hpp_ | ||||||
|  | #define Hadrons_MLoop_NoiseLoop_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   | ||||||
|  |  Noise loop propagator | ||||||
|  |  ----------------------------- | ||||||
|  |  * loop_x = q_x * adj(eta_x) | ||||||
|  |   | ||||||
|  |  * options: | ||||||
|  |  - q = Result of inversion on noise source. | ||||||
|  |  - eta = noise source. | ||||||
|  |  | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         NoiseLoop                                          * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MLoop) | ||||||
|  |  | ||||||
|  | // Serializable parameter set of the NoiseLoop module. | ||||||
|  | //  - q   : name of the object holding the result of the inversion on the | ||||||
|  | //          noise source | ||||||
|  | //  - eta : name of the object holding the noise source | ||||||
|  | class NoiseLoopPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar, | ||||||
|  |                                     std::string, q, | ||||||
|  |                                     std::string, eta); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing loop = q*adj(eta) from two propagator fields named in | ||||||
|  | // NoiseLoopPar. Templated on the fermion implementation FImpl. | ||||||
|  | template <typename FImpl> | ||||||
|  | class TNoiseLoop: public Module<NoiseLoopPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     // NOTE(review): presumably introduces PropagatorField and related aliases | ||||||
|  |     // for FImpl; the macro is defined outside this header -- confirm. | ||||||
|  |     FERM_TYPE_ALIASES(FImpl,); | ||||||
|  | public: | ||||||
|  |     // constructor | ||||||
|  |     TNoiseLoop(const std::string name); | ||||||
|  |     // destructor | ||||||
|  |     virtual ~TNoiseLoop(void) = default; | ||||||
|  |     // dependency relation | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     // setup | ||||||
|  |     virtual void setup(void); | ||||||
|  |     // execution | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // registers the FIMPL instantiation under the name NoiseLoop in MLoop | ||||||
|  | MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                 TNoiseLoop implementation                                  * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Hands the module name to the Module<NoiseLoopPar> base; no other state. | ||||||
|  | template <typename FImpl> | ||||||
|  | TNoiseLoop<FImpl>::TNoiseLoop(const std::string name) : Module<NoiseLoopPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Returns the two input object names, in the order {q, eta}. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TNoiseLoop<FImpl>::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().q, par().eta}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Returns the single output object name: the module name. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Registers the output lattice (a PropagatorField) under the module name. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TNoiseLoop<FImpl>::setup(void) | ||||||
|  | { | ||||||
|  |     const std::string outputName = getName(); | ||||||
|  |  | ||||||
|  |     env().template registerLattice<PropagatorField>(outputName); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Creates the output lattice, fetches the two inputs named by the parameters | ||||||
|  | // and stores q*adj(eta) in the output. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TNoiseLoop<FImpl>::execute(void) | ||||||
|  | { | ||||||
|  |     // output first, then inputs: same creation/lookup order as before | ||||||
|  |     PropagatorField &result   = *env().template createLattice<PropagatorField>(getName()); | ||||||
|  |     PropagatorField &solution = *env().template getObject<PropagatorField>(par().q); | ||||||
|  |     PropagatorField &noise    = *env().template getObject<PropagatorField>(par().eta); | ||||||
|  |  | ||||||
|  |     result = solution*adj(noise); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MLoop_NoiseLoop_hpp_ | ||||||
							
								
								
									
										226
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										226
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,226 @@ | |||||||
|  | #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MScalar; | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  | *                     TChargedProp implementation                             * | ||||||
|  | ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Hands the module name to the Module<ChargedPropPar> base; no other work. | ||||||
|  | TChargedProp::TChargedProp(const std::string name) : Module<ChargedPropPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // Returns the two input object names, in the order {source, emField}. | ||||||
|  | std::vector<std::string> TChargedProp::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().source, par().emField}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Returns the single output object name: the module name. | ||||||
|  | std::vector<std::string> TChargedProp::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Builds the names of the auxiliary lattices (free momentum-space propagator, | ||||||
|  | // one shift phase per dimension, "_<name>_DinvSrc") and registers each of them | ||||||
|  | // unless it is already registered; finally registers the output lattice. | ||||||
|  | void TChargedProp::setup(void) | ||||||
|  | { | ||||||
|  |     const auto nd = env().getNd(); | ||||||
|  |  | ||||||
|  |     // object names | ||||||
|  |     freeMomPropName_ = FREEMOMPROP(par().mass); | ||||||
|  |     GFSrcName_       = "_" + getName() + "_DinvSrc"; | ||||||
|  |     phaseName_.clear(); | ||||||
|  |     for (unsigned int mu = 0; mu < nd; ++mu) | ||||||
|  |     { | ||||||
|  |         phaseName_.push_back("_shiftphase_" + std::to_string(mu)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // registrations, skipped for objects that are already known | ||||||
|  |     if (!env().hasRegisteredObject(freeMomPropName_)) | ||||||
|  |     { | ||||||
|  |         env().registerLattice<ScalarField>(freeMomPropName_); | ||||||
|  |     } | ||||||
|  |     // the first phase name stands in for the whole set of phases | ||||||
|  |     if (!env().hasRegisteredObject(phaseName_[0])) | ||||||
|  |     { | ||||||
|  |         for (const auto &phase: phaseName_) | ||||||
|  |         { | ||||||
|  |             env().registerLattice<ScalarField>(phase); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (!env().hasRegisteredObject(GFSrcName_)) | ||||||
|  |     { | ||||||
|  |         env().registerLattice<ScalarField>(GFSrcName_); | ||||||
|  |     } | ||||||
|  |     // output lattice | ||||||
|  |     env().registerLattice<ScalarField>(getName()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Computes the charged scalar propagator as an expansion in the charge q, | ||||||
|  | // up to order q^2, working in momentum space: | ||||||
|  | //   prop = G*F*src - q*G*momD1*G*F*src + q^2*G*momD1*G*momD1*G*F*src | ||||||
|  | //          - q^2*G*momD2*G*F*src | ||||||
|  | // followed by a backward FFT. G is the cached momentum-space free propagator | ||||||
|  | // and F*src the forward FFT of the source. When par().output is not empty the | ||||||
|  | // result of sliceSum(prop, ., Tp) is written to "<output>.<trajectory>". | ||||||
|  | void TChargedProp::execute(void) | ||||||
|  | { | ||||||
|  |     // CACHING ANALYTIC EXPRESSIONS | ||||||
|  |     ScalarField &source = *env().getObject<ScalarField>(par().source); | ||||||
|  |     Complex     ci(0.0,1.0); | ||||||
|  |     FFT         fft(env().getGrid()); | ||||||
|  |  | ||||||
|  |     // cache free scalar propagator | ||||||
|  |     if (!env().hasCreatedObject(freeMomPropName_)) | ||||||
|  |     { | ||||||
|  |         LOG(Message) << "Caching momentum space free scalar propagator" | ||||||
|  |                      << " (mass= " << par().mass << ")..." << std::endl; | ||||||
|  |         freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); | ||||||
|  |         SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_); | ||||||
|  |     } | ||||||
|  |     // cache G*F*src | ||||||
|  |     if (!env().hasCreatedObject(GFSrcName_)) | ||||||
|  |     { | ||||||
|  |         GFSrc_ = env().createLattice<ScalarField>(GFSrcName_); | ||||||
|  |         fft.FFT_all_dim(*GFSrc_, source, FFT::forward); | ||||||
|  |         *GFSrc_ = (*freeMomProp_)*(*GFSrc_); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         GFSrc_ = env().getObject<ScalarField>(GFSrcName_); | ||||||
|  |     } | ||||||
|  |     // cache phases | ||||||
|  |     // phase_ is a member: drop the pointers left by a previous execute() call, | ||||||
|  |     // otherwise new entries are appended after them and phase_[mu] (used by | ||||||
|  |     // momD1/momD2) keeps referring to the lattices of the earlier call | ||||||
|  |     phase_.clear(); | ||||||
|  |     if (!env().hasCreatedObject(phaseName_[0])) | ||||||
|  |     { | ||||||
|  |         std::vector<int> &l = env().getGrid()->_fdimensions; | ||||||
|  |  | ||||||
|  |         LOG(Message) << "Caching shift phases..." << std::endl; | ||||||
|  |         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||||
|  |         { | ||||||
|  |             Real    twoPiL = M_PI*2./l[mu]; | ||||||
|  |  | ||||||
|  |             // phase_[mu] = exp(i*2*pi*x_mu/L_mu), built from the lattice coordinate | ||||||
|  |             phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu])); | ||||||
|  |             LatticeCoordinate(*(phase_[mu]), mu); | ||||||
|  |             *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||||
|  |         { | ||||||
|  |             phase_.push_back(env().getObject<ScalarField>(phaseName_[mu])); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // PROPAGATOR CALCULATION | ||||||
|  |     LOG(Message) << "Computing charged scalar propagator" | ||||||
|  |                  << " (mass= " << par().mass | ||||||
|  |                  << ", charge= " << par().charge << ")..." << std::endl; | ||||||
|  |  | ||||||
|  |     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); | ||||||
|  |     ScalarField buf(env().getGrid()); | ||||||
|  |     ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; | ||||||
|  |     double      q = par().charge; | ||||||
|  |  | ||||||
|  |     // G*F*Src | ||||||
|  |     prop = GFSrc; | ||||||
|  |  | ||||||
|  |     // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) | ||||||
|  |     buf = GFSrc; | ||||||
|  |     momD1(buf, fft); | ||||||
|  |     buf = G*buf; | ||||||
|  |     prop = prop - q*buf; | ||||||
|  |  | ||||||
|  |     // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) | ||||||
|  |     // buf is deliberately reused from the first-order term | ||||||
|  |     momD1(buf, fft); | ||||||
|  |     prop = prop + q*q*G*buf; | ||||||
|  |  | ||||||
|  |     // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) | ||||||
|  |     buf = GFSrc; | ||||||
|  |     momD2(buf, fft); | ||||||
|  |     prop = prop - q*q*G*buf; | ||||||
|  |  | ||||||
|  |     // final FT | ||||||
|  |     fft.FFT_all_dim(prop, prop, FFT::backward); | ||||||
|  |  | ||||||
|  |     // OUTPUT IF NECESSARY | ||||||
|  |     if (!par().output.empty()) | ||||||
|  |     { | ||||||
|  |         std::string           filename = par().output + "." + | ||||||
|  |                                          std::to_string(env().getTrajectory()); | ||||||
|  |  | ||||||
|  |         LOG(Message) << "Saving zero-momentum projection to '" | ||||||
|  |                      << filename << "'..." << std::endl; | ||||||
|  |  | ||||||
|  |         CorrWriter            writer(filename); | ||||||
|  |         std::vector<TComplex> vecBuf; | ||||||
|  |         std::vector<Complex>  result; | ||||||
|  |  | ||||||
|  |         sliceSum(prop, vecBuf, Tp); | ||||||
|  |         result.resize(vecBuf.size()); | ||||||
|  |         // strip the tensor wrapper so that plain complex numbers are written | ||||||
|  |         for (unsigned int t = 0; t < vecBuf.size(); ++t) | ||||||
|  |         { | ||||||
|  |             result[t] = TensorRemove(vecBuf[t]); | ||||||
|  |         } | ||||||
|  |         write(writer, "charge", q); | ||||||
|  |         write(writer, "prop", result); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Replaces the momentum-space field s, in place, by momD1 applied to s | ||||||
|  | // (the caller describes momD1 as F*D1*Finv). Two sums over mu are accumulated: | ||||||
|  | //   - i * F[ A_mu * Finv[ phase_mu * s ] ] | ||||||
|  | //   + i * adj(phase_mu) * F[ A_mu * Finv[ s ] ] | ||||||
|  | // A is the EM field named in the parameters; phase_ must have been filled by | ||||||
|  | // execute() beforehand. | ||||||
|  | void TChargedProp::momD1(ScalarField &s, FFT &fft) | ||||||
|  | { | ||||||
|  |     EmField            &A = *env().getObject<EmField>(par().emField); | ||||||
|  |     ScalarField        work(env().getGrid()), sum(env().getGrid()), | ||||||
|  |                        Acomp(env().getGrid()); | ||||||
|  |     Complex            imag(0.0,1.0); | ||||||
|  |     const unsigned int nd = env().getNd(); | ||||||
|  |  | ||||||
|  |     sum = zero; | ||||||
|  |  | ||||||
|  |     // terms where the shift phase multiplies s before leaving momentum space | ||||||
|  |     for (unsigned int dir = 0; dir < nd; ++dir) | ||||||
|  |     { | ||||||
|  |         Acomp = peekLorentz(A, dir); | ||||||
|  |         work  = (*phase_[dir])*s; | ||||||
|  |         fft.FFT_all_dim(work, work, FFT::backward); | ||||||
|  |         work  = Acomp*work; | ||||||
|  |         fft.FFT_all_dim(work, work, FFT::forward); | ||||||
|  |         sum   = sum - imag*work; | ||||||
|  |     } | ||||||
|  |     // s itself is transformed once and shared by all remaining terms | ||||||
|  |     fft.FFT_all_dim(s, s, FFT::backward); | ||||||
|  |     for (unsigned int dir = 0; dir < nd; ++dir) | ||||||
|  |     { | ||||||
|  |         Acomp = peekLorentz(A, dir); | ||||||
|  |         work  = Acomp*s; | ||||||
|  |         fft.FFT_all_dim(work, work, FFT::forward); | ||||||
|  |         sum   = sum + imag*adj(*phase_[dir])*work; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     s = sum; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Replaces the momentum-space field s, in place, by momD2 applied to s | ||||||
|  | // (the caller describes momD2 as F*D2*Finv). Two sums over mu are accumulated: | ||||||
|  | //   + .5 * F[ A_mu*A_mu * Finv[ phase_mu * s ] ] | ||||||
|  | //   + .5 * adj(phase_mu) * F[ A_mu*A_mu * Finv[ s ] ] | ||||||
|  | // A is the EM field named in the parameters; phase_ must have been filled by | ||||||
|  | // execute() beforehand. | ||||||
|  | void TChargedProp::momD2(ScalarField &s, FFT &fft) | ||||||
|  | { | ||||||
|  |     EmField            &A = *env().getObject<EmField>(par().emField); | ||||||
|  |     ScalarField        work(env().getGrid()), sum(env().getGrid()), | ||||||
|  |                        Acomp(env().getGrid()); | ||||||
|  |     const unsigned int nd = env().getNd(); | ||||||
|  |  | ||||||
|  |     sum = zero; | ||||||
|  |  | ||||||
|  |     // terms where the shift phase multiplies s before leaving momentum space | ||||||
|  |     for (unsigned int dir = 0; dir < nd; ++dir) | ||||||
|  |     { | ||||||
|  |         Acomp = peekLorentz(A, dir); | ||||||
|  |         work  = (*phase_[dir])*s; | ||||||
|  |         fft.FFT_all_dim(work, work, FFT::backward); | ||||||
|  |         work  = Acomp*Acomp*work; | ||||||
|  |         fft.FFT_all_dim(work, work, FFT::forward); | ||||||
|  |         sum   = sum + .5*work; | ||||||
|  |     } | ||||||
|  |     // s itself is transformed once and shared by all remaining terms | ||||||
|  |     fft.FFT_all_dim(s, s, FFT::backward); | ||||||
|  |     for (unsigned int dir = 0; dir < nd; ++dir) | ||||||
|  |     { | ||||||
|  |         Acomp = peekLorentz(A, dir); | ||||||
|  |         work  = Acomp*Acomp*s; | ||||||
|  |         fft.FFT_all_dim(work, work, FFT::forward); | ||||||
|  |         sum   = sum + .5*adj(*phase_[dir])*work; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     s = sum; | ||||||
|  | } | ||||||
							
								
								
									
										61
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | |||||||
|  | #ifndef Hadrons_MScalar_ChargedProp_hpp_ | ||||||
|  | #define Hadrons_MScalar_ChargedProp_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                       Charged scalar propagator                            * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MScalar) | ||||||
|  |  | ||||||
|  | // Parameters of the ChargedProp module, as used in ChargedProp.cc: | ||||||
|  | //  - emField: name of the EmField object read by momD1/momD2 | ||||||
|  | //  - source:  name of the ScalarField used as source | ||||||
|  | //  - mass:    mass passed to the momentum-space free propagator | ||||||
|  | //  - charge:  charge q of the expansion computed in execute() | ||||||
|  | //  - output:  output file name stem; nothing is written when empty | ||||||
|  | class ChargedPropPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, | ||||||
|  |                                     std::string, emField, | ||||||
|  |                                     std::string, source, | ||||||
|  |                                     double,      mass, | ||||||
|  |                                     double,      charge, | ||||||
|  |                                     std::string, output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing the propagator of a charged scalar in an EM field | ||||||
|  | // (implementation in ChargedProp.cc). | ||||||
|  | class TChargedProp: public Module<ChargedPropPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     // field types | ||||||
|  |     SCALAR_TYPE_ALIASES(SIMPL,); | ||||||
|  |     using EmField = PhotonR::GaugeField; | ||||||
|  |     using EmComp  = PhotonR::GaugeLinkField; | ||||||
|  | public: | ||||||
|  |     // construction from the module name | ||||||
|  |     TChargedProp(const std::string name); | ||||||
|  |     // destruction | ||||||
|  |     virtual ~TChargedProp(void) = default; | ||||||
|  |     // names of the objects read and produced | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |     // object naming and registration | ||||||
|  |     virtual void setup(void); | ||||||
|  |     // propagator computation | ||||||
|  |     virtual void execute(void); | ||||||
|  | private: | ||||||
|  |     // in-place momentum-space operators used by execute() | ||||||
|  |     void momD1(ScalarField &s, FFT &fft); | ||||||
|  |     void momD2(ScalarField &s, FFT &fft); | ||||||
|  | private: | ||||||
|  |     // names of the cached objects | ||||||
|  |     std::string                freeMomPropName_; | ||||||
|  |     std::string                GFSrcName_; | ||||||
|  |     std::vector<std::string>   phaseName_; | ||||||
|  |     // pointers to the cached objects, set in execute() | ||||||
|  |     ScalarField                *freeMomProp_; | ||||||
|  |     ScalarField                *GFSrc_; | ||||||
|  |     std::vector<ScalarField *> phase_; | ||||||
|  |     // NOTE(review): not referenced in ChargedProp.cc, where momD1/momD2 | ||||||
|  |     // declare a local A instead - confirm whether this member is needed | ||||||
|  |     EmField                    *A; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MScalar_ChargedProp_hpp_ | ||||||
							
								
								
									
										79
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,79 @@ | |||||||
|  | #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> | ||||||
|  | #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Hadrons; | ||||||
|  | using namespace MScalar; | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  | *                        TFreeProp implementation                             * | ||||||
|  | ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Hands the module name to the Module<FreePropPar> base; the body is empty. | ||||||
|  | TFreeProp::TFreeProp(const std::string name) | ||||||
|  | : Module<FreePropPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // The only object this module reads is the source named in the parameters. | ||||||
|  | std::vector<std::string> TFreeProp::getInput(void) | ||||||
|  | { | ||||||
|  |     return {par().source}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The single product of this module is an object carrying the module name. | ||||||
|  | std::vector<std::string> TFreeProp::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Names the cached momentum-space free propagator after the mass and | ||||||
|  | // registers it unless an object of that name is already registered; the | ||||||
|  | // module output is always registered. | ||||||
|  | void TFreeProp::setup(void) | ||||||
|  | { | ||||||
|  |     freeMomPropName_ = FREEMOMPROP(par().mass); | ||||||
|  |  | ||||||
|  |     const bool cacheRegistered = env().hasRegisteredObject(freeMomPropName_); | ||||||
|  |  | ||||||
|  |     if (!cacheRegistered) | ||||||
|  |     { | ||||||
|  |         env().registerLattice<ScalarField>(freeMomPropName_); | ||||||
|  |     } | ||||||
|  |     env().registerLattice<ScalarField>(getName()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Computes the free scalar propagator from the source, using the cached | ||||||
|  | // momentum-space free propagator (created here if it does not exist yet). | ||||||
|  | // When par().output is not empty the result of sliceSum(prop, ., Tp) is | ||||||
|  | // written to "<output>.<trajectory>". | ||||||
|  | void TFreeProp::execute(void) | ||||||
|  | { | ||||||
|  |     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); | ||||||
|  |     ScalarField &source = *env().getObject<ScalarField>(par().source); | ||||||
|  |     ScalarField *momProp; | ||||||
|  |  | ||||||
|  |     // fetch the cached momentum-space propagator, or build it on first use | ||||||
|  |     if (env().hasCreatedObject(freeMomPropName_)) | ||||||
|  |     { | ||||||
|  |         momProp = env().getObject<ScalarField>(freeMomPropName_); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         LOG(Message) << "Caching momentum space free scalar propagator" | ||||||
|  |                      << " (mass= " << par().mass << ")..." << std::endl; | ||||||
|  |         momProp = env().createLattice<ScalarField>(freeMomPropName_); | ||||||
|  |         SIMPL::MomentumSpacePropagator(*momProp, par().mass); | ||||||
|  |     } | ||||||
|  |     LOG(Message) << "Computing free scalar propagator..." << std::endl; | ||||||
|  |     SIMPL::FreePropagator(source, prop, *momProp); | ||||||
|  |  | ||||||
|  |     // nothing to write without an output name | ||||||
|  |     if (par().output.empty()) | ||||||
|  |     { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     TextWriter            writer(par().output + "." + | ||||||
|  |                                  std::to_string(env().getTrajectory())); | ||||||
|  |     std::vector<TComplex> slices; | ||||||
|  |     std::vector<Complex>  corr; | ||||||
|  |  | ||||||
|  |     sliceSum(prop, slices, Tp); | ||||||
|  |     corr.resize(slices.size()); | ||||||
|  |     // strip the tensor wrapper so that plain complex numbers are written | ||||||
|  |     for (unsigned int it = 0; it < slices.size(); ++it) | ||||||
|  |     { | ||||||
|  |         corr[it] = TensorRemove(slices[it]); | ||||||
|  |     } | ||||||
|  |     write(writer, "prop", corr); | ||||||
|  | } | ||||||
							
								
								
									
										50
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | |||||||
|  | #ifndef Hadrons_MScalar_FreeProp_hpp_ | ||||||
|  | #define Hadrons_MScalar_FreeProp_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                               FreeProp                                     * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MScalar) | ||||||
|  |  | ||||||
|  | // Parameters of the FreeProp module, as used in FreeProp.cc: | ||||||
|  | //  - source: name of the ScalarField used as source | ||||||
|  | //  - mass:   mass passed to the momentum-space free propagator | ||||||
|  | //  - output: output file name stem; nothing is written when empty | ||||||
|  | class FreePropPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, | ||||||
|  |                                     std::string, source, | ||||||
|  |                                     double,      mass, | ||||||
|  |                                     std::string, output); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module computing the free scalar propagator of a source | ||||||
|  | // (implementation in FreeProp.cc). | ||||||
|  | class TFreeProp: public Module<FreePropPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     // field types | ||||||
|  |     SCALAR_TYPE_ALIASES(SIMPL,); | ||||||
|  | public: | ||||||
|  |     // construction from the module name | ||||||
|  |     TFreeProp(const std::string name); | ||||||
|  |     // destruction | ||||||
|  |     virtual ~TFreeProp() = default; | ||||||
|  |     // names of the objects read and produced | ||||||
|  |     virtual std::vector<std::string> getInput(); | ||||||
|  |     virtual std::vector<std::string> getOutput(); | ||||||
|  |     // object naming and registration | ||||||
|  |     virtual void setup(); | ||||||
|  |     // propagator computation | ||||||
|  |     virtual void execute(); | ||||||
|  | private: | ||||||
|  |     // name of the cached momentum-space free propagator, set in setup() | ||||||
|  |     std::string freeMomPropName_; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MScalar_FreeProp_hpp_ | ||||||
							
								
								
									
										6
									
								
								extras/Hadrons/Modules/MScalar/Scalar.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								extras/Hadrons/Modules/MScalar/Scalar.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | |||||||
|  | #ifndef Hadrons_Scalar_hpp_ | ||||||
|  | #define Hadrons_Scalar_hpp_ | ||||||
|  |  | ||||||
|  | // Name of the cached momentum-space free scalar propagator for mass m. | ||||||
|  | // The expansion is parenthesised so that it is a single std::string operand | ||||||
|  | // wherever the macro appears (e.g. to the right of another string literal). | ||||||
|  | #define FREEMOMPROP(m) ("_scalar_mom_prop_" + std::to_string(m)) | ||||||
|  |  | ||||||
|  | #endif // Hadrons_Scalar_hpp_ | ||||||
							
								
								
									
										114
									
								
								extras/Hadrons/Modules/MSink/Point.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								extras/Hadrons/Modules/MSink/Point.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | |||||||
|  | #ifndef Hadrons_MSink_Point_hpp_ | ||||||
|  | #define Hadrons_MSink_Point_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                                   Point                                    * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MSink) | ||||||
|  |  | ||||||
|  | // Parameters of the point sink module: | ||||||
|  | //  - mom: momentum as a string, parsed with strToVec<Real> in execute() | ||||||
|  | class PointPar: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, | ||||||
|  |                                     std::string, mom); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module producing a point sink function for a given momentum; registered | ||||||
|  | // below for FIMPL ("Point") and ScalarImplCR ("ScalarPoint"). | ||||||
|  | template <typename FImpl> | ||||||
|  | class TPoint: public Module<PointPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     // field and sink types | ||||||
|  |     FERM_TYPE_ALIASES(FImpl,); | ||||||
|  |     SINK_TYPE_ALIASES(); | ||||||
|  | public: | ||||||
|  |     // construction from the module name | ||||||
|  |     TPoint(const std::string name); | ||||||
|  |     // destruction | ||||||
|  |     virtual ~TPoint() = default; | ||||||
|  |     // names of the objects read and produced | ||||||
|  |     virtual std::vector<std::string> getInput(); | ||||||
|  |     virtual std::vector<std::string> getOutput(); | ||||||
|  |     // object registration | ||||||
|  |     virtual void setup(); | ||||||
|  |     // sink function construction | ||||||
|  |     virtual void execute(); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink); | ||||||
|  | MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                          TPoint implementation                             * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor ///////////////////////////////////////////////////////////////// | ||||||
|  | // Hands the module name to the Module<PointPar> base; the body is empty. | ||||||
|  | template <typename FImpl> | ||||||
|  | TPoint<FImpl>::TPoint(const std::string name) | ||||||
|  | : Module<PointPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // The sink function depends on no other object: the list is empty. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TPoint<FImpl>::getInput(void) | ||||||
|  | { | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The single product of this module is an object carrying the module name. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TPoint<FImpl>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup /////////////////////////////////////////////////////////////////////// | ||||||
|  | // Registers the output object with the size reported by | ||||||
|  | // lattice4dSize<LatticeComplex>(). | ||||||
|  | template <typename FImpl> | ||||||
|  | void TPoint<FImpl>::setup(void) | ||||||
|  | { | ||||||
|  |     const unsigned int objSize = env().template lattice4dSize<LatticeComplex>(); | ||||||
|  |  | ||||||
|  |     env().registerObject(getName(), objSize); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Builds the phase exp(2*pi*i*sum_mu p_mu*x_mu/L_mu) for the momentum in the | ||||||
|  | // parameters and stores, under the module name, a sink function that | ||||||
|  | // multiplies a propagator field by this phase and slice-sums it along Tp. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TPoint<FImpl>::execute(void) | ||||||
|  | { | ||||||
|  |     // NOTE(review): mom[mu] is read for every mu < Nd without a size check - | ||||||
|  |     // presumably the momentum string must hold at least Nd components | ||||||
|  |     std::vector<Real> mom = strToVec<Real>(par().mom); | ||||||
|  |     LatticeComplex    phase(env().getGrid()), x(env().getGrid()); | ||||||
|  |     Complex           ci(0.0,1.0); | ||||||
|  |  | ||||||
|  |     LOG(Message) << "Setting up point sink function for momentum [" | ||||||
|  |                  << par().mom << "]" << std::endl; | ||||||
|  |     phase = zero; | ||||||
|  |     for(unsigned int dir = 0; dir < env().getNd(); dir++) | ||||||
|  |     { | ||||||
|  |         LatticeCoordinate(x, dir); | ||||||
|  |         phase = phase + (mom[dir]/env().getGrid()->_fdimensions[dir])*x; | ||||||
|  |     } | ||||||
|  |     phase = exp((Real)(2*M_PI)*ci*phase); | ||||||
|  |     // the phase is captured by value since the function outlives this call | ||||||
|  |     auto sinkFn = [phase](const PropagatorField &field) | ||||||
|  |     { | ||||||
|  |         SlicedPropagator sliced; | ||||||
|  |         PropagatorField  weighted = phase*field; | ||||||
|  |  | ||||||
|  |         sliceSum(weighted, sliced, Tp); | ||||||
|  |  | ||||||
|  |         return sliced; | ||||||
|  |     }; | ||||||
|  |     env().setObject(getName(), new SinkFn(sinkFn)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MSink_Point_hpp_ | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_RBPrecCG_hpp_ | #ifndef Hadrons_MSolver_RBPrecCG_hpp_ | ||||||
| #define Hadrons_RBPrecCG_hpp_ | #define Hadrons_MSolver_RBPrecCG_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -53,7 +53,7 @@ template <typename FImpl> | |||||||
| class TRBPrecCG: public Module<RBPrecCGPar> | class TRBPrecCG: public Module<RBPrecCGPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FGS_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TRBPrecCG(const std::string name); |     TRBPrecCG(const std::string name); | ||||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_RBPrecCG_hpp_ | #endif // Hadrons_MSolver_RBPrecCG_hpp_ | ||||||
|   | |||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Point_hpp_ | #ifndef Hadrons_MSource_Point_hpp_ | ||||||
| #define Hadrons_Point_hpp_ | #define Hadrons_MSource_Point_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -63,7 +63,7 @@ template <typename FImpl> | |||||||
| class TPoint: public Module<PointPar> | class TPoint: public Module<PointPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FERM_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TPoint(const std::string name); |     TPoint(const std::string name); | ||||||
| @@ -79,6 +79,7 @@ public: | |||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSource); | MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSource); | ||||||
|  | MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource); | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TPoint template implementation                       * |  *                       TPoint template implementation                       * | ||||||
| @@ -132,4 +133,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Point_hpp_ | #endif // Hadrons_MSource_Point_hpp_ | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp | |||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
| Copyright (C) 2016 | Copyright (C) 2016 | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  | ||||||
| @@ -27,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_SeqGamma_hpp_ | #ifndef Hadrons_MSource_SeqGamma_hpp_ | ||||||
| #define Hadrons_SeqGamma_hpp_ | #define Hadrons_MSource_SeqGamma_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -71,7 +72,7 @@ template <typename FImpl> | |||||||
| class TSeqGamma: public Module<SeqGammaPar> | class TSeqGamma: public Module<SeqGammaPar> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FGS_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TSeqGamma(const std::string name); |     TSeqGamma(const std::string name); | ||||||
| @@ -149,9 +150,9 @@ void TSeqGamma<FImpl>::execute(void) | |||||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) |     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||||
|     { |     { | ||||||
|         LatticeCoordinate(coor, mu); |         LatticeCoordinate(coor, mu); | ||||||
|         ph = ph + p[mu]*coor; |         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||||
|     } |     } | ||||||
|     ph = exp(i*ph); |     ph = exp((Real)(2*M_PI)*i*ph); | ||||||
|     LatticeCoordinate(t, Tp); |     LatticeCoordinate(t, Tp); | ||||||
|     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); |     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); | ||||||
| } | } | ||||||
| @@ -160,4 +161,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_SeqGamma_hpp_ | #endif // Hadrons_MSource_SeqGamma_hpp_ | ||||||
|   | |||||||
							
								
								
									
										147
									
								
								extras/Hadrons/Modules/MSource/Wall.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								extras/Hadrons/Modules/MSource/Wall.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  | Source file: extras/Hadrons/Modules/MSource/Wall.hpp | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Andrew Lawson <andrew.lawson1991@gmail.com> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef Hadrons_MSource_WallSource_hpp_ | ||||||
|  | #define Hadrons_MSource_WallSource_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Hadrons/Global.hpp> | ||||||
|  | #include <Grid/Hadrons/Module.hpp> | ||||||
|  | #include <Grid/Hadrons/ModuleFactory.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   | ||||||
|  |  Wall source | ||||||
|  |  ----------------------------- | ||||||
|  |  * src_x = delta(x_3 - tW) * exp(2*pi*i * sum_mu mom_mu*x_mu/L_mu) | ||||||
|  |   | ||||||
|  |  * options: | ||||||
|  |  - tW: source timeslice (integer) | ||||||
|  |  - mom: momentum insertion in units of 2*pi/L_mu, one space-separated float per direction (e.g. ".1 .2 1. 0.") | ||||||
|  |   | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                         Wall                                               * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | BEGIN_MODULE_NAMESPACE(MSource) | ||||||
|  |  | ||||||
|  | // Parameter set of the Wall module. | ||||||
|  | //   tW : timeslice on which the source is non-zero | ||||||
|  | //   mom: momentum components as one string, parsed when the module executes | ||||||
|  | class WallPar : Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS( | ||||||
|  |         WallPar, | ||||||
|  |         unsigned int, tW, | ||||||
|  |         std::string,  mom); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Module building a wall source from the WallPar parameters. | ||||||
|  | template <typename FImpl> | ||||||
|  | class TWall : public Module<WallPar> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     // type aliases derived from the fermion implementation | ||||||
|  |     // (presumably where PropagatorField, used by setup/execute, comes from) | ||||||
|  |     FERM_TYPE_ALIASES(FImpl,); | ||||||
|  | public: | ||||||
|  |     // construction / destruction | ||||||
|  |     TWall(const std::string name); | ||||||
|  |     virtual ~TWall(void) = default; | ||||||
|  |  | ||||||
|  |     // names of the objects this module consumes and produces | ||||||
|  |     virtual std::vector<std::string> getInput(void); | ||||||
|  |     virtual std::vector<std::string> getOutput(void); | ||||||
|  |  | ||||||
|  |     // registration of the output object | ||||||
|  |     virtual void setup(void); | ||||||
|  |  | ||||||
|  |     // construction of the source field | ||||||
|  |     virtual void execute(void); | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // register the FIMPL instantiation as module "Wall" of the MSource namespace | ||||||
|  | MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource); | ||||||
|  |  | ||||||
|  | /****************************************************************************** | ||||||
|  |  *                 TWall implementation                                       * | ||||||
|  |  ******************************************************************************/ | ||||||
|  | // constructor: hands the module name over to the Module<WallPar> base ///////// | ||||||
|  | template <typename FImpl> | ||||||
|  | TWall<FImpl>::TWall(const std::string name) : Module<WallPar>(name) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // dependencies/products /////////////////////////////////////////////////////// | ||||||
|  | // The wall source needs no input object: the returned list is empty. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TWall<FImpl>::getInput(void) | ||||||
|  | { | ||||||
|  |     return std::vector<std::string>(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // The single product is listed under the module's own name. | ||||||
|  | template <typename FImpl> | ||||||
|  | std::vector<std::string> TWall<FImpl>::getOutput(void) | ||||||
|  | { | ||||||
|  |     return {getName()}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // setup: register a PropagatorField lattice under the module name //////////// | ||||||
|  | template <typename FImpl> | ||||||
|  | void TWall<FImpl>::setup(void) | ||||||
|  | { | ||||||
|  |     const std::string outputName = getName(); | ||||||
|  |  | ||||||
|  |     env().template registerLattice<PropagatorField>(outputName); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // execution /////////////////////////////////////////////////////////////////// | ||||||
|  | // Fills the output propagator with the phase | ||||||
|  | //   exp(2*pi*i * sum_mu p_mu * x_mu / L_mu) | ||||||
|  | // on the sites whose Tp coordinate equals tW, and with zero everywhere else. | ||||||
|  | // p is parsed from the 'mom' parameter and must provide at least Nd components. | ||||||
|  | template <typename FImpl> | ||||||
|  | void TWall<FImpl>::execute(void) | ||||||
|  | { | ||||||
|  |     LOG(Message) << "Generating wall source at t = " << par().tW | ||||||
|  |                  << " with momentum " << par().mom << std::endl; | ||||||
|  |  | ||||||
|  |     const std::vector<Real> p = strToVec<Real>(par().mom); | ||||||
|  |  | ||||||
|  |     // The phase loop reads p[0] ... p[Nd-1]; a shorter momentum string would | ||||||
|  |     // index past the end of the vector, so stop with an explicit message. | ||||||
|  |     if (p.size() < static_cast<unsigned int>(Nd)) | ||||||
|  |     { | ||||||
|  |         LOG(Error) << "momentum '" << par().mom << "' has " << p.size() | ||||||
|  |                    << " component(s), at least " << Nd << " are required" | ||||||
|  |                    << std::endl; | ||||||
|  |         std::abort(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     PropagatorField &src = *env().template createLattice<PropagatorField>(getName()); | ||||||
|  |     Lattice<iScalar<vInteger>> t(env().getGrid()); | ||||||
|  |     LatticeComplex             ph(env().getGrid()), coor(env().getGrid()); | ||||||
|  |     Complex                    i(0.0,1.0); | ||||||
|  |  | ||||||
|  |     // accumulate sum_mu p_mu * x_mu / L_mu site by site | ||||||
|  |     ph = zero; | ||||||
|  |     for(unsigned int mu = 0; mu < Nd; mu++) | ||||||
|  |     { | ||||||
|  |         LatticeCoordinate(coor, mu); | ||||||
|  |         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||||
|  |     } | ||||||
|  |     ph = exp((Real)(2*M_PI)*i*ph); | ||||||
|  |  | ||||||
|  |     // keep the phase on the source timeslice only | ||||||
|  |     LatticeCoordinate(t, Tp); | ||||||
|  |     src = 1.; | ||||||
|  |     src = where((t == par().tW), src*ph, 0.*src); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | END_MODULE_NAMESPACE | ||||||
|  |  | ||||||
|  | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // Hadrons_MSource_WallSource_hpp_ | ||||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef Hadrons_Z2_hpp_ | #ifndef Hadrons_MSource_Z2_hpp_ | ||||||
| #define Hadrons_Z2_hpp_ | #define Hadrons_MSource_Z2_hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -67,7 +67,7 @@ template <typename FImpl> | |||||||
| class TZ2: public Module<Z2Par> | class TZ2: public Module<Z2Par> | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     TYPE_ALIASES(FImpl,); |     FERM_TYPE_ALIASES(FImpl,); | ||||||
| public: | public: | ||||||
|     // constructor |     // constructor | ||||||
|     TZ2(const std::string name); |     TZ2(const std::string name); | ||||||
| @@ -83,6 +83,7 @@ public: | |||||||
| }; | }; | ||||||
|  |  | ||||||
| MODULE_REGISTER_NS(Z2,       TZ2<FIMPL>,        MSource); | MODULE_REGISTER_NS(Z2,       TZ2<FIMPL>,        MSource); | ||||||
|  | MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource); | ||||||
|  |  | ||||||
| /****************************************************************************** | /****************************************************************************** | ||||||
|  *                       TZ2 template implementation                          * |  *                       TZ2 template implementation                          * | ||||||
| @@ -148,4 +149,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons_Z2_hpp_ | #endif // Hadrons_MSource_Z2_hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____FILEBASENAME____hpp_ | #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||||
| #define Hadrons____FILEBASENAME____hpp_ | #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____FILEBASENAME____hpp_ | #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| #ifndef Hadrons____FILEBASENAME____hpp_ | #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||||
| #define Hadrons____FILEBASENAME____hpp_ | #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||||
|  |  | ||||||
| #include <Grid/Hadrons/Global.hpp> | #include <Grid/Hadrons/Global.hpp> | ||||||
| #include <Grid/Hadrons/Module.hpp> | #include <Grid/Hadrons/Module.hpp> | ||||||
| @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE | |||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #endif // Hadrons____FILEBASENAME____hpp_ | #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||||
|   | |||||||
| @@ -1,19 +1,38 @@ | |||||||
| modules_cc =\ | modules_cc =\ | ||||||
|  |   Modules/MContraction/WeakHamiltonianEye.cc \ | ||||||
|  |   Modules/MContraction/WeakHamiltonianNonEye.cc \ | ||||||
|  |   Modules/MContraction/WeakNeutral4ptDisc.cc \ | ||||||
|   Modules/MGauge/Load.cc \ |   Modules/MGauge/Load.cc \ | ||||||
|   Modules/MGauge/Random.cc \ |   Modules/MGauge/Random.cc \ | ||||||
|   Modules/MGauge/Unit.cc |   Modules/MGauge/StochEm.cc \ | ||||||
|  |   Modules/MGauge/Unit.cc \ | ||||||
|  |   Modules/MScalar/ChargedProp.cc \ | ||||||
|  |   Modules/MScalar/FreeProp.cc | ||||||
|  |  | ||||||
| modules_hpp =\ | modules_hpp =\ | ||||||
|   Modules/MAction/DWF.hpp \ |   Modules/MAction/DWF.hpp \ | ||||||
|   Modules/MAction/Wilson.hpp \ |   Modules/MAction/Wilson.hpp \ | ||||||
|   Modules/MContraction/Baryon.hpp \ |   Modules/MContraction/Baryon.hpp \ | ||||||
|  |   Modules/MContraction/DiscLoop.hpp \ | ||||||
|  |   Modules/MContraction/Gamma3pt.hpp \ | ||||||
|   Modules/MContraction/Meson.hpp \ |   Modules/MContraction/Meson.hpp \ | ||||||
|  |   Modules/MContraction/WeakHamiltonian.hpp \ | ||||||
|  |   Modules/MContraction/WeakHamiltonianEye.hpp \ | ||||||
|  |   Modules/MContraction/WeakHamiltonianNonEye.hpp \ | ||||||
|  |   Modules/MContraction/WeakNeutral4ptDisc.hpp \ | ||||||
|  |   Modules/MFermion/GaugeProp.hpp \ | ||||||
|   Modules/MGauge/Load.hpp \ |   Modules/MGauge/Load.hpp \ | ||||||
|   Modules/MGauge/Random.hpp \ |   Modules/MGauge/Random.hpp \ | ||||||
|  |   Modules/MGauge/StochEm.hpp \ | ||||||
|   Modules/MGauge/Unit.hpp \ |   Modules/MGauge/Unit.hpp \ | ||||||
|  |   Modules/MLoop/NoiseLoop.hpp \ | ||||||
|  |   Modules/MScalar/ChargedProp.hpp \ | ||||||
|  |   Modules/MScalar/FreeProp.hpp \ | ||||||
|  |   Modules/MScalar/Scalar.hpp \ | ||||||
|  |   Modules/MSink/Point.hpp \ | ||||||
|   Modules/MSolver/RBPrecCG.hpp \ |   Modules/MSolver/RBPrecCG.hpp \ | ||||||
|   Modules/MSource/Point.hpp \ |   Modules/MSource/Point.hpp \ | ||||||
|   Modules/MSource/SeqGamma.hpp \ |   Modules/MSource/SeqGamma.hpp \ | ||||||
|   Modules/MSource/Z2.hpp \ |   Modules/MSource/Wall.hpp \ | ||||||
|   Modules/Quark.hpp |   Modules/MSource/Z2.hpp | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								extras/qed-fvol/Global.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								extras/qed-fvol/Global.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
|  | #include <qed-fvol/Global.hpp> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace QCD; | ||||||
|  | using namespace QedFVol; | ||||||
|  |  | ||||||
|  | QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); | ||||||
|  | QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); | ||||||
|  | QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); | ||||||
|  | QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); | ||||||
|  | QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); | ||||||
							
								
								
									
										42
									
								
								extras/qed-fvol/Global.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								extras/qed-fvol/Global.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | |||||||
|  | #ifndef QedFVol_Global_hpp_ | ||||||
|  | #define QedFVol_Global_hpp_ | ||||||
|  |  | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  | /* BEGIN_QEDFVOL_NAMESPACE opens namespace Grid::QedFVol (with QCD names made | ||||||
|  |  * visible inside Grid); END_QEDFVOL_NAMESPACE closes both namespaces. | ||||||
|  |  * | ||||||
|  |  * The 'using Grid::operator<<;' statement prevents a very nasty compilation | ||||||
|  |  * error with GCC (clang compiles fine without it). | ||||||
|  |  */ | ||||||
|  | #define BEGIN_QEDFVOL_NAMESPACE \ | ||||||
|  | namespace Grid {                \ | ||||||
|  | using namespace QCD;            \ | ||||||
|  | namespace QedFVol {             \ | ||||||
|  | using Grid::operator<<; | ||||||
|  | #define END_QEDFVOL_NAMESPACE }} | ||||||
|  |  | ||||||
|  | BEGIN_QEDFVOL_NAMESPACE | ||||||
|  |  | ||||||
|  | // Logger specialisation for this program: forwards "QedFVol", the 'on' flag, | ||||||
|  | // the channel name 'nm', GridLogColours and "BLACK" to the Logger constructor. | ||||||
|  | class QedFVolLogger : public Logger | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     QedFVolLogger(int on, std::string nm) | ||||||
|  |     : Logger("QedFVol", on, nm, GridLogColours, "BLACK") | ||||||
|  |     {} | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // LOG(channel): start of an output statement on the QedFVolLog<channel> logger. | ||||||
|  | #define LOG(channel) std::cout << QedFVolLog##channel | ||||||
|  |  | ||||||
|  | // QEDFVOL_ERROR(msg): print msg followed by the function, file and line on the | ||||||
|  | // Error channel, then abort. The expansion is one braced block so that an | ||||||
|  | // unbraced 'if (cond) QEDFVOL_ERROR(...);' aborts only when cond holds. | ||||||
|  | #define QEDFVOL_ERROR(msg)\ | ||||||
|  | {\ | ||||||
|  | LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ | ||||||
|  |            << __LINE__ << ")" << std::endl;\ | ||||||
|  | abort();\ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // DEBUG_VAR(var): print "<expression>= <value>" on the Debug channel. | ||||||
|  | #define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; | ||||||
|  |  | ||||||
|  | // logger channels shared by the whole program, one per verbosity level | ||||||
|  | extern QedFVolLogger   QedFVolLogError; | ||||||
|  | extern QedFVolLogger   QedFVolLogWarning; | ||||||
|  | extern QedFVolLogger   QedFVolLogMessage; | ||||||
|  | extern QedFVolLogger   QedFVolLogIterative; | ||||||
|  | extern QedFVolLogger   QedFVolLogDebug; | ||||||
|  |  | ||||||
|  | END_QEDFVOL_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // QedFVol_Global_hpp_ | ||||||
							
								
								
									
										9
									
								
								extras/qed-fvol/Makefile.am
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								extras/qed-fvol/Makefile.am
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | # add the extras directory to the include search path | ||||||
|  | AM_CXXFLAGS += -I$(top_srcdir)/extras | ||||||
|  |  | ||||||
|  | # the qed-fvol program, its sources and the library it links against | ||||||
|  | bin_PROGRAMS = qed-fvol | ||||||
|  |  | ||||||
|  | qed_fvol_SOURCES = qed-fvol.cc Global.cc | ||||||
|  |  | ||||||
|  | qed_fvol_LDADD = -lGrid | ||||||
							
								
								
									
										265
									
								
								extras/qed-fvol/WilsonLoops.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										265
									
								
								extras/qed-fvol/WilsonLoops.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,265 @@ | |||||||
|  | #ifndef QEDFVOL_WILSONLOOPS_H | ||||||
|  | #define QEDFVOL_WILSONLOOPS_H | ||||||
|  |  | ||||||
|  | #include <Global.hpp> | ||||||
|  |  | ||||||
|  | BEGIN_QEDFVOL_NAMESPACE | ||||||
|  |  | ||||||
|  | // Plaquette and rectangular Wilson-loop observables for the gauge | ||||||
|  | // implementation Gimpl. All members are static; the sum*/avg* entry points take | ||||||
|  | // a Lorentz-indexed gauge field, the site*/trace* ones take one link field per | ||||||
|  | // direction. | ||||||
|  | template <class Gimpl> class NewWilsonLoops : public Gimpl { | ||||||
|  | public: | ||||||
|  |   INHERIT_GIMPL_TYPES(Gimpl); | ||||||
|  |  | ||||||
|  |   typedef typename Gimpl::GaugeLinkField GaugeMat; | ||||||
|  |   typedef typename Gimpl::GaugeField GaugeLorentz; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // one link field per grid direction, peeked out of the Lorentz-indexed field. | ||||||
|  |   // The vector is sized from the grid (not a literal 4) so that it always has | ||||||
|  |   // a slot for every direction the fill loop writes. | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static std::vector<GaugeMat> linkComponents(const GaugeLorentz &Umu) { | ||||||
|  |     const int ndim = Umu._grid->_ndimension; | ||||||
|  |     std::vector<GaugeMat> U(ndim, Umu._grid); | ||||||
|  |  | ||||||
|  |     for (int mu = 0; mu < ndim; mu++) { | ||||||
|  |       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||||
|  |     } | ||||||
|  |     return U; | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // real part of the lattice sum of a complex field | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real realPartOfSum(const LatticeComplex &field) { | ||||||
|  |     TComplex Tp = sum(field); | ||||||
|  |     Complex p = TensorRemove(Tp); | ||||||
|  |     return p.real(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | public: | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // directed plaquette oriented in mu,nu plane | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, | ||||||
|  |                            const int mu, const int nu) { | ||||||
|  |     // Annoyingly, must use either scope resolution to find dependent base | ||||||
|  |     // class, | ||||||
|  |     // or this-> ; there is no "this" in a static method. This forces explicit | ||||||
|  |     // Gimpl scope | ||||||
|  |     // resolution throughout the usage in this file, and rather defeats the | ||||||
|  |     // purpose of deriving | ||||||
|  |     // from Gimpl. | ||||||
|  |     plaq = Gimpl::CovShiftBackward( | ||||||
|  |         U[mu], mu, Gimpl::CovShiftBackward( | ||||||
|  |                        U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // trace of directed plaquette oriented in mu,nu plane | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void traceDirPlaquette(LatticeComplex &plaq, | ||||||
|  |                                 const std::vector<GaugeMat> &U, const int mu, | ||||||
|  |                                 const int nu) { | ||||||
|  |     GaugeMat sp(U[0]._grid); | ||||||
|  |     dirPlaquette(sp, U, mu, nu); | ||||||
|  |     plaq = trace(sp); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over all planes of plaquette | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void sitePlaquette(LatticeComplex &Plaq, | ||||||
|  |                             const std::vector<GaugeMat> &U) { | ||||||
|  |     LatticeComplex sitePlaq(U[0]._grid); | ||||||
|  |     Plaq = zero; | ||||||
|  |     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||||
|  |       for (int nu = 0; nu < mu; nu++) { | ||||||
|  |         traceDirPlaquette(sitePlaq, U, mu, nu); | ||||||
|  |         Plaq = Plaq + sitePlaq; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over all x,y,z,t and over all planes of plaquette | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real sumPlaquette(const GaugeLorentz &Umu) { | ||||||
|  |     std::vector<GaugeMat> U = linkComponents(Umu); | ||||||
|  |     LatticeComplex Plaq(Umu._grid); | ||||||
|  |  | ||||||
|  |     sitePlaquette(Plaq, U); | ||||||
|  |  | ||||||
|  |     return realPartOfSum(Plaq); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // average over all x,y,z,t and over all planes of plaquette | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real avgPlaquette(const GaugeLorentz &Umu) { | ||||||
|  |     int ndim = Umu._grid->_ndimension; | ||||||
|  |     Real sumplaq = sumPlaquette(Umu); | ||||||
|  |     Real vol = Umu._grid->gSites(); | ||||||
|  |     // number of (mu,nu) planes with nu < mu | ||||||
|  |     Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; | ||||||
|  |     return sumplaq / vol / faces / Nc; // Nc dependent... FIXME | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // Wilson loop of size (R1, R2), oriented in mu,nu plane | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, | ||||||
|  |                            const int Rmu, const int Rnu, | ||||||
|  |                            const int mu, const int nu) { | ||||||
|  |     // the first nu link is taken directly, hence Rnu-1 further forward shifts | ||||||
|  |     wl = U[nu]; | ||||||
|  |  | ||||||
|  |     for(int i = 0; i < Rnu-1; i++){ | ||||||
|  |       wl = Gimpl::CovShiftForward(U[nu], nu, wl); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for(int i = 0; i < Rmu; i++){ | ||||||
|  |       wl = Gimpl::CovShiftForward(U[mu], mu, wl); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for(int i = 0; i < Rnu; i++){ | ||||||
|  |       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for(int i = 0; i < Rmu; i++){ | ||||||
|  |       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // trace of Wilson Loop oriented in mu,nu plane | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void traceWilsonLoop(LatticeComplex &wl, | ||||||
|  |                                 const std::vector<GaugeMat> &U, | ||||||
|  |                                 const int Rmu, const int Rnu, | ||||||
|  |                                 const int mu, const int nu) { | ||||||
|  |     GaugeMat sp(U[0]._grid); | ||||||
|  |     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); | ||||||
|  |     wl = trace(sp); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over all planes of Wilson loop | ||||||
|  |   // (both orientations R1 x R2 and R2 x R1 are added for every plane) | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void siteWilsonLoop(LatticeComplex &Wl, | ||||||
|  |                             const std::vector<GaugeMat> &U, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     LatticeComplex siteWl(U[0]._grid); | ||||||
|  |     Wl = zero; | ||||||
|  |     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||||
|  |       for (int nu = 0; nu < mu; nu++) { | ||||||
|  |         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||||
|  |         Wl = Wl + siteWl; | ||||||
|  |         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||||
|  |         Wl = Wl + siteWl; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over planes of Wilson loop with length R1 | ||||||
|  |   // in the time direction | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, | ||||||
|  |                             const std::vector<GaugeMat> &U, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     LatticeComplex siteWl(U[0]._grid); | ||||||
|  |  | ||||||
|  |     int ndim = U[0]._grid->_ndimension; | ||||||
|  |  | ||||||
|  |     Wl = zero; | ||||||
|  |     // the last grid direction plays the role of time | ||||||
|  |     for (int nu = 0; nu < ndim - 1; nu++) { | ||||||
|  |       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); | ||||||
|  |       Wl = Wl + siteWl; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum Wilson loop over all planes orthogonal to the time direction | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static void siteSpatialWilsonLoop(LatticeComplex &Wl, | ||||||
|  |                             const std::vector<GaugeMat> &U, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     LatticeComplex siteWl(U[0]._grid); | ||||||
|  |  | ||||||
|  |     Wl = zero; | ||||||
|  |     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { | ||||||
|  |       for (int nu = 0; nu < mu; nu++) { | ||||||
|  |         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||||
|  |         Wl = Wl + siteWl; | ||||||
|  |         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||||
|  |         Wl = Wl + siteWl; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over all x,y,z,t and over all planes of Wilson loop | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real sumWilsonLoop(const GaugeLorentz &Umu, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     std::vector<GaugeMat> U = linkComponents(Umu); | ||||||
|  |     LatticeComplex Wl(Umu._grid); | ||||||
|  |  | ||||||
|  |     siteWilsonLoop(Wl, U, R1, R2); | ||||||
|  |  | ||||||
|  |     return realPartOfSum(Wl); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over all x,y,z,t and over all planes of timelike Wilson loop | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     std::vector<GaugeMat> U = linkComponents(Umu); | ||||||
|  |     LatticeComplex Wl(Umu._grid); | ||||||
|  |  | ||||||
|  |     siteTimelikeWilsonLoop(Wl, U, R1, R2); | ||||||
|  |  | ||||||
|  |     return realPartOfSum(Wl); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // sum over all x,y,z,t and over all planes of spatial Wilson loop | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     std::vector<GaugeMat> U = linkComponents(Umu); | ||||||
|  |     LatticeComplex Wl(Umu._grid); | ||||||
|  |  | ||||||
|  |     siteSpatialWilsonLoop(Wl, U, R1, R2); | ||||||
|  |  | ||||||
|  |     return realPartOfSum(Wl); | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // average over all x,y,z,t and over all planes of Wilson loop | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real avgWilsonLoop(const GaugeLorentz &Umu, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     int ndim = Umu._grid->_ndimension; | ||||||
|  |     Real sumWl = sumWilsonLoop(Umu, R1, R2); | ||||||
|  |     Real vol = Umu._grid->gSites(); | ||||||
|  |     // ndim*(ndim-1)/2 planes, each counted in both orientations | ||||||
|  |     Real faces = 1.0 * ndim * (ndim - 1); | ||||||
|  |     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // average over all x,y,z,t and over all planes of timelike Wilson loop | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     int ndim = Umu._grid->_ndimension; | ||||||
|  |     Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); | ||||||
|  |     Real vol = Umu._grid->gSites(); | ||||||
|  |     // one loop per direction paired with the last one | ||||||
|  |     Real faces = 1.0 * (ndim - 1); | ||||||
|  |     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||||
|  |   } | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   // average over all x,y,z,t and over all planes of spatial Wilson loop | ||||||
|  |   ////////////////////////////////////////////////// | ||||||
|  |   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||||
|  |                             const int R1, const int R2) { | ||||||
|  |     int ndim = Umu._grid->_ndimension; | ||||||
|  |     Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); | ||||||
|  |     Real vol = Umu._grid->gSites(); | ||||||
|  |     // (ndim-1)*(ndim-2)/2 planes, each counted in both orientations | ||||||
|  |     Real faces = 1.0 * (ndim - 1) * (ndim - 2); | ||||||
|  |     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | END_QEDFVOL_NAMESPACE | ||||||
|  |  | ||||||
|  | #endif // QEDFVOL_WILSONLOOPS_H | ||||||
							
								
								
									
										88
									
								
								extras/qed-fvol/qed-fvol.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								extras/qed-fvol/qed-fvol.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | |||||||
|  | #include <Global.hpp> | ||||||
|  | #include <WilsonLoops.h> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace QCD; | ||||||
|  | using namespace QedFVol; | ||||||
|  |  | ||||||
|  | // gauge implementation and field types used for the photon field | ||||||
|  | using QedPeriodicGimplR = PeriodicGaugeImpl<QedGimplR>; | ||||||
|  | using EmField           = PhotonR::GaugeField; | ||||||
|  | using EmComp            = PhotonR::GaugeLinkField; | ||||||
|  |  | ||||||
|  | // number of stochastic fields generated / largest square loop extent measured | ||||||
|  | const int NCONFIGS = 10; | ||||||
|  | const int NWILSON  = 10; | ||||||
|  |  | ||||||
|  | // Generates NCONFIGS stochastic photon fields, measures iw x iw Wilson loops | ||||||
|  | // (all planes, timelike, spatial) for iw = 1..NWILSON on exp(i*A), and logs | ||||||
|  | // -2*log(W) summed over the fields and divided by NCONFIGS. | ||||||
|  | int main(int argc, char *argv[]) | ||||||
|  | { | ||||||
|  |     // parse command line | ||||||
|  |     std::string parameterFileName; | ||||||
|  |  | ||||||
|  |     if (argc < 2) | ||||||
|  |     { | ||||||
|  |         std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]"; | ||||||
|  |         std::cerr << std::endl; | ||||||
|  |         std::exit(EXIT_FAILURE); | ||||||
|  |     } | ||||||
|  |     // NOTE(review): the parameter file name is stored but not read below | ||||||
|  |     parameterFileName = argv[1]; | ||||||
|  |  | ||||||
|  |     // initialization: mirror the activation state of the Grid log channels | ||||||
|  |     Grid_init(&argc, &argv); | ||||||
|  |     QedFVolLogError.Active(GridLogError.isActive()); | ||||||
|  |     QedFVolLogWarning.Active(GridLogWarning.isActive()); | ||||||
|  |     QedFVolLogMessage.Active(GridLogMessage.isActive()); | ||||||
|  |     QedFVolLogIterative.Active(GridLogIterative.isActive()); | ||||||
|  |     QedFVolLogDebug.Active(GridLogDebug.isActive()); | ||||||
|  |     LOG(Message) << "Grid initialized" << std::endl; | ||||||
|  |  | ||||||
|  |     // QED stuff | ||||||
|  |     std::vector<int> latt_size   = GridDefaultLatt(); | ||||||
|  |     std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); | ||||||
|  |     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |     GridCartesian    grid(latt_size,simd_layout,mpi_layout); | ||||||
|  |     GridParallelRNG  pRNG(&grid); | ||||||
|  |     PhotonR          photon(PhotonR::Gauge::feynman, | ||||||
|  |                             PhotonR::ZmScheme::qedL); | ||||||
|  |     EmField          a(&grid); | ||||||
|  |     EmField          expA(&grid); | ||||||
|  |  | ||||||
|  |     Complex imag_unit(0, 1); | ||||||
|  |  | ||||||
|  |     Real wlA; | ||||||
|  |     std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); | ||||||
|  |  | ||||||
|  |     pRNG.SeedRandomDevice(); | ||||||
|  |  | ||||||
|  |     LOG(Message) << "Wilson loop calculation beginning" << std::endl; | ||||||
|  |     for(int ic = 0; ic < NCONFIGS; ic++){ | ||||||
|  |         LOG(Message) << "Configuration " << ic <<std::endl; | ||||||
|  |         photon.StochasticField(a, pRNG); | ||||||
|  |  | ||||||
|  |         // Exponentiate photon field | ||||||
|  |         expA = exp(imag_unit*a); | ||||||
|  |  | ||||||
|  |         // Calculate Wilson loops; the factor Nc undoes the 1/Nc normalisation | ||||||
|  |         // applied inside the avg*WilsonLoop functions | ||||||
|  |         for(int iw=1; iw<=NWILSON; iw++){ | ||||||
|  |             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * Nc; | ||||||
|  |             logWlAvg[iw-1] -= 2*log(wlA); | ||||||
|  |             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * Nc; | ||||||
|  |             logWlTime[iw-1] -= 2*log(wlA); | ||||||
|  |             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * Nc; | ||||||
|  |             logWlSpace[iw-1] -= 2*log(wlA); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     LOG(Message) << "Wilson loop calculation completed" << std::endl; | ||||||
|  |  | ||||||
|  |     // Report the per-configuration averages; the bound is NWILSON (the size of | ||||||
|  |     // the accumulators) rather than a literal, so the two cannot drift apart | ||||||
|  |     for(int iw=1; iw<=NWILSON; iw++){ | ||||||
|  |         LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; | ||||||
|  |         LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; | ||||||
|  |         LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; | ||||||
|  |         LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // epilogue | ||||||
|  |     LOG(Message) << "Grid is finalizing now" << std::endl; | ||||||
|  |     Grid_finalize(); | ||||||
|  |  | ||||||
|  |     return EXIT_SUCCESS; | ||||||
|  | } | ||||||
| @@ -21,3 +21,16 @@ problem. The test case works with icpc and with clang++, but fails consistently | |||||||
| current variants. | current variants. | ||||||
|  |  | ||||||
| Peter | Peter | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ************ | ||||||
|  |  | ||||||
|  | Second GCC bug reported, see Issue 100. | ||||||
|  |  | ||||||
|  | https://wandbox.org/permlink/tzssJza6R9XnqANw | ||||||
|  | https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652 | ||||||
|  |  | ||||||
|  | Travis builds now fail under gcc-5 for Test_simd, since I added more comprehensive testing to the | ||||||
|  | CI test suite. The limitations of Travis (runtime limits and weak cores) are starting to show. | ||||||
|  |  | ||||||
|  | Travis uses 5.4.1 for g++-5. | ||||||
|   | |||||||
							
								
								
									
										86
									
								
								grid-config.in
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										86
									
								
								grid-config.in
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,86 @@ | |||||||
|  | #! /bin/sh | ||||||
|  |  | ||||||
|  | # grid-config: report how Grid was configured and installed. | ||||||
|  | # The @...@ placeholders are filled in when this template is processed. | ||||||
|  |  | ||||||
|  | prefix=@prefix@ | ||||||
|  | exec_prefix=@exec_prefix@ | ||||||
|  | includedir=@includedir@ | ||||||
|  |  | ||||||
|  | # usage [STATUS] | ||||||
|  | # Print the help text and exit with STATUS (1 when omitted). | ||||||
|  | # Help that was asked for (STATUS 0) goes to stdout; usage errors go to | ||||||
|  | # stderr so they cannot leak into a caller's $(grid-config ...) output. | ||||||
|  | usage() | ||||||
|  | { | ||||||
|  |   usage_status=${1:-1} | ||||||
|  |   if test "$usage_status" -ne 0; then | ||||||
|  |     exec 1>&2 | ||||||
|  |   fi | ||||||
|  |  | ||||||
|  |   cat <<EOF | ||||||
|  | Usage: grid-config [OPTION] | ||||||
|  |  | ||||||
|  | Known values for OPTION are: | ||||||
|  |  | ||||||
|  |   --prefix     show Grid installation prefix | ||||||
|  |   --cxxflags   print pre-processor and compiler flags | ||||||
|  |   --ldflags    print library linking flags | ||||||
|  |   --libs       print library linking information | ||||||
|  |   --summary    print full build summary | ||||||
|  |   --help       display this help and exit | ||||||
|  |   --version    output version information | ||||||
|  |   --git        print git revision | ||||||
|  |  | ||||||
|  | EOF | ||||||
|  |  | ||||||
|  |   exit "$usage_status" | ||||||
|  | } | ||||||
|  |  | ||||||
|  | # At least one option is required. | ||||||
|  | if test $# -eq 0; then | ||||||
|  |   usage 1 | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | # Options are handled in order; --version, --git and --help stop processing. | ||||||
|  | while test $# -gt 0; do | ||||||
|  |   case "$1" in | ||||||
|  |     --prefix) | ||||||
|  |       echo "$prefix" | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --version) | ||||||
|  |       echo @VERSION@ | ||||||
|  |       exit 0 | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --git) | ||||||
|  |       echo "@GRID_BRANCH@ @GRID_SHA@" | ||||||
|  |       exit 0 | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --help) | ||||||
|  |       usage 0 | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --cxxflags) | ||||||
|  |       echo @GRID_CXXFLAGS@ | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --ldflags) | ||||||
|  |       echo @GRID_LDFLAGS@ | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --libs) | ||||||
|  |       echo @GRID_LIBS@ | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     --summary) | ||||||
|  |       echo "" | ||||||
|  |       echo "@GRID_SUMMARY@" | ||||||
|  |       echo "" | ||||||
|  |     ;; | ||||||
|  |  | ||||||
|  |     *) | ||||||
|  |       # Unknown option: show usage and fail with a non-zero status. | ||||||
|  |       usage 1 | ||||||
|  |     ;; | ||||||
|  |   esac | ||||||
|  |   shift | ||||||
|  | done | ||||||
|  |  | ||||||
|  | exit 0 | ||||||
							
								
								
									
										37
									
								
								lib/DisableWarnings.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								lib/DisableWarnings.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/DisableWarnings.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | #ifndef DISABLE_WARNINGS_H | ||||||
|  | #define DISABLE_WARNINGS_H | ||||||
|  |  | ||||||
|  | // Disables an Intel-compiler-specific warning (number 488, raised in json.hpp). | ||||||
|  | // The pragma is only emitted for the Intel compiler so that other compilers | ||||||
|  | // do not see a pragma they do not recognise. | ||||||
|  | #ifdef __INTEL_COMPILER | ||||||
|  | #pragma warning disable 488 | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/GridQCDcore.h> | #include <Grid/GridQCDcore.h> | ||||||
| #include <Grid/qcd/action/Action.h> | #include <Grid/qcd/action/Action.h> | ||||||
|  | #include <Grid/qcd/utils/GaugeFix.h> | ||||||
| #include <Grid/qcd/smearing/Smearing.h> | #include <Grid/qcd/smearing/Smearing.h> | ||||||
|  | #include <Grid/parallelIO/MetaData.h> | ||||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,28 +38,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_BASE_H | #ifndef GRID_BASE_H | ||||||
| #define GRID_BASE_H | #define GRID_BASE_H | ||||||
|  |  | ||||||
| /////////////////// | #include <Grid/GridStd.h> | ||||||
| // Std C++ dependencies |  | ||||||
| /////////////////// |  | ||||||
| #include <cassert> |  | ||||||
| #include <complex> |  | ||||||
| #include <vector> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <random> |  | ||||||
| #include <functional> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <signal.h> |  | ||||||
| #include <ctime> |  | ||||||
| #include <sys/time.h> |  | ||||||
| #include <chrono> |  | ||||||
|  |  | ||||||
| /////////////////// |  | ||||||
| // Grid headers |  | ||||||
| /////////////////// |  | ||||||
| #include "Config.h" |  | ||||||
|  |  | ||||||
| #include <Grid/perfmon/Timer.h> | #include <Grid/perfmon/Timer.h> | ||||||
| #include <Grid/perfmon/PerfCount.h> | #include <Grid/perfmon/PerfCount.h> | ||||||
|   | |||||||
							
								
								
									
										29
									
								
								lib/GridStd.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								lib/GridStd.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | |||||||
|  | #ifndef GRID_STD_H | ||||||
|  | #define GRID_STD_H | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Std C++ dependencies | ||||||
|  | /////////////////// | ||||||
|  | #include <cassert> | ||||||
|  | #include <complex> | ||||||
|  | #include <vector> | ||||||
|  | #include <string> | ||||||
|  | #include <iostream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <random> | ||||||
|  | #include <functional> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <signal.h> | ||||||
|  | #include <ctime> | ||||||
|  | #include <sys/time.h> | ||||||
|  | #include <chrono> | ||||||
|  | #include <zlib.h> | ||||||
|  |  | ||||||
|  | /////////////////// | ||||||
|  | // Grid config | ||||||
|  | /////////////////// | ||||||
|  | #include "Config.h" | ||||||
|  |  | ||||||
|  | #endif /* GRID_STD_H */ | ||||||
							
								
								
									
										9
									
								
								lib/Grid_Eigen_Dense.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								lib/Grid_Eigen_Dense.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | |||||||
|  | #pragma once | ||||||
|  | #if defined __GNUC__ | ||||||
|  | #pragma GCC diagnostic push | ||||||
|  | #pragma GCC diagnostic ignored "-Wdeprecated-declarations" | ||||||
|  | #endif | ||||||
|  | #include <Grid/Eigen/Dense> | ||||||
|  | #if defined __GNUC__ | ||||||
|  | #pragma GCC diagnostic pop | ||||||
|  | #endif | ||||||
| @@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3 | |||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
| if BUILD_COMMS_MPI3L | if BUILD_COMMS_MPIT | ||||||
|   extra_sources+=communicator/Communicator_mpi3_leader.cc |   extra_sources+=communicator/Communicator_mpit.cc | ||||||
|   extra_sources+=communicator/Communicator_base.cc |   extra_sources+=communicator/Communicator_base.cc | ||||||
| endif | endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
|  |  | ||||||
| // Lanczos support | // Lanczos support | ||||||
| #include <Grid/algorithms/iterative/MatrixUtils.h> | //#include <Grid/algorithms/iterative/MatrixUtils.h> | ||||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | #include <Grid/algorithms/CoarsenedMatrix.h> | ||||||
| #include <Grid/algorithms/FFT.h> | #include <Grid/algorithms/FFT.h> | ||||||
|   | |||||||
| @@ -197,8 +197,9 @@ namespace Grid { | |||||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { |     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||||
|  |  | ||||||
|       GridBase *grid=in._grid; |       GridBase *grid=in._grid; | ||||||
| //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; |  | ||||||
| //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; |       // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||||
|  |       //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||||
|  |  | ||||||
|       int vol=grid->gSites(); |       int vol=grid->gSites(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,7 +16,7 @@ | |||||||
| #define INCLUDED_ALG_REMEZ_H | #define INCLUDED_ALG_REMEZ_H | ||||||
|  |  | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <Config.h> | #include <Grid/GridStd.h> | ||||||
|  |  | ||||||
| #ifdef HAVE_LIBGMP | #ifdef HAVE_LIBGMP | ||||||
| #include "bigfloat.h" | #include "bigfloat.h" | ||||||
|   | |||||||
							
								
								
									
										600
									
								
								lib/algorithms/iterative/BlockConjugateGradient.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										600
									
								
								lib/algorithms/iterative/BlockConjugateGradient.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,600 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2017 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H | ||||||
|  | #define GRID_BLOCK_CONJUGATE_GRADIENT_H | ||||||
|  |  | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; | ||||||
|  |  | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Block conjugate gradient. Dimension zero should be the block direction | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | template <class Field> | ||||||
|  | class BlockConjugateGradient : public OperatorFunction<Field> { | ||||||
|  |  public: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   typedef typename Field::scalar_type scomplex; | ||||||
|  |  | ||||||
|  |   int blockDim ; | ||||||
|  |   int Nblock; | ||||||
|  |  | ||||||
|  |   BlockCGtype CGtype; | ||||||
|  |   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||||
|  |                            // Defaults true. | ||||||
|  |   RealD Tolerance; | ||||||
|  |   Integer MaxIterations; | ||||||
|  |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|  |    | ||||||
|  |   // Construct a block solver. | ||||||
|  |   //   cgtype         : algorithm that operator() dispatches to | ||||||
|  |   //   _Orthog        : stored as blockDim (the block direction) | ||||||
|  |   //   tol            : stored as Tolerance | ||||||
|  |   //   maxit          : stored as MaxIterations | ||||||
|  |   //   err_on_no_conv : stored as ErrorOnNoConverge (defaults to true) | ||||||
|  |   // Initialisers follow the member declaration order, which is the order they | ||||||
|  |   // actually run in. Nblock and IterationsToComplete start at zero instead of | ||||||
|  |   // being left indeterminate until the first solve. | ||||||
|  |   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||||
|  |     : blockDim(_Orthog), Nblock(0), CGtype(cgtype), ErrorOnNoConverge(err_on_no_conv), | ||||||
|  |       Tolerance(tol), MaxIterations(maxit), IterationsToComplete(0) | ||||||
|  |   {} | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Thin QR factorisation (google it) | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||||
|  | 		 Eigen::MatrixXcd &C, | ||||||
|  | 		 Eigen::MatrixXcd &Cinv, | ||||||
|  | 		 Field & Q, | ||||||
|  | 		 const Field & R) | ||||||
|  | { | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Factorise R = Q C along the block direction. | ||||||
|  |   // | ||||||
|  |   // Outputs: | ||||||
|  |   //   m_rr : slice inner-product matrix of R with itself (R^dag R), Hermitian | ||||||
|  |   //   C    : adjoint of the Cholesky factor L of m_rr, where m_rr = L L^dag | ||||||
|  |   //   Cinv : inverse of C | ||||||
|  |   //   Q    : R C^{-1} | ||||||
|  |   // | ||||||
|  |   // Shapes: R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} | ||||||
|  |   // | ||||||
|  |   // Why C = L^dag: | ||||||
|  |   //   Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} | ||||||
|  |   // | ||||||
|  |   // Checks noted by the authors as passing: Cdag C = Rdag R, and QdagQ = 1. | ||||||
|  |   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   int sliceDir = blockDim; // block direction; assumed to be the first dimension | ||||||
|  |  | ||||||
|  |   // m_rr = R^dag R | ||||||
|  |   sliceInnerProductMatrix(m_rr, R, R, sliceDir); | ||||||
|  |  | ||||||
|  |   // Cholesky factor via Eigen's LLT routine. | ||||||
|  |   // (Eigen also has an ldlt that is documented as more stable.) | ||||||
|  |   const Eigen::MatrixXcd lowerFactor = m_rr.llt().matrixL(); | ||||||
|  |  | ||||||
|  |   C    = lowerFactor.adjoint(); | ||||||
|  |   Cinv = C.inverse(); | ||||||
|  |  | ||||||
|  |   // Q = R C^{-1}, i.e. Q_j = R_i Cinv(i,j). | ||||||
|  |   // NB maddMatrix conventions are already right multiplication, X[j] a[j,i]. | ||||||
|  |   // FIXME (kept from the original): make a sliceMulMatrix to avoid zero vector | ||||||
|  |   sliceMulMatrix(Q, Cinv, R, sliceDir); | ||||||
|  | } | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Call one of several implementations | ||||||
|  | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
|  | { | ||||||
|  |   // Dispatch on the algorithm chosen at construction time (CGtype). | ||||||
|  |   // Any value outside the three known enumerators is a programming error. | ||||||
|  |   switch (CGtype) { | ||||||
|  |   case BlockCGrQ: | ||||||
|  |     BlockCGrQsolve(Linop,Src,Psi); | ||||||
|  |     break; | ||||||
|  |   case BlockCG: | ||||||
|  |     BlockCGsolve(Linop,Src,Psi); | ||||||
|  |     break; | ||||||
|  |   case CGmultiRHS: | ||||||
|  |     CGmultiRHSsolve(Linop,Src,Psi); | ||||||
|  |     break; | ||||||
|  |   default: | ||||||
|  |     assert(0); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////////////////// | ||||||
|  | // BlockCGrQ implementation: | ||||||
|  | //-------------------------- | ||||||
|  | // X is guess/Solution | ||||||
|  | // B is RHS | ||||||
|  | // Solve A X_i = B_i    ;        i refers to Nblock index | ||||||
|  | //////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Solve A X_i = B_i for every block index i with the BlockCGrQ iteration. | ||||||
|  | //   Linop : operator; only Linop.HermOp is used here | ||||||
|  | //   B     : right-hand sides | ||||||
|  | //   X     : initial guess on entry, updated in place towards the solution | ||||||
|  | // Nblock is taken from B._grid->_fdimensions[blockDim]. | ||||||
|  | // Convergence is declared when max_b m_rr(b,b)/ssq[b] < Tolerance^2, where ssq | ||||||
|  | // holds the slice norms of B. IterationsToComplete is set to the loop counter | ||||||
|  | // on both exits; on non-convergence that is MaxIterations+1, and assert(0) | ||||||
|  | // fires first when ErrorOnNoConverge is set. | ||||||
|  | void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  | ||||||
|  | { | ||||||
|  |   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||||
|  |   Nblock = B._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
|  |  | ||||||
|  |   X.checkerboard = B.checkerboard; | ||||||
|  |   conformable(X, B); | ||||||
|  |  | ||||||
|  |   Field tmp(B); | ||||||
|  |   Field Q(B); | ||||||
|  |   Field D(B); | ||||||
|  |   Field Z(B); | ||||||
|  |   Field AD(B); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   // Initial residual computation & set up | ||||||
|  |   std::vector<RealD> residuals(Nblock); | ||||||
|  |   std::vector<RealD> ssq(Nblock); | ||||||
|  |  | ||||||
|  |   // Slice norms of the source: used for the NaN sanity check and later as the | ||||||
|  |   // per-block normalisation of the residual. | ||||||
|  |   sliceNorm(ssq,B,Orthog); | ||||||
|  |   RealD sssum=0; | ||||||
|  |   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||||
|  |  | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(ssq[b])==0); } | ||||||
|  |  | ||||||
|  |   // Same sanity check on the initial guess. | ||||||
|  |   sliceNorm(residuals,X,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   /************************************************************************ | ||||||
|  |    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) | ||||||
|  |    ************************************************************************ | ||||||
|  |    * Dimensions: | ||||||
|  |    * | ||||||
|  |    *   X,B==(Nferm x Nblock) | ||||||
|  |    *   A==(Nferm x Nferm) | ||||||
|  |    *   | ||||||
|  |    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site | ||||||
|  |    *  | ||||||
|  |    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||||
|  |    * for k:  | ||||||
|  |    *   Z  = AD | ||||||
|  |    *   M  = [D^dag Z]^{-1} | ||||||
|  |    *   X  = X + D MC | ||||||
|  |    *   QS = Q - ZM | ||||||
|  |    *   D  = Q + D S^dag | ||||||
|  |    *   C  = S C | ||||||
|  |    */ | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   // Initial block: initial search dir is guess | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; | ||||||
|  |  | ||||||
|  |   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||||
|  |  | ||||||
|  |   Linop.HermOp(X, AD); | ||||||
|  |   tmp = B - AD;   | ||||||
|  |   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; | ||||||
|  |   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||||
|  |   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; | ||||||
|  |   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; | ||||||
|  |   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; | ||||||
|  |   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; | ||||||
|  |   D=Q; | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   // Timers | ||||||
|  |   /////////////////////////////////////// | ||||||
|  |   GridStopWatch sliceInnerTimer; | ||||||
|  |   GridStopWatch sliceMaddTimer; | ||||||
|  |   GridStopWatch QRTimer; | ||||||
|  |   GridStopWatch MatrixTimer; | ||||||
|  |   GridStopWatch SolverTimer; | ||||||
|  |   SolverTimer.Start(); | ||||||
|  |  | ||||||
|  |   int k; | ||||||
|  |   for (k = 1; k <= MaxIterations; k++) { | ||||||
|  |  | ||||||
|  |     //3. Z  = AD | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     Linop.HermOp(D, Z);       | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|  |     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; | ||||||
|  |  | ||||||
|  |     //4. M  = [D^dag Z]^{-1} | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     m_M       = m_DZ.inverse(); | ||||||
|  |     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; | ||||||
|  |      | ||||||
|  |     //5. X  = X + D MC | ||||||
|  |     m_tmp     = m_M * m_C; | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     //6. QS = Q - ZM | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |     QRTimer.Start(); | ||||||
|  |     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); | ||||||
|  |     QRTimer.Stop(); | ||||||
|  |      | ||||||
|  |     //7. D  = Q + D S^dag | ||||||
|  |     m_tmp = m_S.adjoint(); | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     //8. C  = S C | ||||||
|  |     m_C = m_S*m_C; | ||||||
|  |      | ||||||
|  |     /********************* | ||||||
|  |      * convergence monitor | ||||||
|  |      ********************* | ||||||
|  |      */ | ||||||
|  |     m_rr = m_C.adjoint() * m_C; | ||||||
|  |  | ||||||
|  |     RealD max_resid=0; | ||||||
|  |     RealD rrsum=0; | ||||||
|  |     RealD rr; | ||||||
|  |  | ||||||
|  |     // Track the summed residual and the worst block, each relative to ssq[b]. | ||||||
|  |     for(int b=0;b<Nblock;b++) { | ||||||
|  |       rrsum+=real(m_rr(b,b)); | ||||||
|  |       rr = real(m_rr(b,b))/ssq[b]; | ||||||
|  |       if ( rr > max_resid ) max_resid = rr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||||
|  |               <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; | ||||||
|  |  | ||||||
|  |     if ( max_resid < Tolerance*Tolerance ) {  | ||||||
|  |  | ||||||
|  |       SolverTimer.Stop(); | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; | ||||||
|  |  | ||||||
|  |       for(int b=0;b<Nblock;b++){ | ||||||
|  |         std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||||
|  |                   << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|  |       // Recompute A X - B explicitly to report the true residual. | ||||||
|  |       Linop.HermOp(X, AD); | ||||||
|  |       AD = AD-B; | ||||||
|  |       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; | ||||||
|  |  | ||||||
|  |       IterationsToComplete = k; | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; | ||||||
|  |  | ||||||
|  |   if (ErrorOnNoConverge) assert(0); | ||||||
|  |   IterationsToComplete = k; | ||||||
|  | } | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
|  | { | ||||||
|  |   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||||
|  |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
|  |  | ||||||
|  |   Psi.checkerboard = Src.checkerboard; | ||||||
|  |   conformable(Psi, Src); | ||||||
|  |  | ||||||
|  |   Field P(Src); | ||||||
|  |   Field AP(Src); | ||||||
|  |   Field R(Src); | ||||||
|  |    | ||||||
|  |   Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |   Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||||
|  |  | ||||||
|  |   // Initial residual computation & set up | ||||||
|  |   std::vector<RealD> residuals(Nblock); | ||||||
|  |   std::vector<RealD> ssq(Nblock); | ||||||
|  |  | ||||||
|  |   sliceNorm(ssq,Src,Orthog); | ||||||
|  |   RealD sssum=0; | ||||||
|  |   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||||
|  |  | ||||||
|  |   sliceNorm(residuals,Src,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   sliceNorm(residuals,Psi,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   // Initial search dir is guess | ||||||
|  |   Linop.HermOp(Psi, AP); | ||||||
|  |    | ||||||
|  |  | ||||||
|  |   /************************************************************************ | ||||||
|  |    * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) | ||||||
|  |    ************************************************************************ | ||||||
|  |    * O'Leary : R = B - A X | ||||||
|  |    * O'Leary : P = M R ; preconditioner M = 1 | ||||||
|  |    * O'Leary : alpha = PAP^{-1} RMR | ||||||
|  |    * O'Leary : beta  = RMR^{-1}_old RMR_new | ||||||
|  |    * O'Leary : X=X+Palpha | ||||||
|  |    * O'Leary : R_new=R_old-AP alpha | ||||||
|  |    * O'Leary : P=MR_new+P beta | ||||||
|  |    */ | ||||||
|  |  | ||||||
|  |   R = Src - AP;   | ||||||
|  |   P = R; | ||||||
|  |   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||||
|  |  | ||||||
|  |   GridStopWatch sliceInnerTimer; | ||||||
|  |   GridStopWatch sliceMaddTimer; | ||||||
|  |   GridStopWatch MatrixTimer; | ||||||
|  |   GridStopWatch SolverTimer; | ||||||
|  |   SolverTimer.Start(); | ||||||
|  |  | ||||||
|  |   int k; | ||||||
|  |   for (k = 1; k <= MaxIterations; k++) { | ||||||
|  |  | ||||||
|  |     RealD rrsum=0; | ||||||
|  |     for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b)); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||||
|  | 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||||
|  |  | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     Linop.HermOp(P, AP); | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // Alpha | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductMatrix(m_pAp,P,AP,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     m_pAp_inv = m_pAp.inverse(); | ||||||
|  |     m_alpha   = m_pAp_inv * m_rr ; | ||||||
|  |  | ||||||
|  |     // Psi, R update | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi | ||||||
|  |     sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // Beta | ||||||
|  |     m_rr_inv = m_rr.inverse(); | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     m_beta = m_rr_inv *m_rr; | ||||||
|  |  | ||||||
|  |     // Search update | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddMatrix(AP,m_beta,P,R,Orthog); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |     P= AP; | ||||||
|  |  | ||||||
|  |     /********************* | ||||||
|  |      * convergence monitor | ||||||
|  |      ********************* | ||||||
|  |      */ | ||||||
|  |     RealD max_resid=0; | ||||||
|  |     RealD rr; | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       rr = real(m_rr(b,b))/ssq[b]; | ||||||
|  |       if ( rr > max_resid ) max_resid = rr; | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     if ( max_resid < Tolerance*Tolerance ) {  | ||||||
|  |  | ||||||
|  |       SolverTimer.Stop(); | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||||
|  |       for(int b=0;b<Nblock;b++){ | ||||||
|  | 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||||
|  | 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|  |       Linop.HermOp(Psi, AP); | ||||||
|  |       AP = AP-Src; | ||||||
|  |       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||||
|  | 	     | ||||||
|  |       IterationsToComplete = k; | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl; | ||||||
|  |  | ||||||
|  |   if (ErrorOnNoConverge) assert(0); | ||||||
|  |   IterationsToComplete = k; | ||||||
|  | } | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||||
|  | // Use this for spread out across nodes | ||||||
|  | ////////////////////////////////////////////////////////////////////////// | ||||||
|  | void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||||
|  | { | ||||||
|  |   int Orthog = blockDim; // First dimension is block dim | ||||||
|  |   Nblock = Src._grid->_fdimensions[Orthog]; | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||||
|  |  | ||||||
|  |   Psi.checkerboard = Src.checkerboard; | ||||||
|  |   conformable(Psi, Src); | ||||||
|  |  | ||||||
|  |   Field P(Src); | ||||||
|  |   Field AP(Src); | ||||||
|  |   Field R(Src); | ||||||
|  |    | ||||||
|  |   std::vector<ComplexD> v_pAp(Nblock); | ||||||
|  |   std::vector<RealD> v_rr (Nblock); | ||||||
|  |   std::vector<RealD> v_rr_inv(Nblock); | ||||||
|  |   std::vector<RealD> v_alpha(Nblock); | ||||||
|  |   std::vector<RealD> v_beta(Nblock); | ||||||
|  |  | ||||||
|  |   // Initial residual computation & set up | ||||||
|  |   std::vector<RealD> residuals(Nblock); | ||||||
|  |   std::vector<RealD> ssq(Nblock); | ||||||
|  |  | ||||||
|  |   sliceNorm(ssq,Src,Orthog); | ||||||
|  |   RealD sssum=0; | ||||||
|  |   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||||
|  |  | ||||||
|  |   sliceNorm(residuals,Src,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   sliceNorm(residuals,Psi,Orthog); | ||||||
|  |   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||||
|  |  | ||||||
|  |   // Initial search dir is guess | ||||||
|  |   Linop.HermOp(Psi, AP); | ||||||
|  |  | ||||||
|  |   R = Src - AP;   | ||||||
|  |   P = R; | ||||||
|  |   sliceNorm(v_rr,R,Orthog); | ||||||
|  |  | ||||||
|  |   GridStopWatch sliceInnerTimer; | ||||||
|  |   GridStopWatch sliceMaddTimer; | ||||||
|  |   GridStopWatch sliceNormTimer; | ||||||
|  |   GridStopWatch MatrixTimer; | ||||||
|  |   GridStopWatch SolverTimer; | ||||||
|  |  | ||||||
|  |   SolverTimer.Start(); | ||||||
|  |   int k; | ||||||
|  |   for (k = 1; k <= MaxIterations; k++) { | ||||||
|  |  | ||||||
|  |     RealD rrsum=0; | ||||||
|  |     for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||||
|  | 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||||
|  |  | ||||||
|  |     MatrixTimer.Start(); | ||||||
|  |     Linop.HermOp(P, AP); | ||||||
|  |     MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // Alpha | ||||||
|  |     sliceInnerTimer.Start(); | ||||||
|  |     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||||
|  |     sliceInnerTimer.Stop(); | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       v_alpha[b] = v_rr[b]/real(v_pAp[b]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Psi, R update | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi | ||||||
|  |     sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     // Beta | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       v_rr_inv[b] = 1.0/v_rr[b]; | ||||||
|  |     } | ||||||
|  |     sliceNormTimer.Start(); | ||||||
|  |     sliceNorm(v_rr,R,Orthog); | ||||||
|  |     sliceNormTimer.Stop(); | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       v_beta[b] = v_rr_inv[b] *v_rr[b]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Search update | ||||||
|  |     sliceMaddTimer.Start(); | ||||||
|  |     sliceMaddVector(P,v_beta,P,R,Orthog); | ||||||
|  |     sliceMaddTimer.Stop(); | ||||||
|  |  | ||||||
|  |     /********************* | ||||||
|  |      * convergence monitor | ||||||
|  |      ********************* | ||||||
|  |      */ | ||||||
|  |     RealD max_resid=0; | ||||||
|  |     for(int b=0;b<Nblock;b++){ | ||||||
|  |       RealD rr = v_rr[b]/ssq[b]; | ||||||
|  |       if ( rr > max_resid ) max_resid = rr; | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     if ( max_resid < Tolerance*Tolerance ) {  | ||||||
|  |  | ||||||
|  |       SolverTimer.Stop(); | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||||
|  |       for(int b=0;b<Nblock;b++){ | ||||||
|  | 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||||
|  |       } | ||||||
|  |       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||||
|  |  | ||||||
|  |       Linop.HermOp(Psi, AP); | ||||||
|  |       AP = AP-Src; | ||||||
|  |       std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       IterationsToComplete = k; | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl; | ||||||
|  |  | ||||||
|  |   if (ErrorOnNoConverge) assert(0); | ||||||
|  |   IterationsToComplete = k; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } | ||||||
|  | #endif | ||||||
| @@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     cp = a; |     cp = a; | ||||||
|     ssq = norm2(src); |     ssq = norm2(src); | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; | ||||||
|               << "ConjugateGradient: guess " << guess << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   src " << ssq << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:    mp " << d << std::endl; | ||||||
|               << "ConjugateGradient:   src " << ssq << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   mmp " << b << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||||
|               << "ConjugateGradient:    mp " << d << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:     p " << a << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:   mmp " << b << std::endl; |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:  cp,r " << cp << std::endl; |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:     p " << a << std::endl; |  | ||||||
|  |  | ||||||
|     RealD rsq = Tolerance * Tolerance * ssq; |     RealD rsq = Tolerance * Tolerance * ssq; | ||||||
|  |  | ||||||
| @@ -99,8 +93,7 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq |               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; | ||||||
|               << std::endl; |  | ||||||
|  |  | ||||||
|     GridStopWatch LinalgTimer; |     GridStopWatch LinalgTimer; | ||||||
|     GridStopWatch MatrixTimer; |     GridStopWatch MatrixTimer; | ||||||
| @@ -130,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|       p = p * b + r; |       p = p * b + r; | ||||||
|  |  | ||||||
|       LinalgTimer.Stop(); |       LinalgTimer.Stop(); | ||||||
|  |  | ||||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k |       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||||
|                 << " residual " << cp << " target " << rsq << std::endl; |                 << " residual " << cp << " target " << rsq << std::endl; | ||||||
|  |       std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; | ||||||
|  |       std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; | ||||||
|  |  | ||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if (cp <= rsq) { |       if (cp <= rsq) { | ||||||
| @@ -139,32 +135,33 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|         Linop.HermOpAndNorm(psi, mmp, d, qq); |         Linop.HermOpAndNorm(psi, mmp, d, qq); | ||||||
|         p = mmp - src; |         p = mmp - src; | ||||||
|  |  | ||||||
|         RealD mmpnorm = sqrt(norm2(mmp)); |  | ||||||
|         RealD psinorm = sqrt(norm2(psi)); |  | ||||||
|         RealD srcnorm = sqrt(norm2(src)); |         RealD srcnorm = sqrt(norm2(src)); | ||||||
|         RealD resnorm = sqrt(norm2(p)); |         RealD resnorm = sqrt(norm2(p)); | ||||||
|         RealD true_residual = resnorm / srcnorm; |         RealD true_residual = resnorm / srcnorm; | ||||||
|  |  | ||||||
|         std::cout << GridLogMessage |         std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; | ||||||
|                   << "ConjugateGradient: Converged on iteration " << k << std::endl; |         std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; | ||||||
|         std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq) | 	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; | ||||||
|                   << " true residual " << true_residual << " target " | 	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; | ||||||
|                   << Tolerance << std::endl; |  | ||||||
|         std::cout << GridLogMessage << "Time elapsed: Iterations " |         std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||||
|                   << SolverTimer.Elapsed() << " Matrix  " | 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||||
|                   << MatrixTimer.Elapsed() << " Linalg " | 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||||
|                   << LinalgTimer.Elapsed(); | 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||||
|         std::cout << std::endl; |  | ||||||
|  |  | ||||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); |         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||||
|  |  | ||||||
| 	IterationsToComplete = k;	 | 	IterationsToComplete = k;	 | ||||||
|  |  | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" |     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||||
|               << std::endl; |               << std::endl; | ||||||
|  |  | ||||||
|     if (ErrorOnNoConverge) assert(0); |     if (ErrorOnNoConverge) assert(0); | ||||||
|     IterationsToComplete = k; |     IterationsToComplete = k; | ||||||
|  |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,137 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/DenseMatrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_DENSE_MATRIX_H |  | ||||||
| #define GRID_DENSE_MATRIX_H |  | ||||||
|  |  | ||||||
namespace Grid {
    /////////////////////////////////////////////////////////////
    // Matrix utils
    /////////////////////////////////////////////////////////////

// A dense vector is a std::vector; a dense matrix is a vector of rows,
// indexed mat[row][col].
template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;

/** Length of a vector, returned through N **/
template<class T> void Size(const DenseVector<T> & vec, int &N) 
{ 
  N= static_cast<int>(vec.size());
}
/** Rows (N) and columns (M) of a matrix.
    M is the length of the first row, or 0 when the matrix has no rows. **/
template<class T> void Size(const DenseMatrix<T> & mat, int &N,int &M) 
{ 
  N= static_cast<int>(mat.size());
  M= mat.empty() ? 0 : static_cast<int>(mat[0].size());
}

/** Dimension of a square matrix; asserts that rows == columns **/
template<class T> void SizeSquare(const DenseMatrix<T> & mat, int &N) 
{ 
  int M; Size(mat,N,M);
  assert(N==M);
}

/** Resize a vector to N entries **/
template<class T> void Resize(DenseVector<T > & mat, int N) { 
  mat.resize(N);
}
/** Resize a matrix to N rows of M entries each **/
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) { 
  mat.resize(N);
  for(int i=0;i<N;i++){
    mat[i].resize(M);
  }
}
/** Set every entry of the matrix to val.
    val is taken by const reference so literals and temporaries are accepted. **/
template<class T> void Fill(DenseMatrix<T> & mat, const T&val) { 
  for(auto &row : mat){
    for(auto &&entry : row){
      entry = val;
    }
  }
}

/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(const DenseMatrix<T> & mat){
  int N,M;
  Size(mat,N,M);
  DenseMatrix<T> C; Resize(C,M,N);
  for(int i=0;i<M;i++){
  for(int j=0;j<N;j++){
    C[i][j] = mat[j][i];
  }} 
  return C;
}
/** Set DenseMatrix to unit matrix **/
template<class T> void Unity(DenseMatrix<T> &A){
  int N;  SizeSquare(A,N);
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      if ( i==j ) A[i][j] = 1;
      else        A[i][j] = 0;
    } 
  } 
}

/** Add C * I to matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
  int dim;  SizeSquare(A,dim);
  for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;} 
}

/** return the Hermitian conjugate of matrix **/
template<class T>
DenseMatrix<T> HermitianConj(const DenseMatrix<T> &mat){

  int dim; SizeSquare(mat,dim);

  DenseMatrix<T> C; Resize(C,dim,dim);

  for(int i=0;i<dim;i++){
    for(int j=0;j<dim;j++){
      // conj is left unqualified so the overload matching T is picked up
      C[i][j] = conj(mat[j][i]);
    } 
  } 
  return C;
}
/** Get the submatrix of rows [row_st,row_end) and columns [col_st,col_end).
    The ranges are half open and need not be square; they are asserted to lie
    inside A. **/
template <class T>
DenseMatrix<T> GetSubMtx(const DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
  int N,M; Size(A,N,M);
  assert(0<=row_st && row_st<=row_end && row_end<=N);
  assert(0<=col_st && col_st<=col_end && col_end<=M);

  DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);

  for(int i = row_st; i<row_end; i++){
  for(int j = col_st; j<col_end; j++){
    H[i-row_st][j-col_st]=A[i][j];
  }}
  return H;
}

}
|  |  | ||||||
| #include "Householder.h" |  | ||||||
| #include "Francis.h" |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| @@ -1,81 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/EigenSort.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_EIGENSORT_H |  | ||||||
| #define GRID_EIGENSORT_H |  | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|     // Eigen sorter to begin with |  | ||||||
|     ///////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class SortEigen { |  | ||||||
|  private: |  | ||||||
|    |  | ||||||
| //hacking for testing for now |  | ||||||
|  private: |  | ||||||
|   static bool less_lmd(RealD left,RealD right){ |  | ||||||
|     return left > right; |  | ||||||
|   }   |  | ||||||
|   static bool less_pair(std::pair<RealD,Field const*>& left, |  | ||||||
|                         std::pair<RealD,Field const*>& right){ |  | ||||||
|     return left.first > (right.first); |  | ||||||
|   }   |  | ||||||
|    |  | ||||||
|    |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|   void push(DenseVector<RealD>& lmd, |  | ||||||
|             DenseVector<Field>& evec,int N) { |  | ||||||
|     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); |  | ||||||
|     for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; |  | ||||||
|      |  | ||||||
|     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());     |  | ||||||
|     for(int i=0;i<lmd.size();++i) |  | ||||||
|       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); |  | ||||||
|  |  | ||||||
|     partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); |  | ||||||
|  |  | ||||||
|     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); |  | ||||||
|     for(int i=0;i<N;++i){ |  | ||||||
|       lmd[i]=it->first; |  | ||||||
|       evec[i]=*(it->second); |  | ||||||
|       ++it; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   void push(DenseVector<RealD>& lmd,int N) { |  | ||||||
|     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); |  | ||||||
|   } |  | ||||||
|   bool saturated(RealD lmd, RealD thrs) { |  | ||||||
|     return fabs(lmd) > fabs(thrs); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,525 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/Francis.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef FRANCIS_H |  | ||||||
| #define FRANCIS_H |  | ||||||
|  |  | ||||||
| #include <cstdlib> |  | ||||||
| #include <string> |  | ||||||
| #include <cmath> |  | ||||||
| #include <iostream> |  | ||||||
| #include <sstream> |  | ||||||
| #include <stdexcept> |  | ||||||
| #include <fstream> |  | ||||||
| #include <complex> |  | ||||||
| #include <algorithm> |  | ||||||
|  |  | ||||||
| //#include <timer.h> |  | ||||||
| //#include <lapacke.h> |  | ||||||
| //#include <Eigen/Dense> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
| template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); |  | ||||||
| template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. |  | ||||||
| H = |  | ||||||
|       x  x  x  x  x  x  x  x  x |  | ||||||
|       x  x  x  x  x  x  x  x  x |  | ||||||
|       0  x  x  x  x  x  x  x  x |  | ||||||
|       0  0  x  x  x  x  x  x  x |  | ||||||
|       0  0  0  x  x  x  x  x  x |  | ||||||
|       0  0  0  0  x  x  x  x  x |  | ||||||
|       0  0  0  0  0  x  x  x  x |  | ||||||
|       0  0  0  0  0  0  x  x  x |  | ||||||
|       0  0  0  0  0  0  0  x  x |  | ||||||
| Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. |  | ||||||
| **/ |  | ||||||
| template <class T> |  | ||||||
| int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) |  | ||||||
| { |  | ||||||
|   DenseMatrix<T> H = Hin;  |  | ||||||
|  |  | ||||||
|   int N ; SizeSquare(H,N); |  | ||||||
|   int M = N; |  | ||||||
|  |  | ||||||
|   Fill(evals,0); |  | ||||||
|   Fill(evecs,0); |  | ||||||
|  |  | ||||||
|   T s,t,x=0,y=0,z=0; |  | ||||||
|   T u,d; |  | ||||||
|   T apd,amd,bc; |  | ||||||
|   DenseVector<T> p(N,0); |  | ||||||
|   T nrm = Norm(H);    ///DenseMatrix Norm |  | ||||||
|   int n, m; |  | ||||||
|   int e = 0; |  | ||||||
|   int it = 0; |  | ||||||
|   int tot_it = 0; |  | ||||||
|   int l = 0; |  | ||||||
|   int r = 0; |  | ||||||
|   DenseMatrix<T> P; Resize(P,N,N); Unity(P); |  | ||||||
|   DenseVector<int> trows(N,0); |  | ||||||
|  |  | ||||||
|   /// Check if the matrix is really hessenberg, if not abort |  | ||||||
|   RealD sth = 0; |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     for(int i=j+2;i<N;i++){ |  | ||||||
|       sth = abs(H[i][j]); |  | ||||||
|       if(sth > small){ |  | ||||||
| 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; |  | ||||||
| 	exit(1); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   do{ |  | ||||||
|     std::cout << "Francis QR Step N = " << N << std::endl; |  | ||||||
|     /** Check for convergence |  | ||||||
|       x  x  x  x  x |  | ||||||
|       0  x  x  x  x |  | ||||||
|       0  0  x  x  x |  | ||||||
|       0  0  x  x  x |  | ||||||
|       0  0  0  0  x |  | ||||||
|       for this matrix l = 4 |  | ||||||
|      **/ |  | ||||||
|     do{ |  | ||||||
|       l = Chop_subdiag(H,nrm,e,small); |  | ||||||
|       r = 0;    ///May have converged on more than one eval |  | ||||||
|       ///Single eval |  | ||||||
|       if(l == N-1){ |  | ||||||
|         evals[e] = H[l][l]; |  | ||||||
|         N--; e++; r++; it = 0; |  | ||||||
|       } |  | ||||||
|       ///RealD eval |  | ||||||
|       if(l == N-2){ |  | ||||||
|         trows[l+1] = 1;    ///Needed for UTSolve |  | ||||||
|         apd = H[l][l] + H[l+1][l+1]; |  | ||||||
|         amd = H[l][l] - H[l+1][l+1]; |  | ||||||
|         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; |  | ||||||
|         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); |  | ||||||
|         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); |  | ||||||
|         N-=2; e+=2; r++; it = 0; |  | ||||||
|       } |  | ||||||
|     } while(r>0); |  | ||||||
|  |  | ||||||
|     if(N ==0) break; |  | ||||||
|  |  | ||||||
|     DenseVector<T > ck; Resize(ck,3); |  | ||||||
|     DenseVector<T> v;   Resize(v,3); |  | ||||||
|  |  | ||||||
|     for(int m = N-3; m >= l; m--){ |  | ||||||
|       ///Starting vector essentially random shift. |  | ||||||
|       if(it%10 == 0 && N >= 3 && it > 0){ |  | ||||||
|         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); |  | ||||||
|         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); |  | ||||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; |  | ||||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); |  | ||||||
|         z = H[m+1][m]*H[m+2][m+1]; |  | ||||||
|       } |  | ||||||
|       ///Starting vector implicit Q theorem |  | ||||||
|       else{ |  | ||||||
|         s = (H[N-2][N-2] + H[N-1][N-1]); |  | ||||||
|         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); |  | ||||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; |  | ||||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); |  | ||||||
|         z = H[m+1][m]*H[m+2][m+1]; |  | ||||||
|       } |  | ||||||
|       ck[0] = x; ck[1] = y; ck[2] = z; |  | ||||||
|  |  | ||||||
|       if(m == l) break; |  | ||||||
|  |  | ||||||
|       /** Some stupid thing from numerical recipies, seems to work**/ |  | ||||||
|       // PAB.. for heaven's sake quote page, purpose, evidence it works. |  | ||||||
|       //       what sort of comment is that!?!?!? |  | ||||||
|       u=abs(H[m][m-1])*(abs(y)+abs(z)); |  | ||||||
|       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); |  | ||||||
|       if ((T)abs(u+d) == (T)abs(d) ){ |  | ||||||
| 	l = m; break; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       //if (u < small){l = m; break;} |  | ||||||
|     } |  | ||||||
|     if(it > 100000){ |  | ||||||
|      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; |  | ||||||
|      std::cout << "got " << e << " evals " << l << " " << N << std::endl; |  | ||||||
|       exit(1); |  | ||||||
|     } |  | ||||||
|     normalize(ck);    ///Normalization cancels in PHP anyway |  | ||||||
|     T beta; |  | ||||||
|     Householder_vector<T >(ck, 0, 2, v, beta); |  | ||||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,0); |  | ||||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,1); |  | ||||||
|     ///Accumulate eigenvector |  | ||||||
|     Householder_mult<T >(P,v,beta,0,l,l+2,1); |  | ||||||
|     int sw = 0;      ///Are we on the last row? |  | ||||||
|     for(int k=l;k<N-2;k++){ |  | ||||||
|       x = H[k+1][k]; |  | ||||||
|       y = H[k+2][k]; |  | ||||||
|       z = (T)0.0; |  | ||||||
|       if(k+3 <= N-1){ |  | ||||||
| 	z = H[k+3][k]; |  | ||||||
|       } else{ |  | ||||||
| 	sw = 1;  |  | ||||||
| 	v[2] = (T)0.0; |  | ||||||
|       } |  | ||||||
|       ck[0] = x; ck[1] = y; ck[2] = z; |  | ||||||
|       normalize(ck); |  | ||||||
|       Householder_vector<T >(ck, 0, 2-sw, v, beta); |  | ||||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); |  | ||||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); |  | ||||||
|       ///Accumulate eigenvector |  | ||||||
|       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); |  | ||||||
|     } |  | ||||||
|     it++; |  | ||||||
|     tot_it++; |  | ||||||
|   }while(N > 1); |  | ||||||
|   N = evals.size(); |  | ||||||
|   ///Annoying - UT solves in reverse order; |  | ||||||
|   DenseVector<T> tmp; Resize(tmp,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     tmp[i] = evals[N-i-1]; |  | ||||||
|   }  |  | ||||||
|   evals = tmp; |  | ||||||
|   UTeigenvectors(H, trows, evals, evecs); |  | ||||||
|   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} |  | ||||||
|   return tot_it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Four-argument convenience overload of the Wilkinson QR eigen-solver. |  | ||||||
|   It forwards to the five-argument my_Wilkinson, using `small` for both the |  | ||||||
|   `small` and the `tol` argument of that overload. |  | ||||||
|   The matrix is expected to look like |  | ||||||
|   H = |  | ||||||
|   x  x  0  0  0  0 |  | ||||||
|   x  x  x  0  0  0 |  | ||||||
|   0  x  x  x  0  0 |  | ||||||
|   0  0  x  x  x  0 |  | ||||||
|   0  0  0  x  x  x |  | ||||||
|   0  0  0  0  x  x |  | ||||||
|   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. |  | ||||||
|   Returns the value returned by the five-argument overload. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) |  | ||||||
| { |  | ||||||
|   const RealD tol = small;   ///same threshold for both arguments |  | ||||||
|   return my_Wilkinson(Hin, evals, evecs, small, tol); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Implicit QR iteration with Wilkinson shifts, driven by Givens rotations. |  | ||||||
|   A copy of Hin is divided by |Norm(Hin)|, swept until its sub-diagonal has been |  | ||||||
|   chopped away, and the eigenvalues are multiplied by the same norm before returning. |  | ||||||
|     Hin   : input matrix; it is copied, the caller's matrix is not modified. |  | ||||||
|     evals : receives the eigenvalues (stored in reverse order of discovery). |  | ||||||
|     evecs : receives the normalised vectors P * (result of UTeigenvectors). |  | ||||||
|     small : threshold handed to Chop_symm_subdiag. |  | ||||||
|     tol   : threshold of the tridiagonal sanity check; a violation only prints a warning. |  | ||||||
|   Returns the total number of sweeps. Calls exit(1) when more than max_it sweeps |  | ||||||
|   pass without a new eigenvalue being found. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) |  | ||||||
| { |  | ||||||
|   ///Sweep limit; it is also printed in the failure message so the two cannot drift apart. |  | ||||||
|   const int max_it = 1000000; |  | ||||||
|  |  | ||||||
|   int N; SizeSquare(Hin,N); |  | ||||||
|  |  | ||||||
|   ///I don't want to modify the input but matrices must be passed by reference |  | ||||||
|   ///Scale the working copy by the matrix "norm" |  | ||||||
|   DenseMatrix<T> H;  H = Hin; |  | ||||||
|  |  | ||||||
|   RealD Hnorm = abs(Norm(Hin)); |  | ||||||
|   H = H * (1.0 / Hnorm); |  | ||||||
|  |  | ||||||
|   // TODO use openmp and memset |  | ||||||
|   Fill(evals,0); |  | ||||||
|   Fill(evecs,0); |  | ||||||
|  |  | ||||||
|   T t, x = 0, y = 0, z = 0;   ///y is never assigned in this routine, so it stays 0 |  | ||||||
|   T u, d; |  | ||||||
|   T apd, amd, bc; |  | ||||||
|  |  | ||||||
|   T nrm = Norm(H);    ///DenseMatrix Norm |  | ||||||
|   int e = 0;          ///eigenvalues found so far |  | ||||||
|   int it = 0;         ///sweeps since the last eigenvalue was found |  | ||||||
|   int tot_it = 0;     ///sweeps in total (the return value) |  | ||||||
|   int l = 0; |  | ||||||
|   int r = 0; |  | ||||||
|   DenseMatrix<T> P; Resize(P,N,N); |  | ||||||
|   Unity(P); |  | ||||||
|   DenseVector<int> trows(N, 0); |  | ||||||
|   /// Check if the matrix is really symm tridiag (warning only, execution continues) |  | ||||||
|   for(int j = 0; j < N; ++j) |  | ||||||
|   { |  | ||||||
|     for(int i = j + 2; i < N; ++i) |  | ||||||
|     { |  | ||||||
|       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) |  | ||||||
|       { |  | ||||||
|         std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; |  | ||||||
|         std::cout << "Warning tridiagonalize and call again" << std::endl; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   do{ |  | ||||||
|     do{ |  | ||||||
|       ///Chop sub-diagonal entries below `small`; l is the index reported by Chop_symm_subdiag |  | ||||||
|       l = Chop_symm_subdiag(H,nrm, e, small); |  | ||||||
|       r = 0;    ///May have converged on more than one eval |  | ||||||
|       // x  x  0  0  0  0 |  | ||||||
|       // x  x  x  0  0  0 |  | ||||||
|       // 0  x  x  x  0  0 |  | ||||||
|       // 0  0  x  x  x  0 |  | ||||||
|       // 0  0  0  x  x  0 |  | ||||||
|       // 0  0  0  0  0  x  <- l |  | ||||||
|       ///Single eval |  | ||||||
|       if(l == N - 1) |  | ||||||
|       { |  | ||||||
|         evals[e] = H[l][l]; |  | ||||||
|         N--; |  | ||||||
|         e++; |  | ||||||
|         r++; |  | ||||||
|         it = 0; |  | ||||||
|       } |  | ||||||
|       // x  x  0  0  0  0 |  | ||||||
|       // x  x  x  0  0  0 |  | ||||||
|       // 0  x  x  x  0  0 |  | ||||||
|       // 0  0  x  x  0  0 |  | ||||||
|       // 0  0  0  0  x  x  <- l |  | ||||||
|       // 0  0  0  0  x  x |  | ||||||
|       ///Trailing 2x2 block: both eigenvalues from the quadratic formula |  | ||||||
|       if(l == N - 2) |  | ||||||
|       { |  | ||||||
|         trows[l + 1] = 1;    ///Needed for UTSolve |  | ||||||
|         apd = H[l][l] + H[l + 1][ l + 1]; |  | ||||||
|         amd = H[l][l] - H[l + 1][l + 1]; |  | ||||||
|         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; |  | ||||||
|         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); |  | ||||||
|         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); |  | ||||||
|         N -= 2; |  | ||||||
|         e += 2; |  | ||||||
|         r++; |  | ||||||
|         it = 0; |  | ||||||
|       } |  | ||||||
|     }while(r > 0); |  | ||||||
|     ///Everything has been deflated |  | ||||||
|     if(N == 0) break; |  | ||||||
|  |  | ||||||
|     for(int m = N - 3; m >= l; m--) |  | ||||||
|     { |  | ||||||
|       ///Starting vector essentially random shift. |  | ||||||
|       if(it%10 == 0 && N >= 3 && it > 0) |  | ||||||
|       { |  | ||||||
|         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); |  | ||||||
|         x = H[m][m] - t; |  | ||||||
|         z = H[m + 1][m]; |  | ||||||
|       } else { |  | ||||||
|       ///Starting vector implicit Q theorem |  | ||||||
|         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; |  | ||||||
|         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] |  | ||||||
|           / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); |  | ||||||
|         x = H[m][m] - t; |  | ||||||
|         z = H[m + 1][m]; |  | ||||||
|       } |  | ||||||
|       ///Stop once the start row l is reached: x and z now hold the values for row l |  | ||||||
|       if(m == l) |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|       ///Restart the sweep at row m when u is negligible next to d |  | ||||||
|       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); |  | ||||||
|       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); |  | ||||||
|       if ((T)abs(u + d) == (T)abs(d)) |  | ||||||
|       { |  | ||||||
|         l = m; |  | ||||||
|         break; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     if(it > max_it) |  | ||||||
|     { |  | ||||||
|       std::cout << "Wilkinson: bugger it got stuck after " << max_it << " iterations" << std::endl; |  | ||||||
|       std::cout << "got " << e << " evals " << l << " " << N << std::endl; |  | ||||||
|       exit(1); |  | ||||||
|     } |  | ||||||
|     ///First rotation built from (x, z); applied to H from both sides and accumulated in P |  | ||||||
|     T s, c; |  | ||||||
|     Givens_calc<T>(x, z, c, s); |  | ||||||
|     Givens_mult<T>(H, l, l + 1, c, -s, 0); |  | ||||||
|     Givens_mult<T>(H, l, l + 1, c,  s, 1); |  | ||||||
|     Givens_mult<T>(P, l, l + 1, c,  s, 1); |  | ||||||
|     ///Remaining rotations built from the entries H[k+1][k] and H[k+2][k] |  | ||||||
|     for(int k = l; k < N - 2; ++k) |  | ||||||
|     { |  | ||||||
|       x = H[k + 1][k]; |  | ||||||
|       z = H[k + 2][k]; |  | ||||||
|       Givens_calc<T>(x, z, c, s); |  | ||||||
|       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); |  | ||||||
|       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); |  | ||||||
|       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); |  | ||||||
|     } |  | ||||||
|     it++; |  | ||||||
|     tot_it++; |  | ||||||
|   }while(N > 1); |  | ||||||
|  |  | ||||||
|   N = evals.size(); |  | ||||||
|   ///Annoying - UT solves in reverse order; |  | ||||||
|   DenseVector<T> tmp(N); |  | ||||||
|   for(int i = 0; i < N; ++i) |  | ||||||
|     tmp[i] = evals[N-i-1]; |  | ||||||
|   evals = tmp; |  | ||||||
|  |  | ||||||
|   UTeigenvectors(H, trows, evals, evecs); |  | ||||||
|   for(int i = 0; i < evals.size(); ++i) |  | ||||||
|   { |  | ||||||
|     evecs[i] = P * evecs[i]; |  | ||||||
|     normalize(evecs[i]); |  | ||||||
|     evals[i] = evals[i] * Hnorm;   ///undo the initial scaling |  | ||||||
|   } |  | ||||||
|   return tot_it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   turn a matrix A = |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   into |  | ||||||
|   x  x  x  x  x |  | ||||||
|   x  x  x  x  x |  | ||||||
|   0  x  x  x  x |  | ||||||
|   0  0  x  x  x |  | ||||||
|   0  0  0  x  x |  | ||||||
|   with householder rotations. Slow. |  | ||||||
|     A     : square matrix, overwritten with P A P^H. |  | ||||||
|     Q     : receives the same reflections applied from the right (Q -> Q P^H). |  | ||||||
|     start : first column to reduce; also the first row/column index touched by the products. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ |  | ||||||
|  |  | ||||||
|   int N ; SizeSquare(A,N); |  | ||||||
|  |  | ||||||
|   for(int k=start;k<N-2;k++){ |  | ||||||
|     const int len = N-k-1;    ///entries of column k below the diagonal |  | ||||||
|     DenseVector<T > ck,v; Resize(ck,len); Resize(v,len); |  | ||||||
|     for(int i=k+1;i<N;i++){ck[i-k-1] = A[i][k];}  ///kth column |  | ||||||
|     normalize(ck);    ///Normalization cancels in PHP anyway |  | ||||||
|     T beta; |  | ||||||
|     Householder_vector<T >(ck, 0, len-1, v, beta);  ///Householder vector |  | ||||||
|     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA |  | ||||||
|     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H |  | ||||||
|     ///Accumulate eigenvector |  | ||||||
|     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Tridiagonalize a matrix. The reduction itself is delegated to Hess(A,Q,start); |  | ||||||
|   nothing else is done to A here. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ |  | ||||||
|   int dim;            ///value is not used afterwards; the SizeSquare call is kept as in the original |  | ||||||
|   SizeSquare(A,dim); |  | ||||||
|   Hess(A,Q,start); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Force a square matrix into tridiagonal shape by overwriting every entry that lies |  | ||||||
|   two or more places away from the diagonal with zero. No similarity transform is applied. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| void ForceTridiagonal(DenseMatrix<T> &A){ |  | ||||||
|   int dim ; SizeSquare(A,dim); |  | ||||||
|   for(int row=0; row+2<dim; ++row){ |  | ||||||
|     int col = row+2; |  | ||||||
|     while(col < dim){ |  | ||||||
|       A[row][col]=0;   ///above the super-diagonal |  | ||||||
|       A[col][row]=0;   ///below the sub-diagonal |  | ||||||
|       ++col; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Solve a symmetric eigensystem, not necessarily in tridiagonal form. |  | ||||||
|   A copy of Ain is reduced with Tri, handed to my_Wilkinson, and the resulting |  | ||||||
|   vectors are multiplied by the accumulated transformation. |  | ||||||
|   Returns the iteration count reported by my_Wilkinson. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   int dim; SizeSquare(Ain,dim); |  | ||||||
|   DenseMatrix<T > work;  work = Ain;                          ///the input stays untouched |  | ||||||
|   DenseMatrix<T > basis; Resize(basis,dim,dim); Unity(basis); ///accumulates the reduction |  | ||||||
|   Tri(work,basis,0); |  | ||||||
|   const int iterations = my_Wilkinson<T>(work, evals, evecs, small); |  | ||||||
|   int k = 0; |  | ||||||
|   while(k < dim){ |  | ||||||
|     evecs[k] = basis*evecs[k]; |  | ||||||
|     ++k; |  | ||||||
|   } |  | ||||||
|   return iterations; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /** Public entry point: forwards all arguments unchanged to my_Wilkinson and returns its result **/ |  | ||||||
| template <class T> |  | ||||||
| int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   const int iterations = my_Wilkinson(Ain, evals, evecs, small); |  | ||||||
|   return iterations; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Public entry point: forwards all arguments unchanged to my_SymmEigensystem and returns its result **/ |  | ||||||
| template <class T> |  | ||||||
| int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   const int iterations = my_SymmEigensystem(Ain, evals, evecs, small); |  | ||||||
|   return iterations; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Solve a general eigensystem, not necessarily in Hessenberg form. |  | ||||||
|   A copy of Ain is reduced with Hess, handed to QReigensystem, and the resulting |  | ||||||
|   vectors are multiplied by the accumulated transformation Q. |  | ||||||
|   Uses the same free helpers (SizeSquare/Resize/Unity) as my_SymmEigensystem. |  | ||||||
|   Returns the iteration count reported by QReigensystem. |  | ||||||
|  **/ |  | ||||||
| template <class T> |  | ||||||
| int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ |  | ||||||
|   int N; SizeSquare(Ain,N); |  | ||||||
|   DenseMatrix<T > A; A = Ain;                    ///the input stays untouched |  | ||||||
|   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);    ///accumulates the Hessenberg reduction |  | ||||||
|   Hess(A,Q,0); |  | ||||||
|   int it = QReigensystem<T>(A, evals, evecs, small); |  | ||||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} |  | ||||||
|   return it; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,242 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/Householder.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef HOUSEHOLDER_H |  | ||||||
| #define HOUSEHOLDER_H |  | ||||||
|  |  | ||||||
| // Trace helpers: each expands to a single std::cout statement that prints GridLogMessage, __FUNC__, __FILE__ and __LINE__. |  | ||||||
| // TIMER takes an argument A that its expansion never uses; ENTER and LEAVE prefix the output with "ENTRY " and "EXIT  ". |  | ||||||
| // NOTE(review): __FUNC__ is not the standard spelling (__func__); presumably provided elsewhere - confirm. |  | ||||||
| #define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; |  | ||||||
| #define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; |  | ||||||
| #define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; |  | ||||||
|  |  | ||||||
| #include <cstdlib> |  | ||||||
| #include <string> |  | ||||||
| #include <cmath> |  | ||||||
| #include <iostream> |  | ||||||
| #include <sstream> |  | ||||||
| #include <stdexcept> |  | ||||||
| #include <fstream> |  | ||||||
| #include <complex> |  | ||||||
| #include <algorithm> |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
| /** Magnitude ordering used to find the max element in a vector: true when |i| < |j| **/ |  | ||||||
| template <class T> bool cf(T i, T j) { |  | ||||||
|   const auto lhs = abs(i); |  | ||||||
|   const auto rhs = abs(j); |  | ||||||
|   return rhs > lhs; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Calculate a real Givens angle. |  | ||||||
|     y, z : the two entries the rotation is built from. |  | ||||||
|     c, s : outputs; the branch is chosen so that the quotient t has magnitude <= 1. |  | ||||||
|   When z is exactly zero the identity rotation (c = 1, s = 0) is returned immediately. |  | ||||||
|   Without the early return the case y == z == 0 would go on to evaluate -y/z. |  | ||||||
|  **/ |  | ||||||
| template <class T> inline void Givens_calc(T y, T z, T &c, T &s){ |  | ||||||
|  |  | ||||||
|   RealD mz = (RealD)abs(z); |  | ||||||
|  |  | ||||||
|   if(mz==0.0){ |  | ||||||
|     c = 1; s = 0; |  | ||||||
|     return;          ///nothing to rotate; also avoids 0/0 when y is zero as well |  | ||||||
|   } |  | ||||||
|   if(mz >= (RealD)abs(y)){ |  | ||||||
|     T t = -y/z; |  | ||||||
|     s = (T)1.0 / sqrt ((T)1.0 + t * t); |  | ||||||
|     c = s * t; |  | ||||||
|   } else { |  | ||||||
|     T t = -z/y; |  | ||||||
|     c = (T)1.0 / sqrt ((T)1.0 + t * t); |  | ||||||
|     s = c * t; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Apply the plane rotation (c, s) to indices i and k of the square matrix A. |  | ||||||
|   dir == 0 combines rows i and k, dir == 1 combines columns i and k; |  | ||||||
|   any other dir leaves A unchanged. |  | ||||||
|  **/ |  | ||||||
| template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) |  | ||||||
| { |  | ||||||
|   int dim ; SizeSquare(A,dim); |  | ||||||
|  |  | ||||||
|   switch(dir){ |  | ||||||
|   case 0: |  | ||||||
|     for(int col=0;col<dim;col++){ |  | ||||||
|       const T upper = A[i][col]; |  | ||||||
|       const T lower = A[k][col]; |  | ||||||
|       A[i][col] = (c*upper + s*lower); |  | ||||||
|       A[k][col] = (-s*upper + c*lower); |  | ||||||
|     } |  | ||||||
|     break; |  | ||||||
|   case 1: |  | ||||||
|     for(int row=0;row<dim;row++){ |  | ||||||
|       const T left  = A[row][i]; |  | ||||||
|       const T right = A[row][k]; |  | ||||||
|       A[row][i] = (c*left - s*right); |  | ||||||
|       A[row][k] = (s*left + c*right); |  | ||||||
|     } |  | ||||||
|     break; |  | ||||||
|   default: |  | ||||||
|     break; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   From input = x, compute the complex Householder vector v such that |  | ||||||
|   P = (I - b v transpose(v) ), b = 2/v.v |  | ||||||
|   maps entries k..j of x onto a multiple of the unit vector at position k: |  | ||||||
|  |  | ||||||
|   P | x |    | x | <- k |  | ||||||
|     | x |    | 0 | |  | ||||||
|     | x | =  | 0 | |  | ||||||
|     | x |    | 0 | <- j |  | ||||||
|  |  | ||||||
|   These are the "Unreduced" Householder vectors. |  | ||||||
|     input : source vector (taken by value); only entries k..j are read. |  | ||||||
|     v     : output, entries k..j are written. |  | ||||||
|     beta  : output, the factor b. It is set to zero when entries k..j of the input are |  | ||||||
|             all zero, so that it is never left unassigned for the caller. |  | ||||||
|  **/ |  | ||||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) |  | ||||||
| { |  | ||||||
|   ///Largest entry by magnitude; dividing by it keeps the squared sum well scaled |  | ||||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); |  | ||||||
|  |  | ||||||
|   if(abs(m) > 0.0){ |  | ||||||
|     T alpha = 0; |  | ||||||
|  |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = input[i]/m; |  | ||||||
|       alpha = alpha + v[i]*conj(v[i]); |  | ||||||
|     } |  | ||||||
|     alpha = sqrt(alpha); |  | ||||||
|     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); |  | ||||||
|  |  | ||||||
|     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; |  | ||||||
|     else                 v[k] = -alpha; |  | ||||||
|   } else{ |  | ||||||
|     beta = (T)0.0;   ///zero input: no reflection is needed |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = 0.0; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   From input = x, compute the complex Householder vector v such that |  | ||||||
|   P = (I - b v transpose(v) ), b = 2/v.v |  | ||||||
|  |  | ||||||
|   Px = alpha*e_dir |  | ||||||
|  |  | ||||||
|   These are the "Unreduced" Householder vectors. |  | ||||||
|     input : source vector (taken by value); only entries k..j are read. |  | ||||||
|     dir   : index of the entry that is kept. |  | ||||||
|     v     : output, entries k..j are written. |  | ||||||
|     beta  : output, the factor b. It is set to zero when entries k..j of the input are |  | ||||||
|             all zero, so that it is never left unassigned for the caller. |  | ||||||
|  **/ |  | ||||||
|  |  | ||||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) |  | ||||||
| { |  | ||||||
|   ///cf must be named with its template argument, a bare template name cannot be deduced here |  | ||||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); |  | ||||||
|  |  | ||||||
|   if(abs(m) > 0.0){ |  | ||||||
|     T alpha = 0; |  | ||||||
|  |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = input[i]/m; |  | ||||||
|       alpha = alpha + v[i]*conj(v[i]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     alpha = sqrt(alpha); |  | ||||||
|     beta = (T)1.0/(alpha*(alpha + abs(v[dir]) )); |  | ||||||
|  |  | ||||||
|     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; |  | ||||||
|     else                  v[dir] = -alpha; |  | ||||||
|   }else{ |  | ||||||
|     beta = (T)0.0;   ///zero input: no reflection is needed |  | ||||||
|     for(int i=k; i<j+1; i++){ |  | ||||||
|       v[i] = 0.0; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Compute the product PA if trans = 0, AP otherwise, in place. |  | ||||||
|   P = (I - b v transpose(v) ), b = 2/v.v |  | ||||||
|     l    : first column (trans = 0) or row (otherwise) of A that is updated. |  | ||||||
|     k, j : index range of A acted on; v[0..j-k] holds the matching entries of v. |  | ||||||
|   Nothing is done when |beta| is not greater than zero. |  | ||||||
|  **/ |  | ||||||
|  |  | ||||||
| template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) |  | ||||||
| { |  | ||||||
|   int dim ; SizeSquare(A,dim); |  | ||||||
|  |  | ||||||
|   if(!(abs(beta) > 0.0)) return; |  | ||||||
|  |  | ||||||
|   const bool from_left = (trans==0); |  | ||||||
|   for(int p=l; p<dim; p++){ |  | ||||||
|     T proj = 0; |  | ||||||
|     if(from_left){ |  | ||||||
|       ///column p of A |  | ||||||
|       for(int i=k;i<=j;i++){ proj += conj(v[i-k])*A[i][p]; } |  | ||||||
|       proj *= beta; |  | ||||||
|       for(int i=k;i<=j;i++){ A[i][p] = A[i][p]-proj*conj(v[i-k]); } |  | ||||||
|     } else { |  | ||||||
|       ///row p of A |  | ||||||
|       for(int i=k;i<=j;i++){ proj += conj(v[i-k])*A[p][i]; } |  | ||||||
|       proj *= beta; |  | ||||||
|       for(int i=k;i<=j;i++){ A[p][i]=A[p][i]-proj*conj(v[i-k]); } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|   Compute the product PA if trans = 0, AP otherwise, in place. |  | ||||||
|   P = (I - b v transpose(v) ), b = 2/v.v |  | ||||||
|   A is tridiagonal. |  | ||||||
|     l, M : half-open range [l, M) of columns (trans = 0) or rows (otherwise) that are updated. |  | ||||||
|     k, j : index range of A acted on; v[0..j-k] holds the matching entries of v. |  | ||||||
|   The corrections are first collected in a zero-initialised scratch matrix and then added |  | ||||||
|   to A in a second pass. Nothing is done when |beta| is not greater than zero. |  | ||||||
|  **/ |  | ||||||
| template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) |  | ||||||
| { |  | ||||||
|   if(abs(beta) > 0.0){ |  | ||||||
|  |  | ||||||
|     int N ; SizeSquare(A,N); |  | ||||||
|  |  | ||||||
|     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0); |  | ||||||
|  |  | ||||||
|     T s; |  | ||||||
|     for(int p=l; p<M; p++){ |  | ||||||
|       s = 0; |  | ||||||
|       if(trans==0){ |  | ||||||
|         for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; |  | ||||||
|       }else{ |  | ||||||
|         for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; |  | ||||||
|       } |  | ||||||
|       s = beta*s; |  | ||||||
|       if(trans==0){ |  | ||||||
|         ///index with [][] like every other access in this routine |  | ||||||
|         for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k]; |  | ||||||
|       }else{ |  | ||||||
|         for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     ///Second pass: add the collected corrections to A |  | ||||||
|     for(int p=l; p<M; p++){ |  | ||||||
|       if(trans==0){ |  | ||||||
|         for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; |  | ||||||
|       }else{ |  | ||||||
|         for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,453 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/Matrix.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef MATRIX_H |  | ||||||
| #define MATRIX_H |  | ||||||
|  |  | ||||||
| #include <cstdlib> |  | ||||||
| #include <string> |  | ||||||
| #include <cmath> |  | ||||||
| #include <vector> |  | ||||||
| #include <iostream> |  | ||||||
| #include <iomanip> |  | ||||||
| #include <complex> |  | ||||||
| #include <typeinfo> |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /** Sign function: p divided by its own magnitude **/ |  | ||||||
| template <class T> T sign(T p){ |  | ||||||
|   const auto magnitude = abs(p); |  | ||||||
|   return ( p/magnitude ); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| ///////////////////// Hijack STL containers for our wicked means ///////////////////////////////////////// |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| /// Vector is the STL vector; the previous alias referred to itself and named no type. |  | ||||||
| template<class T> using Vector = std::vector<T>; |  | ||||||
| /// Matrix is a Vector of row Vectors, indexed as mat[row][col]. |  | ||||||
| template<class T> using Matrix = Vector<Vector<T> >; |  | ||||||
|  |  | ||||||
| /** Resize a Vector to N entries **/ |  | ||||||
| template<class T> void Resize(Vector<T > & vec, int N) |  | ||||||
| { |  | ||||||
|   vec.resize(N); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Resize a Matrix to N rows of M entries each **/ |  | ||||||
| template<class T> void Resize(Matrix<T > & mat, int N, int M) { |  | ||||||
|   mat.resize(N); |  | ||||||
|   int row = 0; |  | ||||||
|   while(row < N){ |  | ||||||
|     mat[row].resize(M); |  | ||||||
|     ++row; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| /** Report the number of entries of a Vector through N **/ |  | ||||||
| template<class T> void Size(Vector<T> & vec, int &N) |  | ||||||
| { |  | ||||||
|   const int entries = vec.size(); |  | ||||||
|   N = entries; |  | ||||||
| } |  | ||||||
| /** |  | ||||||
|   Report the shape of a Matrix: N rows, M entries in the first row. |  | ||||||
|   An empty Matrix reports M = 0 instead of reading a first row that does not exist. |  | ||||||
|  **/ |  | ||||||
| template<class T> void Size(Matrix<T> & mat, int &N,int &M) |  | ||||||
| { |  | ||||||
|   N= mat.size(); |  | ||||||
|   M= mat.empty() ? 0 : mat[0].size(); |  | ||||||
| } |  | ||||||
| /** Report the dimension N of a Matrix and assert that it is square **/ |  | ||||||
| template<class T> void SizeSquare(Matrix<T> & mat, int &N) |  | ||||||
| { |  | ||||||
|   int cols; |  | ||||||
|   Size(mat,N,cols); |  | ||||||
|   assert(cols==N); |  | ||||||
| } |  | ||||||
| /** Report the shape (N1 x M1) of mat1 and assert that mat2 has the same shape **/ |  | ||||||
| template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1) |  | ||||||
| { |  | ||||||
|   Size(mat1,N1,M1); |  | ||||||
|  |  | ||||||
|   int rows2; |  | ||||||
|   int cols2; |  | ||||||
|   Size(mat2,rows2,cols2); |  | ||||||
|  |  | ||||||
|   assert(N1==rows2); |  | ||||||
|   assert(M1==cols2); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //***************************************** |  | ||||||
| //*	(Complex) Vector operations	* |  | ||||||
| //***************************************** |  | ||||||
|  |  | ||||||
| /** Element-wise conjugate of a Vector, returned as a new Vector of the same length **/ |  | ||||||
| template <class T> Vector<T> conj(Vector<T> p){ |  | ||||||
|   Vector<T> result(p.size()); |  | ||||||
|   int idx = 0; |  | ||||||
|   while(idx < p.size()){ |  | ||||||
|     result[idx] = conj(p[idx]); |  | ||||||
|     ++idx; |  | ||||||
|   } |  | ||||||
|   return result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Norm of a Vector: magnitude of the square root of sum_i p[i]*conj(p[i]) **/ |  | ||||||
| template <class T> T norm(Vector<T> p){ |  | ||||||
|   T acc = 0; |  | ||||||
|   for(const T &entry : p){ |  | ||||||
|     acc = acc + entry*conj(entry); |  | ||||||
|   } |  | ||||||
|   const auto root = sqrt(acc); |  | ||||||
|   return abs(root); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Norm squared of a Vector: magnitude of sum_i p[i]*conj(p[i]) **/ |  | ||||||
| template <class T> T norm2(Vector<T> p){ |  | ||||||
|   T acc = 0; |  | ||||||
|   for(const T &entry : p){ |  | ||||||
|     acc = acc + entry*conj(entry); |  | ||||||
|   } |  | ||||||
|   return abs(acc); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Sum of all elements of a Vector **/ |  | ||||||
| template <class T> T trace(Vector<T> p){ |  | ||||||
|   T total = 0; |  | ||||||
|   for(const T &entry : p){ |  | ||||||
|     total = total + entry; |  | ||||||
|   } |  | ||||||
|   return total; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Overwrite every entry of a Vector with the constant c **/ |  | ||||||
| template <class T> void Fill(Vector<T> &p, T c){ |  | ||||||
|   int idx = 0; |  | ||||||
|   while(idx < p.size()){ |  | ||||||
|     p[idx] = c; |  | ||||||
|     ++idx; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| /** Divide a Vector by its norm in place; a Vector whose norm is not greater than zero is left as is **/ |  | ||||||
| template <class T> void normalize(Vector<T> &p){ |  | ||||||
|   T m = norm(p); |  | ||||||
|   if( !(abs(m) > 0.0) ) return; |  | ||||||
|   int idx = 0; |  | ||||||
|   while(idx < p.size()){ |  | ||||||
|     p[idx] /= m; |  | ||||||
|     ++idx; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| /** Vector times scalar: returns a copy of p with every entry multiplied by s **/ |  | ||||||
| template <class T, class U> Vector<T> times(Vector<T> p, U s){ |  | ||||||
|   int idx = 0; |  | ||||||
|   while(idx < p.size()){ |  | ||||||
|     p[idx] *= s; |  | ||||||
|     ++idx; |  | ||||||
|   } |  | ||||||
|   return p; |  | ||||||
| } |  | ||||||
| /** Scalar times Vector: returns a copy of p with every entry multiplied by s **/ |  | ||||||
| template <class T, class U> Vector<T> times(U s, Vector<T> p){ |  | ||||||
|   int idx = 0; |  | ||||||
|   while(idx < p.size()){ |  | ||||||
|     p[idx] *= s; |  | ||||||
|     ++idx; |  | ||||||
|   } |  | ||||||
|   return p; |  | ||||||
| } |  | ||||||
| /** inner product of a and b = conj(a) . b **/ |  | ||||||
| template <class T> T inner(Vector<T> a, Vector<T> b){ |  | ||||||
| 	T m = 0.; |  | ||||||
| 	for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];} |  | ||||||
| 	return m; |  | ||||||
| } |  | ||||||
| /** sum of a and b = a + b **/ |  | ||||||
| template <class T> Vector<T> add(Vector<T> a, Vector<T> b){ |  | ||||||
| 	Vector<T> m(a.size()); |  | ||||||
| 	for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];} |  | ||||||
| 	return m; |  | ||||||
| } |  | ||||||
| /** sum of a and b = a - b **/ |  | ||||||
| template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){ |  | ||||||
| 	Vector<T> m(a.size()); |  | ||||||
| 	for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];} |  | ||||||
| 	return m; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /**  |  | ||||||
|  ********************************* |  | ||||||
|  *	Matrices	         * |  | ||||||
|  ********************************* |  | ||||||
|  **/ |  | ||||||
|  |  | ||||||
| template<class T> void Fill(Matrix<T> & mat, T&val) {  |  | ||||||
|   int N,M; |  | ||||||
|   Size(mat,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     mat[i][j] = val; |  | ||||||
|   }} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Transpose of a matrix **/ |  | ||||||
| Matrix<T> Transpose(Matrix<T> & mat){ |  | ||||||
|   int N,M; |  | ||||||
|   Size(mat,N,M); |  | ||||||
|   Matrix C; Resize(C,M,N); |  | ||||||
|   for(int i=0;i<M;i++){ |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     C[i][j] = mat[j][i]; |  | ||||||
|   }}  |  | ||||||
|   return C; |  | ||||||
| } |  | ||||||
| /** Set Matrix to unit matrix **/ |  | ||||||
| template<class T> void Unity(Matrix<T> &mat){ |  | ||||||
|   int N;  SizeSquare(mat,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     for(int j=0;j<N;j++){ |  | ||||||
|       if ( i==j ) A[i][j] = 1; |  | ||||||
|       else        A[i][j] = 0; |  | ||||||
|     }  |  | ||||||
|   }  |  | ||||||
| } |  | ||||||
| /** Add C * I to matrix **/ |  | ||||||
| template<class T> |  | ||||||
| void PlusUnit(Matrix<T> & A,T c){ |  | ||||||
|   int dim;  SizeSquare(A,dim); |  | ||||||
|   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** return the Hermitian conjugate of matrix **/ |  | ||||||
| Matrix<T> HermitianConj(Matrix<T> &mat){ |  | ||||||
|  |  | ||||||
|   int dim; SizeSquare(mat,dim); |  | ||||||
|  |  | ||||||
|   Matrix<T> C; Resize(C,dim,dim); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     for(int j=0;j<dim;j++){ |  | ||||||
|       C[i][j] = conj(mat[j][i]); |  | ||||||
|     }  |  | ||||||
|   }  |  | ||||||
|   return C; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** return diagonal entries as a Vector **/ |  | ||||||
| Vector<T> diag(Matrix<T> &A) |  | ||||||
| { |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   Vector<T> d; Resize(d,dim); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     d[i] = A[i][i]; |  | ||||||
|   } |  | ||||||
|   return d; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Left multiply by a Vector **/ |  | ||||||
| Vector<T> operator *(Vector<T> &B,Matrix<T> &A) |  | ||||||
| { |  | ||||||
|   int K,M,N;  |  | ||||||
|   Size(B,K); |  | ||||||
|   Size(A,M,N); |  | ||||||
|   assert(K==M); |  | ||||||
|    |  | ||||||
|   Vector<T> C; Resize(C,N); |  | ||||||
|  |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     T sum = 0.0; |  | ||||||
|     for(int i=0;i<M;i++){ |  | ||||||
|       sum += B[i] * A[i][j]; |  | ||||||
|     } |  | ||||||
|     C[j] =  sum; |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** return 1/diagonal entries as a Vector **/ |  | ||||||
| Vector<T> inv_diag(Matrix<T> & A){ |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   Vector<T> d; Resize(d,dim); |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     d[i] = 1.0/A[i][i]; |  | ||||||
|   } |  | ||||||
|   return d; |  | ||||||
| } |  | ||||||
| /** Matrix Addition **/ |  | ||||||
| inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B) |  | ||||||
| { |  | ||||||
|   int N,M  ; SizeSame(A,B,N,M); |  | ||||||
|   Matrix C; Resize(C,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     for(int j=0;j<M;j++){ |  | ||||||
|       C[i][j] = A[i][j] +  B[i][j]; |  | ||||||
|     }  |  | ||||||
|   }  |  | ||||||
|   return C; |  | ||||||
| }  |  | ||||||
| /** Matrix Subtraction **/ |  | ||||||
| inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){ |  | ||||||
|   int N,M  ; SizeSame(A,B,N,M); |  | ||||||
|   Matrix C; Resize(C,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     C[i][j] = A[i][j] -  B[i][j]; |  | ||||||
|   }} |  | ||||||
|   return C; |  | ||||||
| }  |  | ||||||
|  |  | ||||||
| /** Matrix scalar multiplication **/ |  | ||||||
| inline Matrix<T> operator* (Matrix<T> & A,T c){ |  | ||||||
|   int N,M; Size(A,N,M); |  | ||||||
|   Matrix C; Resize(C,N,M); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|   for(int j=0;j<M;j++){ |  | ||||||
|     C[i][j] = A[i][j]*c; |  | ||||||
|   }}  |  | ||||||
|   return C; |  | ||||||
| }  |  | ||||||
| /** Matrix Matrix multiplication **/ |  | ||||||
| inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){ |  | ||||||
|   int K,L,N,M; |  | ||||||
|   Size(A,K,L); |  | ||||||
|   Size(B,N,M); assert(L==N); |  | ||||||
|   Matrix C; Resize(C,K,M); |  | ||||||
|  |  | ||||||
|   for(int i=0;i<K;i++){ |  | ||||||
|     for(int j=0;j<M;j++){ |  | ||||||
|       T sum = 0.0; |  | ||||||
|       for(int k=0;k<N;k++) sum += A[i][k]*B[k][j]; |  | ||||||
|       C[i][j] =sum; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| }  |  | ||||||
| /** Matrix Vector multiplication **/ |  | ||||||
| inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){ |  | ||||||
|   int M,N,K; |  | ||||||
|   Size(A,N,M); |  | ||||||
|   Size(B,K); assert(K==M); |  | ||||||
|   Vector<T> C; Resize(C,N); |  | ||||||
|   for(int i=0;i<N;i++){ |  | ||||||
|     T sum = 0.0; |  | ||||||
|     for(int j=0;j<M;j++) sum += A[i][j]*B[j]; |  | ||||||
|     C[i] =  sum; |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| }  |  | ||||||
|  |  | ||||||
| /** Some version of Matrix norm **/ |  | ||||||
| /* |  | ||||||
| inline T Norm(){ // this is not a usual L2 norm |  | ||||||
|     T norm = 0; |  | ||||||
|     for(int i=0;i<dim;i++){ |  | ||||||
|       for(int j=0;j<dim;j++){ |  | ||||||
| 	norm += abs(A[i][j]); |  | ||||||
|     }} |  | ||||||
|     return norm; |  | ||||||
|   } |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| /** Some version of Matrix norm **/ |  | ||||||
| template<class T> T LargestDiag(Matrix<T> &A) |  | ||||||
| { |  | ||||||
|   int dim ; SizeSquare(A,dim);  |  | ||||||
|  |  | ||||||
|   T ld = abs(A[0][0]); |  | ||||||
|   for(int i=1;i<dim;i++){ |  | ||||||
|     T cf = abs(A[i][i]); |  | ||||||
|     if(abs(cf) > abs(ld) ){ld = cf;} |  | ||||||
|   } |  | ||||||
|   return ld; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ |  | ||||||
| template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small) |  | ||||||
| { |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   for(int l = dim - 1 - offset; l >= 1; l--) {             		 |  | ||||||
|     if((U)abs(A[l][l - 1]) < (U)small) { |  | ||||||
|       A[l][l-1]=(U)0.0; |  | ||||||
|       return l; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ |  | ||||||
| template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)  |  | ||||||
| { |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   for(int l = dim - 1 - offset; l >= 1; l--) { |  | ||||||
|     if((U)abs(A[l][l - 1]) < (U)small) { |  | ||||||
|       A[l][l - 1] = (U)0.0; |  | ||||||
|       A[l - 1][l] = (U)0.0; |  | ||||||
|       return l; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return 0; |  | ||||||
| } |  | ||||||
| /**Assign a submatrix to a larger one**/ |  | ||||||
| template<class T> |  | ||||||
| void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) |  | ||||||
| { |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|     for(int j = col_st; j<col_end; j++){ |  | ||||||
|       A[i][j] = S[i - row_st][j - col_st]; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /**Get a square submatrix**/ |  | ||||||
| template <class T> |  | ||||||
| Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end) |  | ||||||
| { |  | ||||||
|   Matrix<T> H; Resize(row_end - row_st,col_end-col_st); |  | ||||||
|  |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|   for(int j = col_st; j<col_end; j++){ |  | ||||||
|     H[i-row_st][j-col_st]=A[i][j]; |  | ||||||
|   }} |  | ||||||
|   return H; |  | ||||||
| } |  | ||||||
|    |  | ||||||
|  /**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/ |  | ||||||
| template<class T> |  | ||||||
| void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) |  | ||||||
| { |  | ||||||
|   for(int i = row_st; i<row_end; i++){ |  | ||||||
|   for(int j = col_st; j<col_end; j++){ |  | ||||||
|     A[i][j] = S[i - row_st][j - col_st]; |  | ||||||
|   }} |  | ||||||
| } |  | ||||||
|    |  | ||||||
| /** compute b_i A_ij b_j **/ // surprised no Conj |  | ||||||
| template<class T> T proj(Matrix<T> A, Vector<T> B){ |  | ||||||
|   int dim; SizeSquare(A,dim); |  | ||||||
|   int dimB; Size(B,dimB); |  | ||||||
|   assert(dimB==dim); |  | ||||||
|   T C = 0; |  | ||||||
|   for(int i=0;i<dim;i++){ |  | ||||||
|     T sum = 0.0; |  | ||||||
|     for(int j=0;j<dim;j++){ |  | ||||||
|       sum += A[i][j]*B[j]; |  | ||||||
|     } |  | ||||||
|     C +=  B[i]*sum; // No conj? |  | ||||||
|   } |  | ||||||
|   return C;  |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  ************************************************************* |  | ||||||
|  * |  | ||||||
|  * Matrix Vector products |  | ||||||
|  * |  | ||||||
|  ************************************************************* |  | ||||||
|  */ |  | ||||||
| // Instead make a linop and call my CG; |  | ||||||
|  |  | ||||||
| /// q -> q Q |  | ||||||
| template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q) |  | ||||||
| { |  | ||||||
|   int M; SizeSquare(Q,M); |  | ||||||
|   int N; Size(q,N);  |  | ||||||
|   assert(M==N); |  | ||||||
|  |  | ||||||
|   times(q,Q,N); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// q -> q Q |  | ||||||
| template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N) |  | ||||||
| { |  | ||||||
|   GridBase *grid = q[0]._grid; |  | ||||||
|   int M; SizeSquare(Q,M); |  | ||||||
|   int K; Size(q,K);  |  | ||||||
|   assert(N<M); |  | ||||||
|   assert(N<K); |  | ||||||
|   Vector<Fermion> S(N,grid ); |  | ||||||
|   for(int j=0;j<N;j++){ |  | ||||||
|     S[j] = zero; |  | ||||||
|     for(int k=0;k<N;k++){ |  | ||||||
|       S[j] = S[j] +  q[k]* Q[k][j];  |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   for(int j=0;j<q.size();j++){ |  | ||||||
|     q[j] = S[j]; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -1,75 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/MatrixUtils.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef GRID_MATRIX_UTILS_H |  | ||||||
| #define GRID_MATRIX_UTILS_H |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|   namespace MatrixUtils {  |  | ||||||
|  |  | ||||||
|     template<class T> inline void Size(Matrix<T>& A,int &N,int &M){ |  | ||||||
|       N=A.size(); assert(N>0); |  | ||||||
|       M=A[0].size(); |  | ||||||
|       for(int i=0;i<N;i++){ |  | ||||||
| 	assert(A[i].size()==M); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class T> inline void SizeSquare(Matrix<T>& A,int &N) |  | ||||||
|     { |  | ||||||
|       int M; |  | ||||||
|       Size(A,N,M); |  | ||||||
|       assert(N==M); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class T> inline void Fill(Matrix<T>& A,T & val) |  | ||||||
|     {  |  | ||||||
|       int N,M; |  | ||||||
|       Size(A,N,M); |  | ||||||
|       for(int i=0;i<N;i++){ |  | ||||||
|       for(int j=0;j<M;j++){ |  | ||||||
| 	A[i][j]=val; |  | ||||||
|       }} |  | ||||||
|     } |  | ||||||
|     template<class T> inline void Diagonal(Matrix<T>& A,T & val) |  | ||||||
|     {  |  | ||||||
|       int N; |  | ||||||
|       SizeSquare(A,N); |  | ||||||
|       for(int i=0;i<N;i++){ |  | ||||||
| 	A[i][i]=val; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     template<class T> inline void Identity(Matrix<T>& A) |  | ||||||
|     { |  | ||||||
|       Fill(A,0.0); |  | ||||||
|       Diagonal(A,1.0); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   }; |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -141,85 +141,5 @@ namespace Grid { | |||||||
|     }      |     }      | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Take a matrix and form a Red Black solver calling a Herm solver |  | ||||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   template<class Field> class SchurRedBlackDiagTwoSolve { |  | ||||||
|   private: |  | ||||||
|     OperatorFunction<Field> & _HermitianRBSolver; |  | ||||||
|     int CBfactorise; |  | ||||||
|   public: |  | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|     // Wrap the usual normal equations Schur trick |  | ||||||
|     ///////////////////////////////////////////////////// |  | ||||||
|   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  : |  | ||||||
|      _HermitianRBSolver(HermitianRBSolver)  |  | ||||||
|     {  |  | ||||||
|       CBfactorise=0; |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     template<class Matrix> |  | ||||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ |  | ||||||
|  |  | ||||||
|       // FIXME CGdiagonalMee not implemented virtual function |  | ||||||
|       // FIXME use CBfactorise to control schur decomp |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       GridBase *fgrid= _Matrix.Grid(); |  | ||||||
|  |  | ||||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|   |  | ||||||
|       Field src_e(grid); |  | ||||||
|       Field src_o(grid); |  | ||||||
|       Field sol_e(grid); |  | ||||||
|       Field sol_o(grid); |  | ||||||
|       Field   tmp(grid); |  | ||||||
|       Field  Mtmp(grid); |  | ||||||
|       Field resid(fgrid); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,in); |  | ||||||
|       pickCheckerboard(Odd ,src_o,in); |  | ||||||
|       pickCheckerboard(Even,sol_e,out); |  | ||||||
|       pickCheckerboard(Odd ,sol_o,out); |  | ||||||
|      |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); |  | ||||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      |  | ||||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      |  | ||||||
|  |  | ||||||
|       // get the right MpcDag |  | ||||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        |  | ||||||
|  |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       // Call the red-black solver |  | ||||||
|       ////////////////////////////////////////////////////////////// |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; |  | ||||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); |  | ||||||
|       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); |  | ||||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); |  | ||||||
|  |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); |  | ||||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); |  | ||||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); |  | ||||||
|       |  | ||||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); |  | ||||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); |  | ||||||
|  |  | ||||||
|       // Verify the unprec residual |  | ||||||
|       _Matrix.M(out,resid);  |  | ||||||
|       resid = resid-in; |  | ||||||
|       RealD ns = norm2(in); |  | ||||||
|       RealD nr = norm2(resid); |  | ||||||
|  |  | ||||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; |  | ||||||
|     }      |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,15 +0,0 @@ | |||||||
| - ConjugateGradientMultiShift |  | ||||||
| - MCR |  | ||||||
|  |  | ||||||
| - Potentially Useful Boost libraries |  | ||||||
|  |  | ||||||
| - MultiArray |  | ||||||
| - Aligned allocator; memory pool |  | ||||||
| - Remez -- Mike or Boost? |  | ||||||
| - Multiprecision |  | ||||||
| - quaternians |  | ||||||
| - Tokenize |  | ||||||
| - Serialization |  | ||||||
| - Regex |  | ||||||
| - Proto (ET) |  | ||||||
| - uBlas |  | ||||||
| @@ -1,122 +0,0 @@ | |||||||
| #include <math.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <vector> |  | ||||||
|  |  | ||||||
| struct Bisection { |  | ||||||
|  |  | ||||||
| static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig) |  | ||||||
| { |  | ||||||
|   int i,j; |  | ||||||
|   std::vector<RealD> evec1(row_num+3); |  | ||||||
|   std::vector<RealD> evec2(row_num+3); |  | ||||||
|   RealD eps2; |  | ||||||
|   ALPHA[1]=0.; |  | ||||||
|   BETHA[1]=0.; |  | ||||||
|   for(i=0;i<row_num-1;i++) { |  | ||||||
|     ALPHA[i+1] = A[i*(row_num+1)].real(); |  | ||||||
|     BETHA[i+2] = A[i*(row_num+1)+1].real(); |  | ||||||
|   } |  | ||||||
|   ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real(); |  | ||||||
|   bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2); |  | ||||||
|   bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2); |  | ||||||
|  |  | ||||||
|   // Do we really need to sort here? |  | ||||||
|   int begin=1; |  | ||||||
|   int end = row_num; |  | ||||||
|   int swapped=1; |  | ||||||
|   while(swapped) { |  | ||||||
|     swapped=0; |  | ||||||
|     for(i=begin;i<end;i++){ |  | ||||||
|       if(mag(evec2[i])>mag(evec2[i+1]))	{ |  | ||||||
| 	swap(evec2+i,evec2+i+1); |  | ||||||
| 	swapped=1; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     end--; |  | ||||||
|     for(i=end-1;i>=begin;i--){ |  | ||||||
|       if(mag(evec2[i])>mag(evec2[i+1]))	{ |  | ||||||
| 	swap(evec2+i,evec2+i+1); |  | ||||||
| 	swapped=1; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     begin++; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   for(i=0;i<row_num;i++){ |  | ||||||
|     for(j=0;j<row_num;j++) { |  | ||||||
|       if(i==j) H[i*row_num+j]=evec2[i+1]; |  | ||||||
|       else H[i*row_num+j]=0.; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void bisec(std::vector<RealD> &c,    |  | ||||||
| 		  std::vector<RealD> &b, |  | ||||||
| 		  int n, |  | ||||||
| 		  int m1, |  | ||||||
| 		  int m2, |  | ||||||
| 		  RealD eps1, |  | ||||||
| 		  RealD relfeh, |  | ||||||
| 		  std::vector<RealD> &x, |  | ||||||
| 		  RealD &eps2) |  | ||||||
| { |  | ||||||
|   std::vector<RealD> wu(n+2); |  | ||||||
|  |  | ||||||
|   RealD h,q,x1,xu,x0,xmin,xmax;  |  | ||||||
|   int i,a,k; |  | ||||||
|  |  | ||||||
|   b[1]=0.0; |  | ||||||
|   xmin=c[n]-fabs(b[n]); |  | ||||||
|   xmax=c[n]+fabs(b[n]); |  | ||||||
|   for(i=1;i<n;i++){ |  | ||||||
|     h=fabs(b[i])+fabs(b[i+1]); |  | ||||||
|     if(c[i]+h>xmax) xmax= c[i]+h; |  | ||||||
|     if(c[i]-h<xmin) xmin= c[i]-h; |  | ||||||
|   } |  | ||||||
|   xmax *=2.; |  | ||||||
|  |  | ||||||
|   eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin); |  | ||||||
|   if(eps1<=0.0) eps1=eps2; |  | ||||||
|   eps2=0.5*eps1+7.0*(eps2); |  | ||||||
|   x0=xmax; |  | ||||||
|   for(i=m1;i<=m2;i++){ |  | ||||||
|     x[i]=xmax; |  | ||||||
|     wu[i]=xmin; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   for(k=m2;k>=m1;k--){ |  | ||||||
|     xu=xmin; |  | ||||||
|     i=k; |  | ||||||
|     do{ |  | ||||||
|       if(xu<wu[i]){ |  | ||||||
| 	xu=wu[i]; |  | ||||||
| 	i=m1-1; |  | ||||||
|       } |  | ||||||
|       i--; |  | ||||||
|     }while(i>=m1); |  | ||||||
|     if(x0>x[k]) x0=x[k]; |  | ||||||
|     while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ |  | ||||||
|       x1=(xu+x0)/2; |  | ||||||
|  |  | ||||||
|       a=0; |  | ||||||
|       q=1.0; |  | ||||||
|       for(i=1;i<=n;i++){ |  | ||||||
| 	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); |  | ||||||
| 	if(q<0) a++; |  | ||||||
|       } |  | ||||||
|       //			printf("x1=%e a=%d\n",x1,a); |  | ||||||
|       if(a<k){ |  | ||||||
| 	if(a<m1){ |  | ||||||
| 	  xu=x1; |  | ||||||
| 	  wu[m1]=x1; |  | ||||||
| 	}else { |  | ||||||
| 	  xu=x1; |  | ||||||
| 	  wu[a+1]=x1; |  | ||||||
| 	  if(x[a]>x1) x[a]=x1; |  | ||||||
| 	} |  | ||||||
|       }else x0=x1; |  | ||||||
|     } |  | ||||||
|     x[k]=(x0+xu)/2; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| } |  | ||||||
| @@ -1 +0,0 @@ | |||||||
|  |  | ||||||
| @@ -11,7 +11,7 @@ int PointerCache::victim; | |||||||
|  |  | ||||||
| void *PointerCache::Insert(void *ptr,size_t bytes) { | void *PointerCache::Insert(void *ptr,size_t bytes) { | ||||||
|  |  | ||||||
|   if (bytes < 4096 ) return NULL; |   if (bytes < 4096 ) return ptr; | ||||||
|  |  | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
|   assert(omp_in_parallel()==0); |   assert(omp_in_parallel()==0); | ||||||
|   | |||||||
| @@ -92,18 +92,34 @@ public: | |||||||
|     size_type bytes = __n*sizeof(_Tp); |     size_type bytes = __n*sizeof(_Tp); | ||||||
|  |  | ||||||
|     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); |     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); | ||||||
|  |     //    if ( ptr != NULL )  | ||||||
|  |     //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl; | ||||||
|  |  | ||||||
|  |     ////////////////// | ||||||
|  |     // Hack 2MB align; could make option probably doesn't need configurability | ||||||
|  |     ////////////////// | ||||||
|  | //define GRID_ALLOC_ALIGN (128) | ||||||
|  | #define GRID_ALLOC_ALIGN (2*1024*1024) | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); | ||||||
| #else | #else | ||||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); |     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); | ||||||
| #endif | #endif | ||||||
|  |     //    std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl; | ||||||
|  |     // First touch optimise in threaded loop | ||||||
|  |     uint8_t *cp = (uint8_t *)ptr; | ||||||
|  | #ifdef GRID_OMP | ||||||
|  | #pragma omp parallel for | ||||||
|  | #endif | ||||||
|  |     for(size_type n=0;n<bytes;n+=4096){ | ||||||
|  |       cp[n]=0; | ||||||
|  |     } | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void deallocate(pointer __p, size_type __n) {  |   void deallocate(pointer __p, size_type __n) {  | ||||||
|     size_type bytes = __n * sizeof(_Tp); |     size_type bytes = __n * sizeof(_Tp); | ||||||
|  |  | ||||||
|     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); |     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); | ||||||
|  |  | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
| @@ -182,10 +198,17 @@ public: | |||||||
|   pointer allocate(size_type __n, const void* _p= 0)  |   pointer allocate(size_type __n, const void* _p= 0)  | ||||||
|   { |   { | ||||||
| #ifdef HAVE_MM_MALLOC_H | #ifdef HAVE_MM_MALLOC_H | ||||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); |     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); | ||||||
| #else | #else | ||||||
|     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); |     _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); | ||||||
| #endif | #endif | ||||||
|  |     size_type bytes = __n*sizeof(_Tp); | ||||||
|  |     uint8_t *cp = (uint8_t *)ptr; | ||||||
|  |     // One touch per 4k page, static OMP loop to catch same loop order | ||||||
|  | #pragma omp parallel for schedule(static) | ||||||
|  |     for(size_type n=0;n<bytes;n+=4096){ | ||||||
|  |       cp[n]=0; | ||||||
|  |     } | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
|   void deallocate(pointer __p, size_type) {  |   void deallocate(pointer __p, size_type) {  | ||||||
|   | |||||||
| @@ -6,8 +6,9 @@ | |||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |     Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -49,7 +50,6 @@ public: | |||||||
|  |  | ||||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; |     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Physics Grid information. |     // Physics Grid information. | ||||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. |     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||||
|     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal |     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal | ||||||
| @@ -62,13 +62,12 @@ public: | |||||||
|     int _isites; |     int _isites; | ||||||
|     int _fsites;                  // _isites*_osites = product(dimensions). |     int _fsites;                  // _isites*_osites = product(dimensions). | ||||||
|     int _gsites; |     int _gsites; | ||||||
|     std::vector<int> _slice_block;   // subslice information |     std::vector<int> _slice_block;// subslice information | ||||||
|     std::vector<int> _slice_stride; |     std::vector<int> _slice_stride; | ||||||
|     std::vector<int> _slice_nblock; |     std::vector<int> _slice_nblock; | ||||||
|  |  | ||||||
|     // Might need these at some point |     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] | ||||||
|     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] |     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||||
|     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
| @@ -121,6 +120,12 @@ public: | |||||||
|       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); |       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) { | ||||||
|  |       lcoor.resize(_ndimension); | ||||||
|  |       for (int d = 0; d < _ndimension; d++) | ||||||
|  |         lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
|     // SIMD lane addressing |     // SIMD lane addressing | ||||||
|     ////////////////////////////////////////////////////////// |     ////////////////////////////////////////////////////////// | ||||||
| @@ -128,6 +133,7 @@ public: | |||||||
|     { |     { | ||||||
|       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); |       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline int PermuteDim(int dimension){ |     inline int PermuteDim(int dimension){ | ||||||
|       return _simd_layout[dimension]>1; |       return _simd_layout[dimension]>1; | ||||||
|     } |     } | ||||||
| @@ -168,11 +174,31 @@ public: | |||||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  |     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||||
|     inline int Nd    (void) const { return _ndimension;}; |     inline int Nd    (void) const { return _ndimension;}; | ||||||
|  |  | ||||||
|  |     inline const std::vector<int> LocalStarts(void)             { return _lstart;    }; | ||||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; |     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; |     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; |     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||||
|     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; |     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////////// | ||||||
|  |     // Utility to print the full decomposition details  | ||||||
|  |     //////////////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |     void show_decomposition(){ | ||||||
|  |       std::cout << GridLogMessage << "\tFull Dimensions    : " << _fdimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tSIMD layout        : " << _simd_layout << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tGlobal Dimensions  : " << _gdimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tLocal Dimensions   : " << _ldimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tOuter strides      : " << _ostride << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tInner strides      : " << _istride << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tiSites             : " << _isites << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\toSites             : " << _osites << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tlSites             : " << lSites() << std::endl;         | ||||||
|  |       std::cout << GridLogMessage << "\tgSites             : " << gSites() << std::endl; | ||||||
|  |       std::cout << GridLogMessage << "\tNd                 : " << _ndimension << std::endl;              | ||||||
|  |     }  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
|     // Global addressing |     // Global addressing | ||||||
|     //////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////// | ||||||
| @@ -184,6 +210,9 @@ public: | |||||||
|       assert(lidx<lSites()); |       assert(lidx<lSites()); | ||||||
|       Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); |       Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ |     void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ | ||||||
|       gidx=0; |       gidx=0; | ||||||
|       int mult=1; |       int mult=1; | ||||||
|   | |||||||
| @@ -63,8 +63,7 @@ public: | |||||||
|     } |     } | ||||||
|     GridCartesian(const std::vector<int> &dimensions, |     GridCartesian(const std::vector<int> &dimensions, | ||||||
|                   const std::vector<int> &simd_layout, |                   const std::vector<int> &simd_layout, | ||||||
| 		  const std::vector<int> &processor_grid |                   const std::vector<int> &processor_grid) : GridBase(processor_grid) | ||||||
| 		  ) : GridBase(processor_grid) |  | ||||||
|     { |     { | ||||||
|       /////////////////////// |       /////////////////////// | ||||||
|       // Grid information |       // Grid information | ||||||
| @@ -76,34 +75,44 @@ public: | |||||||
|       _ldimensions.resize(_ndimension); |       _ldimensions.resize(_ndimension); | ||||||
|       _rdimensions.resize(_ndimension); |       _rdimensions.resize(_ndimension); | ||||||
|       _simd_layout.resize(_ndimension); |       _simd_layout.resize(_ndimension); | ||||||
|  |       _lstart.resize(_ndimension); | ||||||
|  |       _lend.resize(_ndimension); | ||||||
|  |  | ||||||
|       _ostride.resize(_ndimension); |       _ostride.resize(_ndimension); | ||||||
|       _istride.resize(_ndimension); |       _istride.resize(_ndimension); | ||||||
|  |  | ||||||
|       _fsites = _gsites = _osites = _isites = 1; |       _fsites = _gsites = _osites = _isites = 1; | ||||||
|  |  | ||||||
|         for(int d=0;d<_ndimension;d++){ |       for (int d = 0; d < _ndimension; d++) | ||||||
|  |       { | ||||||
|         _fdimensions[d] = dimensions[d];   // Global dimensions |         _fdimensions[d] = dimensions[d];   // Global dimensions | ||||||
|         _gdimensions[d] = _fdimensions[d]; // Global dimensions |         _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||||
|         _simd_layout[d] = simd_layout[d]; |         _simd_layout[d] = simd_layout[d]; | ||||||
|         _fsites = _fsites * _fdimensions[d]; |         _fsites = _fsites * _fdimensions[d]; | ||||||
|         _gsites = _gsites * _gdimensions[d]; |         _gsites = _gsites * _gdimensions[d]; | ||||||
|  |  | ||||||
| 	  //FIXME check for exact division |  | ||||||
|  |  | ||||||
|         // Use a reduced simd grid |         // Use a reduced simd grid | ||||||
| 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions |         _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions | ||||||
| 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition |         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||||
|  |  | ||||||
|  |         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition | ||||||
|  |         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | ||||||
|  |  | ||||||
|  |         _lstart[d] = _processor_coor[d] * _ldimensions[d]; | ||||||
|  |         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; | ||||||
|         _osites *= _rdimensions[d]; |         _osites *= _rdimensions[d]; | ||||||
|         _isites *= _simd_layout[d]; |         _isites *= _simd_layout[d]; | ||||||
|  |  | ||||||
|         // Addressing support |         // Addressing support | ||||||
| 	  if ( d==0 ) { |         if (d == 0) | ||||||
|  |         { | ||||||
|           _ostride[d] = 1; |           _ostride[d] = 1; | ||||||
|           _istride[d] = 1; |           _istride[d] = 1; | ||||||
| 	  } else { |         } | ||||||
| 	    _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; |         else | ||||||
| 	    _istride[d] = _istride[d-1]*_simd_layout[d-1]; |         { | ||||||
|  |           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; | ||||||
|  |           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|  |  | ||||||
| @@ -114,21 +123,20 @@ public: | |||||||
|       _slice_stride.resize(_ndimension); |       _slice_stride.resize(_ndimension); | ||||||
|       _slice_nblock.resize(_ndimension); |       _slice_nblock.resize(_ndimension); | ||||||
|  |  | ||||||
|         int block =1; |       int block = 1; | ||||||
|         int nblock=1; |       int nblock = 1; | ||||||
|         for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; |       for (int d = 0; d < _ndimension; d++) | ||||||
|  |         nblock *= _rdimensions[d]; | ||||||
|  |  | ||||||
|         for(int d=0;d<_ndimension;d++){ |       for (int d = 0; d < _ndimension; d++) | ||||||
|             nblock/=_rdimensions[d]; |       { | ||||||
|             _slice_block[d] =block; |         nblock /= _rdimensions[d]; | ||||||
|             _slice_stride[d]=_ostride[d]*_rdimensions[d]; |         _slice_block[d] = block; | ||||||
|             _slice_nblock[d]=nblock; |         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | ||||||
|             block = block*_rdimensions[d]; |         _slice_nblock[d] = nblock; | ||||||
|  |         block = block * _rdimensions[d]; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|     }; |     }; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -140,46 +140,57 @@ public: | |||||||
|       // Grid information |       // Grid information | ||||||
|       /////////////////////// |       /////////////////////// | ||||||
|       _checker_dim = checker_dim; |       _checker_dim = checker_dim; | ||||||
|       assert(checker_dim_mask[checker_dim]==1); |       assert(checker_dim_mask[checker_dim] == 1); | ||||||
|       _ndimension = dimensions.size(); |       _ndimension = dimensions.size(); | ||||||
|       assert(checker_dim_mask.size()==_ndimension); |       assert(checker_dim_mask.size() == _ndimension); | ||||||
|       assert(processor_grid.size()==_ndimension); |       assert(processor_grid.size() == _ndimension); | ||||||
|       assert(simd_layout.size()==_ndimension); |       assert(simd_layout.size() == _ndimension); | ||||||
|  |  | ||||||
|       _fdimensions.resize(_ndimension); |       _fdimensions.resize(_ndimension); | ||||||
|       _gdimensions.resize(_ndimension); |       _gdimensions.resize(_ndimension); | ||||||
|       _ldimensions.resize(_ndimension); |       _ldimensions.resize(_ndimension); | ||||||
|       _rdimensions.resize(_ndimension); |       _rdimensions.resize(_ndimension); | ||||||
|       _simd_layout.resize(_ndimension); |       _simd_layout.resize(_ndimension); | ||||||
|  |       _lstart.resize(_ndimension); | ||||||
|  |       _lend.resize(_ndimension); | ||||||
|  |  | ||||||
|       _ostride.resize(_ndimension); |       _ostride.resize(_ndimension); | ||||||
|       _istride.resize(_ndimension); |       _istride.resize(_ndimension); | ||||||
|  |  | ||||||
|       _fsites = _gsites = _osites = _isites = 1; |       _fsites = _gsites = _osites = _isites = 1; | ||||||
|  |  | ||||||
|       _checker_dim_mask=checker_dim_mask; |       _checker_dim_mask = checker_dim_mask; | ||||||
|  |  | ||||||
|       for(int d=0;d<_ndimension;d++){ |       for (int d = 0; d < _ndimension; d++) | ||||||
|  |       { | ||||||
|         _fdimensions[d] = dimensions[d]; |         _fdimensions[d] = dimensions[d]; | ||||||
|         _gdimensions[d] = _fdimensions[d]; |         _gdimensions[d] = _fdimensions[d]; | ||||||
|         _fsites = _fsites * _fdimensions[d]; |         _fsites = _fsites * _fdimensions[d]; | ||||||
|         _gsites = _gsites * _gdimensions[d]; |         _gsites = _gsites * _gdimensions[d]; | ||||||
|  |  | ||||||
| 	if (d==_checker_dim) { |         if (d == _checker_dim) | ||||||
| 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard |         { | ||||||
|  |           assert((_gdimensions[d] & 0x1) == 0); | ||||||
|  |           _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard | ||||||
|         } |         } | ||||||
| 	_ldimensions[d] = _gdimensions[d]/_processors[d]; |         _ldimensions[d] = _gdimensions[d] / _processors[d]; | ||||||
|  |         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||||
|  |         _lstart[d] = _processor_coor[d] * _ldimensions[d]; | ||||||
|  |         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; | ||||||
|  |  | ||||||
|         // Use a reduced simd grid |         // Use a reduced simd grid | ||||||
|         _simd_layout[d] = simd_layout[d]; |         _simd_layout[d] = simd_layout[d]; | ||||||
| 	_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; |         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer | ||||||
| 	assert(_rdimensions[d]>0); |         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | ||||||
|  |         assert(_rdimensions[d] > 0); | ||||||
|  |  | ||||||
|         // all elements of a simd vector must have same checkerboard. |         // all elements of a simd vector must have same checkerboard. | ||||||
|         // If Ls vectorised, this must still be the case; e.g. dwf rb5d |         // If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||||
| 	if ( _simd_layout[d]>1 ) { |         if (_simd_layout[d] > 1) | ||||||
| 	  if ( checker_dim_mask[d] ) {  |         { | ||||||
| 	    assert( (_rdimensions[d]&0x1) == 0 ); |           if (checker_dim_mask[d]) | ||||||
|  |           { | ||||||
|  |             assert((_rdimensions[d] & 0x1) == 0); | ||||||
|           } |           } | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -187,15 +198,16 @@ public: | |||||||
|         _isites *= _simd_layout[d]; |         _isites *= _simd_layout[d]; | ||||||
|  |  | ||||||
|         // Addressing support |         // Addressing support | ||||||
| 	if ( d==0 ) { |         if (d == 0) | ||||||
|  |         { | ||||||
|           _ostride[d] = 1; |           _ostride[d] = 1; | ||||||
|           _istride[d] = 1; |           _istride[d] = 1; | ||||||
| 	} else { |  | ||||||
| 	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; |  | ||||||
| 	  _istride[d] = _istride[d-1]*_simd_layout[d-1]; |  | ||||||
|         } |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; | ||||||
|  |           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; | ||||||
|  |         } | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       //////////////////////////////////////////////////////////////////////////////////////////// |       //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -205,40 +217,48 @@ public: | |||||||
|       _slice_stride.resize(_ndimension); |       _slice_stride.resize(_ndimension); | ||||||
|       _slice_nblock.resize(_ndimension); |       _slice_nblock.resize(_ndimension); | ||||||
|  |  | ||||||
|       int block =1; |       int block = 1; | ||||||
|       int nblock=1; |       int nblock = 1; | ||||||
|       for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; |       for (int d = 0; d < _ndimension; d++) | ||||||
|  |         nblock *= _rdimensions[d]; | ||||||
|  |  | ||||||
|       for(int d=0;d<_ndimension;d++){ |       for (int d = 0; d < _ndimension; d++) | ||||||
| 	nblock/=_rdimensions[d]; |       { | ||||||
| 	_slice_block[d] =block; |         nblock /= _rdimensions[d]; | ||||||
| 	_slice_stride[d]=_ostride[d]*_rdimensions[d]; |         _slice_block[d] = block; | ||||||
| 	_slice_nblock[d]=nblock; |         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | ||||||
| 	block = block*_rdimensions[d]; |         _slice_nblock[d] = nblock; | ||||||
|  |         block = block * _rdimensions[d]; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Create a checkerboard lookup table |       // Create a checkerboard lookup table | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       int rvol = 1; |       int rvol = 1; | ||||||
|       for(int d=0;d<_ndimension;d++){ |       for (int d = 0; d < _ndimension; d++) | ||||||
| 	rvol=rvol * _rdimensions[d]; |       { | ||||||
|  |         rvol = rvol * _rdimensions[d]; | ||||||
|       } |       } | ||||||
|       _checker_board.resize(rvol); |       _checker_board.resize(rvol); | ||||||
|       for(int osite=0;osite<_osites;osite++){ |       for (int osite = 0; osite < _osites; osite++) | ||||||
| 	_checker_board[osite] = CheckerBoardFromOindex (osite); |       { | ||||||
|  |         _checker_board[osite] = CheckerBoardFromOindex(osite); | ||||||
|       } |       } | ||||||
|        |  | ||||||
|     }; |     }; | ||||||
| protected: |  | ||||||
|  |   protected: | ||||||
|     virtual int oIndex(std::vector<int> &coor) |     virtual int oIndex(std::vector<int> &coor) | ||||||
|     { |     { | ||||||
|       int idx=0; |       int idx = 0; | ||||||
|       for(int d=0;d<_ndimension;d++) { |       for (int d = 0; d < _ndimension; d++) | ||||||
| 	if( d==_checker_dim ) { |       { | ||||||
| 	  idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]); |         if (d == _checker_dim) | ||||||
| 	} else { |         { | ||||||
| 	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]); |           idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); | ||||||
|  |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |           idx += _ostride[d] * (coor[d] % _rdimensions[d]); | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|       return idx; |       return idx; | ||||||
| @@ -246,17 +266,20 @@ protected: | |||||||
|  |  | ||||||
|     virtual int iIndex(std::vector<int> &lcoor) |     virtual int iIndex(std::vector<int> &lcoor) | ||||||
|     { |     { | ||||||
|         int idx=0; |       int idx = 0; | ||||||
|         for(int d=0;d<_ndimension;d++) { |       for (int d = 0; d < _ndimension; d++) | ||||||
| 	  if( d==_checker_dim ) { |       { | ||||||
| 	    idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); |         if (d == _checker_dim) | ||||||
| 	  } else {  |         { | ||||||
| 	    idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); |           idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); | ||||||
|  |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |           idx += _istride[d] * (lcoor[d] / _rdimensions[d]); | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|       return idx; |       return idx; | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -26,6 +26,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
|  | #include <fcntl.h> | ||||||
|  | #include <unistd.h> | ||||||
|  | #include <limits.h> | ||||||
|  | #include <sys/mman.h> | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -34,7 +38,10 @@ namespace Grid { | |||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
| void *              CartesianCommunicator::ShmCommBuf; | void *              CartesianCommunicator::ShmCommBuf; | ||||||
| uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;  | uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024;  | ||||||
| CartesianCommunicator::CommunicatorPolicy_t  CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; | CartesianCommunicator::CommunicatorPolicy_t   | ||||||
|  | CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; | ||||||
|  | int CartesianCommunicator::nCommThreads = -1; | ||||||
|  | int CartesianCommunicator::Hugepages = 0; | ||||||
|  |  | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Alloc, free shmem region | // Alloc, free shmem region | ||||||
| @@ -60,6 +67,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { | |||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
| // Grid information queries | // Grid information queries | ||||||
| ///////////////////////////////// | ///////////////////////////////// | ||||||
|  | int                      CartesianCommunicator::Dimensions(void)         { return _ndimension; }; | ||||||
| int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | ||||||
| int                      CartesianCommunicator::BossRank(void)          { return 0; }; | int                      CartesianCommunicator::BossRank(void)          { return 0; }; | ||||||
| int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | ||||||
| @@ -88,24 +96,43 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | |||||||
|   GlobalSumVector((double *)c,2*N); |   GlobalSumVector((double *)c,2*N); | ||||||
| } | } | ||||||
|  |  | ||||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | #if !defined( GRID_COMMS_MPI3)  | ||||||
|  |  | ||||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||||
|  | int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; | ||||||
|  | #endif | ||||||
|  | #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) | ||||||
|  | double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, | ||||||
|  | 						     int xmit_to_rank, | ||||||
|  | 						     void *recv, | ||||||
|  | 						     int recv_from_rank, | ||||||
|  | 						     int bytes, int dir) | ||||||
|  | { | ||||||
|  |   std::vector<CommsRequest_t> list; | ||||||
|  |   // Discard the "dir" | ||||||
|  |   SendToRecvFromBegin   (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||||
|  |   SendToRecvFromComplete(list); | ||||||
|  |   return 2.0*bytes; | ||||||
|  | } | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 							 void *xmit, | 							 void *xmit, | ||||||
| 							 int xmit_to_rank, | 							 int xmit_to_rank, | ||||||
| 							 void *recv, | 							 void *recv, | ||||||
| 							 int recv_from_rank, | 							 int recv_from_rank, | ||||||
| 						       int bytes) | 							 int bytes, int dir) | ||||||
| { | { | ||||||
|  |   // Discard the "dir" | ||||||
|   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); |   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||||
|   return 2.0*bytes; |   return 2.0*bytes; | ||||||
| } | } | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | ||||||
| { | { | ||||||
|   SendToRecvFromComplete(waitall); |   SendToRecvFromComplete(waitall); | ||||||
| } | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #if !defined( GRID_COMMS_MPI3)  | ||||||
|  |  | ||||||
| void CartesianCommunicator::StencilBarrier(void){}; | void CartesianCommunicator::StencilBarrier(void){}; | ||||||
|  |  | ||||||
| commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector; | ||||||
| @@ -119,8 +146,22 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { | |||||||
|   return NULL; |   return NULL; | ||||||
| } | } | ||||||
| void CartesianCommunicator::ShmInitGeneric(void){ | void CartesianCommunicator::ShmInitGeneric(void){ | ||||||
|  | #if 1 | ||||||
|  |  | ||||||
|  |   int mmap_flag = MAP_SHARED | MAP_ANONYMOUS; | ||||||
|  | #ifdef MAP_HUGETLB | ||||||
|  |   if ( Hugepages ) mmap_flag |= MAP_HUGETLB; | ||||||
|  | #endif | ||||||
|  |   ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);  | ||||||
|  |   if (ShmCommBuf == (void *)MAP_FAILED) { | ||||||
|  |     perror("mmap failed "); | ||||||
|  |     exit(EXIT_FAILURE);   | ||||||
|  |   } | ||||||
|  | #else  | ||||||
|   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); |   ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); | ||||||
|   ShmCommBuf=(void *)&ShmBufStorageVector[0]; |   ShmCommBuf=(void *)&ShmBufStorageVector[0]; | ||||||
|  | #endif | ||||||
|  |   bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifdef GRID_COMMS_MPI3 | #ifdef GRID_COMMS_MPI3 | ||||||
| #include <mpi.h> | #include <mpi.h> | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_MPI3L | #ifdef GRID_COMMS_MPIT | ||||||
| #include <mpi.h> | #include <mpi.h> | ||||||
| #endif | #endif | ||||||
| #ifdef GRID_COMMS_SHMEM | #ifdef GRID_COMMS_SHMEM | ||||||
| @@ -50,12 +50,24 @@ namespace Grid { | |||||||
| class CartesianCommunicator { | class CartesianCommunicator { | ||||||
|   public:     |   public:     | ||||||
|  |  | ||||||
|   // 65536 ranks per node adequate for now |  | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   // Isend/Irecv/Wait, or Sendrecv blocking | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; | ||||||
|  |   static CommunicatorPolicy_t CommunicatorPolicy; | ||||||
|  |   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////// | ||||||
|  |   // Up to 65536 ranks per node adequate for now | ||||||
|   // 128MB shared memory for comms enought for 48^4 local vol comms |   // 128MB shared memory for comms enought for 48^4 local vol comms | ||||||
|   // Give external control (command line override?) of this |   // Give external control (command line override?) of this | ||||||
|  |   /////////////////////////////////////////// | ||||||
|   static const int MAXLOG2RANKSPERNODE = 16;             |   static const int MAXLOG2RANKSPERNODE = 16;             | ||||||
|   static uint64_t  MAX_MPI_SHM_BYTES; |   static uint64_t  MAX_MPI_SHM_BYTES; | ||||||
|  |   static int       nCommThreads; | ||||||
|  |   // use explicit huge pages | ||||||
|  |   static int       Hugepages; | ||||||
|  |  | ||||||
|   // Communicator should know nothing of the physics grid, only processor grid. |   // Communicator should know nothing of the physics grid, only processor grid. | ||||||
|   int              _Nprocessors;     // How many in all |   int              _Nprocessors;     // How many in all | ||||||
| @@ -64,14 +76,18 @@ class CartesianCommunicator { | |||||||
|   std::vector<int> _processor_coor;  // linear processor coordinate |   std::vector<int> _processor_coor;  // linear processor coordinate | ||||||
|   unsigned long _ndimension; |   unsigned long _ndimension; | ||||||
|  |  | ||||||
| #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L) | #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) | ||||||
|   static MPI_Comm communicator_world; |   static MPI_Comm communicator_world; | ||||||
|  |  | ||||||
|   MPI_Comm              communicator; |   MPI_Comm              communicator; | ||||||
|  |   std::vector<MPI_Comm> communicator_halo; | ||||||
|  |  | ||||||
|   typedef MPI_Request CommsRequest_t; |   typedef MPI_Request CommsRequest_t; | ||||||
| #else  | #else  | ||||||
|   typedef int CommsRequest_t; |   typedef int CommsRequest_t; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////// | ||||||
|   // Helper functionality for SHM Windows common to all other impls |   // Helper functionality for SHM Windows common to all other impls | ||||||
|   //////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////// | ||||||
| @@ -117,10 +133,6 @@ class CartesianCommunicator { | |||||||
|   ///////////////////////////////// |   ///////////////////////////////// | ||||||
|   static void * ShmCommBuf; |   static void * ShmCommBuf; | ||||||
|  |  | ||||||
|   // Isend/Irecv/Wait, or Sendrecv blocking |  | ||||||
|   enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; |  | ||||||
|   static CommunicatorPolicy_t CommunicatorPolicy; |  | ||||||
|   static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } |  | ||||||
|    |    | ||||||
|   size_t heap_top; |   size_t heap_top; | ||||||
|   size_t heap_bytes; |   size_t heap_bytes; | ||||||
| @@ -148,6 +160,7 @@ class CartesianCommunicator { | |||||||
|   int  RankFromProcessorCoor(std::vector<int> &coor); |   int  RankFromProcessorCoor(std::vector<int> &coor); | ||||||
|   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); |   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); | ||||||
|    |    | ||||||
|  |   int                      Dimensions(void)        ; | ||||||
|   int                      IsBoss(void)            ; |   int                      IsBoss(void)            ; | ||||||
|   int                      BossRank(void)          ; |   int                      BossRank(void)          ; | ||||||
|   int                      ThisRank(void)          ; |   int                      ThisRank(void)          ; | ||||||
| @@ -155,6 +168,7 @@ class CartesianCommunicator { | |||||||
|   const std::vector<int> & ProcessorGrid(void)     ; |   const std::vector<int> & ProcessorGrid(void)     ; | ||||||
|   int                      ProcessorCount(void)    ; |   int                      ProcessorCount(void)    ; | ||||||
|   int                      NodeCount(void)    ; |   int                      NodeCount(void)    ; | ||||||
|  |   int                      RankCount(void)    ; | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid |   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||||
| @@ -175,6 +189,8 @@ class CartesianCommunicator { | |||||||
|   void GlobalSumVector(ComplexF *c,int N); |   void GlobalSumVector(ComplexF *c,int N); | ||||||
|   void GlobalSum(ComplexD &c); |   void GlobalSum(ComplexD &c); | ||||||
|   void GlobalSumVector(ComplexD *c,int N); |   void GlobalSumVector(ComplexD *c,int N); | ||||||
|  |   void GlobalXOR(uint32_t &); | ||||||
|  |   void GlobalXOR(uint64_t &); | ||||||
|    |    | ||||||
|   template<class obj> void GlobalSum(obj &o){ |   template<class obj> void GlobalSum(obj &o){ | ||||||
|     typedef typename obj::scalar_type scalar_type; |     typedef typename obj::scalar_type scalar_type; | ||||||
| @@ -207,14 +223,21 @@ class CartesianCommunicator { | |||||||
|    |    | ||||||
|   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); |   void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); | ||||||
|  |  | ||||||
|  |   double StencilSendToRecvFrom(void *xmit, | ||||||
|  | 			       int xmit_to_rank, | ||||||
|  | 			       void *recv, | ||||||
|  | 			       int recv_from_rank, | ||||||
|  | 			       int bytes,int dir); | ||||||
|  |  | ||||||
|   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, |   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 				    void *xmit, | 				    void *xmit, | ||||||
| 				    int xmit_to_rank, | 				    int xmit_to_rank, | ||||||
| 				    void *recv, | 				    void *recv, | ||||||
| 				    int recv_from_rank, | 				    int recv_from_rank, | ||||||
| 				  int bytes); | 				    int bytes,int dir); | ||||||
|    |    | ||||||
|   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); |    | ||||||
|  |   void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i); | ||||||
|   void StencilBarrier(void); |   void StencilBarrier(void); | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  | // Bitwise XOR reduction of a 32-bit word across the communicator; the result overwrites u. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &u) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Bitwise XOR reduction of a 64-bit word across the communicator; the result overwrites u. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &u) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
|   | |||||||
| @@ -37,11 +37,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <sys/ipc.h> | #include <sys/ipc.h> | ||||||
| #include <sys/shm.h> | #include <sys/shm.h> | ||||||
| #include <sys/mman.h> | #include <sys/mman.h> | ||||||
| //#include <zlib.h> | #include <zlib.h> | ||||||
| #ifndef SHM_HUGETLB | #ifdef HAVE_NUMAIF_H | ||||||
| #define SHM_HUGETLB 04000 | #include <numaif.h> | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -65,6 +66,7 @@ std::vector<int> CartesianCommunicator::MyGroup; | |||||||
| std::vector<void *> CartesianCommunicator::ShmCommBufs; | std::vector<void *> CartesianCommunicator::ShmCommBufs; | ||||||
|  |  | ||||||
| int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | ||||||
|  | // Reports the value held in WorldSize. | ||||||
|  | int CartesianCommunicator::RankCount(void) | ||||||
|  | { | ||||||
|  |   return WorldSize; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef FORCE_COMMS | #undef FORCE_COMMS | ||||||
| @@ -210,9 +212,34 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } |       if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      } | ||||||
|       ftruncate(fd, size); |       ftruncate(fd, size); | ||||||
|        |        | ||||||
|       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); |       int mmap_flag = MAP_SHARED; | ||||||
|  | #ifdef MAP_HUGETLB | ||||||
|  |       if (Hugepages) mmap_flag |= MAP_HUGETLB; | ||||||
|  | #endif | ||||||
|  |       void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0); | ||||||
|  |  | ||||||
|       if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    } |       if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    } | ||||||
|       assert(((uint64_t)ptr&0x3F)==0); |       assert(((uint64_t)ptr&0x3F)==0); | ||||||
|  |  | ||||||
|  | // Experimental: try to force the numa domain on the shm segment if we have numaif.h | ||||||
|  | #if 0 | ||||||
|  | //#ifdef HAVE_NUMAIF_H | ||||||
|  | 	int status; | ||||||
|  | 	int flags=MPOL_MF_MOVE; | ||||||
|  | #ifdef KNL | ||||||
|  | 	int nodes=1; // numa domain == MCDRAM | ||||||
|  | 	// Find out if in SNC2,SNC4 mode ? | ||||||
|  | #else | ||||||
|  | 	int nodes=r; // numa domain == MPI ID | ||||||
|  | #endif | ||||||
|  | 	unsigned long count=1; | ||||||
|  | 	for(uint64_t page=0;page<size;page+=4096){ | ||||||
|  | 	  void *pages = (void *) ( page + (uint64_t)ptr ); | ||||||
|  | 	  uint64_t *cow_it = (uint64_t *)pages;	*cow_it = 1; | ||||||
|  | 	  ierr= move_pages(0,count, &pages,&nodes,&status,flags); | ||||||
|  | 	  if (ierr && (page==0)) perror("numa relocate command failed"); | ||||||
|  | 	} | ||||||
|  | #endif | ||||||
|       ShmCommBufs[r] =ptr; |       ShmCommBufs[r] =ptr; | ||||||
|        |        | ||||||
|     } |     } | ||||||
| @@ -243,7 +270,11 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|     for(int r=0;r<ShmSize;r++){ |     for(int r=0;r<ShmSize;r++){ | ||||||
|       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; |       size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES; | ||||||
|       key_t key   = 0x4545 + r; |       key_t key   = 0x4545 + r; | ||||||
|       if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { |       int flags = IPC_CREAT | SHM_R | SHM_W; | ||||||
|  | #ifdef SHM_HUGETLB | ||||||
|  |       flags|=SHM_HUGETLB; | ||||||
|  | #endif | ||||||
|  |       if ((shmids[r]= shmget(key,size, flags)) < 0) { | ||||||
| 	int errsv = errno; | 	int errsv = errno; | ||||||
| 	printf("Errno %d\n",errsv); | 	printf("Errno %d\n",errsv); | ||||||
| 	perror("shmget"); | 	perror("shmget"); | ||||||
| @@ -374,8 +405,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | |||||||
| {  | {  | ||||||
|   int ierr; |   int ierr; | ||||||
|   communicator=communicator_world; |   communicator=communicator_world; | ||||||
|  |  | ||||||
|   _ndimension = processors.size(); |   _ndimension = processors.size(); | ||||||
|  |  | ||||||
|  |   communicator_halo.resize (2*_ndimension); | ||||||
|  |   for(int i=0;i<_ndimension*2;i++){ | ||||||
|  |     MPI_Comm_dup(communicator,&communicator_halo[i]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
|   // Assert power of two shm_size. |   // Assert power of two shm_size. | ||||||
|   //////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////// | ||||||
| @@ -509,6 +546,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | |||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  | // XOR-combine a 32-bit value from every rank of the communicator, in place. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &u) | ||||||
|  | { | ||||||
|  |   const int rc = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||||
|  |   assert(rc==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // XOR-combine a 64-bit value from every rank of the communicator, in place. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &u) | ||||||
|  | { | ||||||
|  |   const int rc = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||||
|  |   assert(rc==MPI_SUCCESS); | ||||||
|  | } | ||||||
| void CartesianCommunicator::GlobalSum(float &f){ | void CartesianCommunicator::GlobalSum(float &f){ | ||||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); |   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| @@ -590,13 +635,27 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Blocking stencil exchange: starts the transfer with StencilSendToRecvFromBegin, | ||||||
|  | // completes it straight away, and returns the byte count that Begin reported. | ||||||
|  | double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, | ||||||
|  | 						     int dest, | ||||||
|  | 						     void *recv, | ||||||
|  | 						     int from, | ||||||
|  | 						     int bytes,int dir) | ||||||
|  | { | ||||||
|  |   std::vector<CommsRequest_t> requests; | ||||||
|  |   const double off_node_bytes = StencilSendToRecvFromBegin(requests,xmit,dest,recv,from,bytes,dir); | ||||||
|  |   StencilSendToRecvFromComplete(requests,dir); | ||||||
|  |   return off_node_bytes; | ||||||
|  | } | ||||||
|  |  | ||||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 							 void *xmit, | 							 void *xmit, | ||||||
| 							 int dest, | 							 int dest, | ||||||
| 							 void *recv, | 							 void *recv, | ||||||
| 							 int from, | 							 int from, | ||||||
| 						       int bytes) | 							 int bytes,int dir) | ||||||
| { | { | ||||||
|  |   assert(dir < communicator_halo.size()); | ||||||
|  |  | ||||||
|   MPI_Request xrq; |   MPI_Request xrq; | ||||||
|   MPI_Request rrq; |   MPI_Request rrq; | ||||||
|  |  | ||||||
| @@ -615,26 +674,26 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|   gfrom = MPI_UNDEFINED; |   gfrom = MPI_UNDEFINED; | ||||||
| #endif | #endif | ||||||
|   if ( gfrom ==MPI_UNDEFINED) { |   if ( gfrom ==MPI_UNDEFINED) { | ||||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); |     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(rrq); |     list.push_back(rrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( gdest == MPI_UNDEFINED ) { |   if ( gdest == MPI_UNDEFINED ) { | ||||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); |     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(xrq); |     list.push_back(xrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  |   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {  | ||||||
|     this->StencilSendToRecvFromComplete(list); |     this->StencilSendToRecvFromComplete(list,dir); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return off_node_bytes; |   return off_node_bytes; | ||||||
| } | } | ||||||
| void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall) | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | ||||||
| { | { | ||||||
|   SendToRecvFromComplete(waitall); |   SendToRecvFromComplete(waitall); | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										286
									
								
								lib/communicator/Communicator_mpit.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								lib/communicator/Communicator_mpit.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/communicator/Communicator_mpit.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/GridCore.h> | ||||||
|  | #include <Grid/GridQCDcore.h> | ||||||
|  | #include <Grid/qcd/action/ActionCore.h> | ||||||
|  | #include <mpi.h> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | // Info that is set up once and is independent of the cartesian layout | ||||||
|  | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | MPI_Comm CartesianCommunicator::communicator_world; | ||||||
|  |  | ||||||
|  | // Should error check all MPI calls. | ||||||
|  | // Start MPI if nobody has done so yet, duplicate MPI_COMM_WORLD into | ||||||
|  | // communicator_world and run ShmInitGeneric(). | ||||||
|  | // If the thread level actually available is not MPI_THREAD_MULTIPLE the Wilson | ||||||
|  | // kernels are switched to CommsThenCompute. | ||||||
|  | void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||||
|  |   int flag = 0; | ||||||
|  |   int provided = MPI_THREAD_SINGLE; | ||||||
|  |   int ierr = MPI_Initialized(&flag); // needed to coexist with other libs apparently | ||||||
|  |   assert(ierr==0); | ||||||
|  |   if ( !flag ) { | ||||||
|  |     ierr = MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); | ||||||
|  |   } else { | ||||||
|  |     // MPI was initialised elsewhere: ask which thread level was granted, | ||||||
|  |     // otherwise `provided` would be left unset and the check below skipped. | ||||||
|  |     ierr = MPI_Query_thread(&provided); | ||||||
|  |   } | ||||||
|  |   assert(ierr==0); | ||||||
|  |   if ( provided != MPI_THREAD_MULTIPLE ) { | ||||||
|  |     QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; | ||||||
|  |   } | ||||||
|  |   ierr = MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   ShmInitGeneric(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Build a fully periodic cartesian communicator with the requested processor grid | ||||||
|  | // on top of communicator_world, record this rank and its coordinates, and create | ||||||
|  | // 2*_ndimension duplicate communicators in communicator_halo. | ||||||
|  | // Every MPI call is checked, and the communicator size must equal the product of | ||||||
|  | // the requested processor grid. | ||||||
|  | CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||||
|  | { | ||||||
|  |   int ierr; | ||||||
|  |   _ndimension = processors.size(); | ||||||
|  |   std::vector<int> periodic(_ndimension,1); | ||||||
|  |  | ||||||
|  |   _Nprocessors=1; | ||||||
|  |   _processors = processors; | ||||||
|  |   _processor_coor.resize(_ndimension); | ||||||
|  |    | ||||||
|  |   ierr=MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   ierr=MPI_Comm_rank(communicator,&_processor); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   ierr=MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); | ||||||
|  |   assert(ierr==0); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<_ndimension;i++){ | ||||||
|  |     _Nprocessors*=_processors[i]; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // One duplicate communicator per entry; the Stencil* calls index this by `dir` | ||||||
|  |   communicator_halo.resize (2*_ndimension); | ||||||
|  |   for(int i=0;i<_ndimension*2;i++){ | ||||||
|  |     ierr=MPI_Comm_dup(communicator,&communicator_halo[i]); | ||||||
|  |     assert(ierr==0); | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   int Size;  | ||||||
|  |   ierr=MPI_Comm_size(communicator,&Size); | ||||||
|  |   assert(ierr==0); | ||||||
|  |    | ||||||
|  |   assert(Size==_Nprocessors); | ||||||
|  | } | ||||||
|  | // Sum a 32-bit unsigned integer over the communicator; the total overwrites u. | ||||||
|  | void CartesianCommunicator::GlobalSum(uint32_t &u) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Sum a 64-bit unsigned integer over the communicator; the total overwrites u. | ||||||
|  | void CartesianCommunicator::GlobalSum(uint64_t &u) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Bitwise XOR of a 32-bit word over the communicator; the result overwrites u. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &u) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Bitwise XOR of a 64-bit word over the communicator; the result overwrites u. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &u) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Sum a single float over the communicator; the total overwrites f. | ||||||
|  | void CartesianCommunicator::GlobalSum(float &f) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Element-wise sum of N floats starting at f over the communicator, in place. | ||||||
|  | void CartesianCommunicator::GlobalSumVector(float *f,int N) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Sum a single double over the communicator; the total overwrites d. | ||||||
|  | void CartesianCommunicator::GlobalSum(double &d) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Element-wise sum of N doubles starting at d over the communicator, in place. | ||||||
|  | void CartesianCommunicator::GlobalSumVector(double *d,int N) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Ask MPI_Cart_shift for the source and destination ranks of a displacement | ||||||
|  | // `shift` along dimension `dim`; the answers are written to source and dest. | ||||||
|  | void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Cart_shift(communicator,dim,shift,&source,&dest); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  | // Translate processor-grid coordinates into a rank via MPI_Cart_rank. | ||||||
|  | int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) | ||||||
|  | { | ||||||
|  |   int result; | ||||||
|  |   const int status = MPI_Cart_rank(communicator, &coor[0], &result); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  |   return result; | ||||||
|  | } | ||||||
|  | // Translate a rank into processor-grid coordinates via MPI_Cart_coords. | ||||||
|  | // coor is resized to _ndimension before being filled. | ||||||
|  | void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor) | ||||||
|  | { | ||||||
|  |   coor.resize(_ndimension); | ||||||
|  |   const int status = MPI_Cart_coords(communicator, rank, _ndimension, &coor[0]); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Basic Halo comms primitive | ||||||
|  | // Blocking exchange: hands the transfer to SendToRecvFromBegin and then | ||||||
|  | // completes it with SendToRecvFromComplete before returning. | ||||||
|  | void CartesianCommunicator::SendToRecvFrom(void *xmit, | ||||||
|  | 					   int dest, | ||||||
|  | 					   void *recv, | ||||||
|  | 					   int from, | ||||||
|  | 					   int bytes) | ||||||
|  | { | ||||||
|  |   std::vector<CommsRequest_t> pending; | ||||||
|  |   SendToRecvFromBegin(pending,xmit,dest,recv,from,bytes); | ||||||
|  |   SendToRecvFromComplete(pending); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Point-to-point transfer of `bytes` bytes from rank `sender` to rank `receiver`. | ||||||
|  | // Only the two ranks involved do any work; the sender's rank is used as the tag. | ||||||
|  | // sender and receiver must differ. MPI return codes are checked. | ||||||
|  | void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||||
|  | 					   void *recv, | ||||||
|  | 					   int sender, | ||||||
|  | 					   int receiver, | ||||||
|  | 					   int bytes) | ||||||
|  | { | ||||||
|  |   assert(sender != receiver); | ||||||
|  |   int tag = sender; | ||||||
|  |   if ( _processor == sender ) { | ||||||
|  |     int ierr=MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); | ||||||
|  |     assert(ierr==0); | ||||||
|  |   } | ||||||
|  |   if ( _processor == receiver ) {  | ||||||
|  |     // The receive status was never inspected, so do not request one | ||||||
|  |     int ierr=MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,MPI_STATUS_IGNORE); | ||||||
|  |     assert(ierr==0); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Basic Halo comms primitive | ||||||
|  | // Start (or, outside the concurrent policy, fully perform) an exchange of `bytes` | ||||||
|  | // bytes: send xmit to `dest`, receive into recv from `from`. | ||||||
|  | // Under CommunicatorPolicyConcurrent the two non-blocking requests are appended | ||||||
|  | // to `list` (send first, then receive); otherwise a blocking MPI_Sendrecv is used | ||||||
|  | // and `list` is left untouched. | ||||||
|  | void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
|  | 						void *xmit, | ||||||
|  | 						int dest, | ||||||
|  | 						void *recv, | ||||||
|  | 						int from, | ||||||
|  | 						int bytes) | ||||||
|  | { | ||||||
|  |   if ( CommunicatorPolicy != CommunicatorPolicyConcurrent ) {  | ||||||
|  |     // Give the CPU to MPI immediately; can use threads to overlap optionally | ||||||
|  |     const int status = MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,_processor, | ||||||
|  | 				    recv,bytes,MPI_CHAR,from, from, | ||||||
|  | 				    communicator,MPI_STATUS_IGNORE); | ||||||
|  |     assert(status==MPI_SUCCESS); | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   MPI_Request send_req; | ||||||
|  |   MPI_Request recv_req; | ||||||
|  |  | ||||||
|  |   // Receive is posted before the send; tags are the sending rank on both sides | ||||||
|  |   int status = MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&recv_req); | ||||||
|  |   status    |= MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&send_req); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  |  | ||||||
|  |   list.push_back(send_req); | ||||||
|  |   list.push_back(recv_req); | ||||||
|  | } | ||||||
|  | // Wait for every request in `list` when running under CommunicatorPolicyConcurrent. | ||||||
|  | // Under any other policy, or when `list` is empty, this does nothing. | ||||||
|  | void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) | ||||||
|  | { | ||||||
|  |   if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {  | ||||||
|  |     // Taking &list[0] of an empty vector is undefined, so bail out first | ||||||
|  |     if ( list.empty() ) return; | ||||||
|  |     int nreq=list.size(); | ||||||
|  |     // Statuses were never read, so no status array is allocated | ||||||
|  |     int ierr = MPI_Waitall(nreq,&list[0],MPI_STATUSES_IGNORE); | ||||||
|  |     assert(ierr==0); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Block until every rank of the communicator has reached this call. | ||||||
|  | void CartesianCommunicator::Barrier(void) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Barrier(communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Broadcast `bytes` bytes at `data` from rank `root` over this object's communicator. | ||||||
|  | void CartesianCommunicator::Broadcast(int root,void* data, int bytes) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Bcast(data,bytes,MPI_BYTE,root,communicator); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  |   // Should only be used prior to Grid Init finished. | ||||||
|  |   // Check for this? | ||||||
|  |   /////////////////////////////////////////////////////// | ||||||
|  | // Rank of the calling process within communicator_world. | ||||||
|  | int CartesianCommunicator::RankWorld(void) | ||||||
|  | { | ||||||
|  |   int world_rank; | ||||||
|  |   MPI_Comm_rank(communicator_world,&world_rank); | ||||||
|  |   return world_rank; | ||||||
|  | } | ||||||
|  | // Broadcast `bytes` bytes at `data` from rank `root` over communicator_world. | ||||||
|  | void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | ||||||
|  | { | ||||||
|  |   const int status = MPI_Bcast(data,bytes,MPI_BYTE,root,communicator_world); | ||||||
|  |   assert(status==MPI_SUCCESS); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Start a non-blocking halo exchange on communicator_halo[dir]: send `bytes` bytes | ||||||
|  | // from xmit to xmit_to_rank and receive `bytes` bytes into recv from recv_from_rank. | ||||||
|  | // Both requests are appended to `list` (send first, then receive) and must later be | ||||||
|  | // completed by the caller. Returns 2.0*bytes. | ||||||
|  | // dir must be a valid index into communicator_halo; MPI return codes are checked. | ||||||
|  | double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
|  | 							 void *xmit, | ||||||
|  | 							 int xmit_to_rank, | ||||||
|  | 							 void *recv, | ||||||
|  | 							 int recv_from_rank, | ||||||
|  | 							 int bytes,int dir) | ||||||
|  | { | ||||||
|  |   int myrank = _processor; | ||||||
|  |   int ierr; | ||||||
|  |   assert(dir >= 0); | ||||||
|  |   assert(dir < (int)communicator_halo.size()); | ||||||
|  |    | ||||||
|  |   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; | ||||||
|  |   // Give the CPU to MPI immediately; can use threads to overlap optionally | ||||||
|  |   MPI_Request req[2]; | ||||||
|  |   ierr=MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   ierr=MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); | ||||||
|  |   assert(ierr==0); | ||||||
|  |  | ||||||
|  |   list.push_back(req[0]); | ||||||
|  |   list.push_back(req[1]); | ||||||
|  |   return 2.0*bytes; | ||||||
|  | } | ||||||
|  | // Wait for every request in `waitall`. `dir` is accepted for interface | ||||||
|  | // compatibility and is not used here. An empty request list is a no-op. | ||||||
|  | void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) | ||||||
|  | {  | ||||||
|  |   // Taking &waitall[0] of an empty vector is undefined, so bail out first | ||||||
|  |   if ( waitall.empty() ) return; | ||||||
|  |   int nreq=waitall.size(); | ||||||
|  |   int ierr=MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); | ||||||
|  |   assert(ierr==0); | ||||||
|  | } | ||||||
|  | // Blocking halo exchange on communicator_halo[dir]: send `bytes` bytes from xmit to | ||||||
|  | // xmit_to_rank, receive `bytes` bytes into recv from recv_from_rank, and wait for | ||||||
|  | // both transfers before returning. Returns 2.0*bytes. | ||||||
|  | // dir must be a valid index into communicator_halo; MPI return codes are checked. | ||||||
|  | double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, | ||||||
|  | 						    int xmit_to_rank, | ||||||
|  | 						    void *recv, | ||||||
|  | 						    int recv_from_rank, | ||||||
|  | 						    int bytes,int dir) | ||||||
|  | { | ||||||
|  |   int myrank = _processor; | ||||||
|  |   int ierr; | ||||||
|  |   assert(dir >= 0); | ||||||
|  |   assert(dir < (int)communicator_halo.size()); | ||||||
|  |    | ||||||
|  |   //  std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl; | ||||||
|  |   // Give the CPU to MPI immediately; can use threads to overlap optionally | ||||||
|  |   MPI_Request req[2]; | ||||||
|  |   ierr=MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   ierr=MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank  ,myrank        , communicator_halo[dir],&req[0]); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   ierr=MPI_Waitall(2, req, MPI_STATUSES_IGNORE); | ||||||
|  |   assert(ierr==0); | ||||||
|  |   return 2.0*bytes; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
| @@ -59,6 +59,8 @@ void CartesianCommunicator::GlobalSum(double &){} | |||||||
| void CartesianCommunicator::GlobalSum(uint32_t &){} | void CartesianCommunicator::GlobalSum(uint32_t &){} | ||||||
| void CartesianCommunicator::GlobalSum(uint64_t &){} | void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||||
|  | // No-op: the 32-bit argument is left unchanged. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint32_t &) | ||||||
|  | { | ||||||
|  | } | ||||||
|  | // No-op: the 64-bit argument is left unchanged. | ||||||
|  | void CartesianCommunicator::GlobalXOR(uint64_t &) | ||||||
|  | { | ||||||
|  | } | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||||
| 					   void *recv, | 					   void *recv, | ||||||
|   | |||||||
| @@ -42,7 +42,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/cshift/Cshift_mpi.h> | #include <Grid/cshift/Cshift_mpi.h> | ||||||
| #endif  | #endif  | ||||||
|  |  | ||||||
| #ifdef GRID_COMMS_MPI3L | #ifdef GRID_COMMS_MPIT | ||||||
| #include <Grid/cshift/Cshift_mpi.h> | #include <Grid/cshift/Cshift_mpi.h> | ||||||
| #endif  | #endif  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -30,21 +30,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| class SimpleCompressor { |  | ||||||
| public: |  | ||||||
|   void Point(int) {}; |  | ||||||
|  |  | ||||||
|   vobj operator() (const vobj &arg) { |  | ||||||
|     return arg; |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| // Gather for when there is no need to SIMD split with compression | // Gather for when there is no need to SIMD split  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| template<class vobj,class cobj,class compressor> void  | template<class vobj> void  | ||||||
| Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) | Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0) | ||||||
| { | { | ||||||
|   int rd = rhs._grid->_rdimensions[dimension]; |   int rd = rhs._grid->_rdimensions[dimension]; | ||||||
|  |  | ||||||
| @@ -62,7 +52,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen | |||||||
|       for(int b=0;b<e2;b++){ |       for(int b=0;b<e2;b++){ | ||||||
| 	int o  = n*stride; | 	int o  = n*stride; | ||||||
| 	int bo = n*e2; | 	int bo = n*e2; | ||||||
| 	buffer[off+bo+b]=compress(rhs._odata[so+o+b]); | 	buffer[off+bo+b]=rhs._odata[so+o+b]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } else {  |   } else {  | ||||||
| @@ -78,17 +68,16 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen | |||||||
|        } |        } | ||||||
|      } |      } | ||||||
|      parallel_for(int i=0;i<table.size();i++){ |      parallel_for(int i=0;i<table.size();i++){ | ||||||
|        buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]); |        buffer[off+table[i].first]=rhs._odata[so+table[i].second]; | ||||||
|      } |      } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| // Gather for when there *is* need to SIMD split with compression | // Gather for when there *is* need to SIMD split  | ||||||
| /////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////// | ||||||
| template<class cobj,class vobj,class compressor> void  | template<class vobj> void  | ||||||
| Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_object *> pointers,int dimension,int plane,int cbmask,compressor &compress) | Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask) | ||||||
| { | { | ||||||
|   int rd = rhs._grid->_rdimensions[dimension]; |   int rd = rhs._grid->_rdimensions[dimension]; | ||||||
|  |  | ||||||
| @@ -109,8 +98,8 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_ | |||||||
| 	int o      =   n*n1; | 	int o      =   n*n1; | ||||||
| 	int offset = b+n*e2; | 	int offset = b+n*e2; | ||||||
| 	 | 	 | ||||||
| 	cobj temp =compress(rhs._odata[so+o+b]); | 	vobj temp =rhs._odata[so+o+b]; | ||||||
| 	extract<cobj>(temp,pointers,offset); | 	extract<vobj>(temp,pointers,offset); | ||||||
|  |  | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
| @@ -127,32 +116,14 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_ | |||||||
| 	int offset = b+n*e2; | 	int offset = b+n*e2; | ||||||
|  |  | ||||||
| 	if ( ocb & cbmask ) { | 	if ( ocb & cbmask ) { | ||||||
| 	  cobj temp =compress(rhs._odata[so+o+b]); | 	  vobj temp =rhs._odata[so+o+b]; | ||||||
| 	  extract<cobj>(temp,pointers,offset); | 	  extract<vobj>(temp,pointers,offset); | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| // Gather for when there is no need to SIMD split |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask) |  | ||||||
| { |  | ||||||
|   SimpleCompressor<vobj> dontcompress; |  | ||||||
|   Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| // Gather for when there *is* need to SIMD split |  | ||||||
| ////////////////////////////////////////////////////// |  | ||||||
| template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask) |  | ||||||
| { |  | ||||||
|   SimpleCompressor<vobj> dontcompress; |  | ||||||
|   Gather_plane_extract<vobj,vobj,decltype(dontcompress)>(rhs,pointers,dimension,plane,cbmask,dontcompress); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| // Scatter for when there is no need to SIMD split | // Scatter for when there is no need to SIMD split | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| @@ -200,7 +171,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo | |||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
| // Scatter for when there *is* need to SIMD split | // Scatter for when there *is* need to SIMD split | ||||||
| ////////////////////////////////////////////////////// | ////////////////////////////////////////////////////// | ||||||
|  template<class vobj,class cobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<cobj *> pointers,int dimension,int plane,int cbmask) | template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask) | ||||||
| { | { | ||||||
|   int rd = rhs._grid->_rdimensions[dimension]; |   int rd = rhs._grid->_rdimensions[dimension]; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -154,13 +154,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r | |||||||
| 			   recv_from_rank, | 			   recv_from_rank, | ||||||
| 			   bytes); | 			   bytes); | ||||||
|       grid->Barrier(); |       grid->Barrier(); | ||||||
|       /* |  | ||||||
|       for(int i=0;i<send_buf.size();i++){ |  | ||||||
| 	assert(recv_buf.size()==buffer_size); |  | ||||||
| 	assert(send_buf.size()==buffer_size); |  | ||||||
| 	std::cout << "SendRecv_Cshift_comms ["<<i<<" "<< dimension<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << "  0x" << cbmask<<std::endl; |  | ||||||
|       } |  | ||||||
|       */ |  | ||||||
|       Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask); |       Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -246,13 +240,6 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo | |||||||
| 			     (void *)&recv_buf_extract[i][0], | 			     (void *)&recv_buf_extract[i][0], | ||||||
| 			     recv_from_rank, | 			     recv_from_rank, | ||||||
| 			     bytes); | 			     bytes); | ||||||
| 	/* |  | ||||||
| 	for(int w=0;w<recv_buf_extract[i].size();w++){ |  | ||||||
| 	  assert(recv_buf_extract[i].size()==buffer_size); |  | ||||||
| 	  assert(send_buf_extract[i].size()==buffer_size); |  | ||||||
| 	  std::cout << "SendRecv_Cshift_comms ["<<w<<" "<< dimension<<"] recv "<<recv_buf_extract[i][w]<<" send " << send_buf_extract[nbr_lane][w]  << cbmask<<std::endl; |  | ||||||
| 	} |  | ||||||
| 	*/	 |  | ||||||
| 	grid->Barrier(); | 	grid->Barrier(); | ||||||
| 	rpointers[i] = &recv_buf_extract[i][0]; | 	rpointers[i] = &recv_buf_extract[i][0]; | ||||||
|       } else {  |       } else {  | ||||||
|   | |||||||
							
								
								
									
										12276
									
								
								lib/json/json.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12276
									
								
								lib/json/json.hpp
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user