mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-27 01:59:33 +00:00 
			
		
		
		
	Compare commits
	
		
			761 Commits
		
	
	
		
			feature/si
			...
			feature/la
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 27ea2afe86 | ||
|  | 78e8704eac | ||
|  | 67131d82f2 | ||
|  | 615a9448b9 | ||
|  | 00164f5ce5 | ||
|  | a7f72eb994 | ||
|  | 501fa1614a | ||
|  | 5bf42e1e15 | ||
|  | fe4d9b003c | ||
|  | 4a699b4da3 | ||
|  | 689323f4ee | ||
|  | 84b441800f | ||
|  | 1ef424b139 | ||
|  | aa66f41c69 | ||
|  | f96c800d25 | ||
|  | 32a52d7583 | ||
|  | fa04b6d3c2 | ||
|  | 7fab183c0e | ||
|  | 9ec9850bdb | ||
|  | 0c4ddaea0b | ||
|  | 00ebc150ad | ||
|  | 0f3e9ae57d | ||
|  | 034de160bf | ||
|  | 14507fd6e4 | ||
|  | 2db05ac214 | ||
|  | 31f99574fa | ||
|  | a34c8a2961 | ||
|  | ccd20df827 | ||
|  | e9be293444 | ||
|  | d577211cc3 | ||
|  | f4336e480a | ||
|  | e4d461cb03 | ||
|  | 3d63b4894e | ||
|  | 08583afaff | ||
|  | b395a312af | ||
|  | 66295b99aa | ||
|  | b8654be0ef | ||
|  | a479325349 | ||
|  | f6c3f6bf2d | ||
|  | d83868fdbb | ||
|  | 303e0b927d | ||
|  | 28ba8a0f48 | ||
|  | f9e28577f3 | ||
|  | 8a3aae98f6 | ||
|  | 8309f2364b | ||
|  | cac1750078 | ||
|  | 27936900e6 | ||
|  | e325929851 | ||
|  | 47af3565f4 | ||
|  | 4b4d187935 | ||
|  | 9aff354ab5 | ||
|  | cb9ff20249 | ||
|  | 9fe6ac71ea | ||
|  | f1fa00b71b | ||
|  | bf58557fb1 | ||
|  | 10cb37f504 | ||
|  | 1374c943d4 | ||
|  | a1d80282ec | ||
|  | 4eb8bbbebe | ||
|  | d1c6288c5f | ||
|  | dd949bc428 | ||
|  | bb7378cfc3 | ||
|  | f0e084a88c | ||
|  | 153672d8ec | ||
|  | 08ca338875 | ||
|  | f7cbf82c04 | ||
|  | 07009c569a | ||
|  | 09f4cdb11e | ||
|  | 1e54882f71 | ||
|  | d54807b8c0 | ||
|  | 5625b47c7d | ||
|  | 1edcf902b7 | ||
|  | e5c19e1fd7 | ||
|  | a11d0a33d1 | ||
|  | 4f8b6f26b4 | ||
|  | 073525c5b3 | ||
|  | eb6153080a | ||
|  | f7072d1ac2 | ||
|  | fddeb29d6b | ||
|  | a9ec5cf564 | ||
|  | 946a8671b9 | ||
|  | a6eeea777b | ||
|  | 771a1b8e79 | ||
|  | bfb68e6f02 | ||
|  | 77f7737ccc | ||
|  | 18c335198a | ||
|  | f9df685cde | ||
|  | 17c5b0f152 | ||
|  | 5918769f97 | ||
|  | bbaf1ada91 | ||
|  | 1950ac9294 | ||
|  | 13fa70ac1a | ||
|  | 7cb2b11f26 | ||
|  | 1184ed29ae | ||
|  | 203c7bf6fa | ||
|  | c709883f3f | ||
|  | aed5de4d50 | ||
|  | ba27cc6571 | ||
|  | d856327250 | ||
|  | d75369cb56 | ||
|  | bf973d0d56 | ||
|  | 837bf8a5be | ||
|  | c05b2199f6 | ||
|  | a5fe07c077 | ||
|  | b83b2b1415 | ||
|  | b331be9101 | ||
|  | 49c20a9fa8 | ||
|  | 7359df3501 | ||
|  | 59bd1fe21b | ||
|  | 4e907fef2c | ||
|  | 67888b657f | ||
|  | 74af885d4e | ||
|  | d36d2fb40d | ||
|  | 5b9267e88d | ||
|  | 15fd4003ef | ||
|  | 4b4c2a715b | ||
|  | 54a5e6c1d0 | ||
|  | 73aeca7dea | ||
|  | ad89abb018 | ||
|  | 80c5bce5bb | ||
|  | f68b5de9c8 | ||
|  | d0f3d525d5 | ||
|  | f365a83fae | ||
|  | 3a58217405 | ||
|  | c289699d9a | ||
|  | c3b1263e75 | ||
|  | 34a9aeb331 | ||
| 102ea9ae66 | |||
|  | 5fa386ddc9 | ||
|  | edabb3577f | ||
|  | ce5df177ee | ||
|  | a0bb8e5b46 | ||
|  | 46f88e6d72 | ||
|  | dd8f1ea189 | ||
|  | b61835c1a5 | ||
|  | d9cd4f0273 | ||
|  | 459f70e8d4 | ||
|  | 061e48fd73 | ||
|  | ab50145001 | ||
|  | b49bec0cec | ||
|  | ae56e556c6 | ||
|  | 1cdf999668 | ||
|  | 11062fb686 | ||
|  | 383ca7d392 | ||
|  | a446d95c33 | ||
|  | be66e7dd95 | ||
|  | 6d0d064a6c | ||
|  | bfef525ed2 | ||
|  | 0b0cf62193 | ||
|  | 7d88198387 | ||
|  | 2f619482b8 | ||
|  | d6472eda8d | ||
|  | 9e658de238 | ||
|  | bcefdd7c4e | ||
|  | 9d45fca8bc | ||
|  | ac9e6b63c0 | ||
|  | e140b3f802 | ||
|  | d9d3d30cc7 | ||
|  | 47a12ec7b5 | ||
|  | ec1e2f7a40 | ||
|  | 41f73ec083 | ||
|  | fd367d8bfd | ||
|  | 6d0786ff9d | ||
|  | b7f93aeb4d | ||
|  | 202a7fe900 | ||
|  | 8a3fe60a27 | ||
|  | 44051aecd1 | ||
|  | 06e6f8de00 | ||
|  | dbe4d7850c | ||
|  | 4fe182e5a7 | ||
|  | 175f393f9d | ||
|  | 7d867a8134 | ||
|  | 9939b267d2 | ||
|  | 14d53e1c9e | ||
|  | 8bd869da37 | ||
|  | c7036f6717 | ||
|  | c0485d799d | ||
|  | 7abc5613bd | ||
|  | 237cfd11ab | ||
|  | a4b7dddb67 | ||
|  | 5696781862 | ||
|  | 8f4b3049cd | ||
|  | 2a6e673a91 | ||
|  | 9b6cde173f | ||
|  | 9f280b82c4 | ||
| c3f0889eda | |||
|  | 7a53dc3715 | ||
|  | 0f214ad427 | ||
|  | fe4912880d | ||
|  | f038c6babe | ||
|  | 169f4b2711 | ||
|  | 2d8aff36fe | ||
|  | 9fa07eecde | ||
|  | 659d7d1a40 | ||
|  | f64fb7bd77 | ||
|  | 2a35449b91 | ||
|  | 184af5bd05 | ||
|  | 097c9637ee | ||
|  | dc6f078246 | ||
|  | 8a4714a4a6 | ||
|  | 40e119c61c | ||
|  | d9593c4b81 | ||
|  | ac740f73ce | ||
|  | 75dc7794b9 | ||
|  | dee68fc728 | ||
|  | a2d3643634 | ||
|  | 57002924bc | ||
|  | 7b0237b081 | ||
|  | b68ad0cc0b | ||
|  | 37263fd9b1 | ||
|  | 3d09e3e9e0 | ||
|  | 1354b46338 | ||
|  | 251a97fe1b | ||
|  | e18929eaa0 | ||
|  | f3b0a92e71 | ||
|  | a0be3f7330 | ||
|  | b5a6e4f1fd | ||
|  | 7a788db3dc | ||
|  | f20eceb6cd | ||
|  | 38325ebbc6 | ||
|  | b73bd151bb | ||
|  | 694b305cab | ||
|  | 2d3737a133 | ||
|  | ac1f1838bc | ||
|  | 09d09d0fe5 | ||
|  | bf630a6821 | ||
|  | 8859a151cc | ||
|  | 688a39cfd9 | ||
|  | 6f5a5cd9b3 | ||
|  | 0933aeefd4 | ||
|  | 322f61acee | ||
|  | 08e04b9676 | ||
| feaa2ac947 | |||
| 07de925127 | |||
|  | a9c816a268 | ||
|  | e43a8b6b8a | ||
|  | bf729766dd | ||
|  | dafb351d38 | ||
| 0b707b861c | |||
| 15e87a4607 | |||
| 7d7220cbd7 | |||
|  | 54e94360ad | ||
| 0af740dc15 | |||
| d2e8372df3 | |||
|  | 869b99ec1e | ||
|  | 4a29ab0d0a | ||
|  | 0165bcb58e | ||
| 4372d04ad4 | |||
|  | 349d75e483 | ||
|  | 56abbdf4c2 | ||
|  | af71c63f4c | ||
|  | e51475703a | ||
|  | 1feddf4ba6 | ||
|  | 600d7ddc2e | ||
|  | e504260f3d | ||
|  | 0440d4ce66 | ||
|  | 5e4bea8f20 | ||
|  | 6ebf9f15b7 | ||
|  | 1d7aa673a4 | ||
|  | b9104f3072 | ||
| b22eab8c8b | |||
|  | a7d56523ab | ||
|  | 9e56c65730 | ||
|  | ef4f2b8c41 | ||
|  | e8b95bd35b | ||
|  | 7e35286860 | ||
|  | 0486ff8e79 | ||
| 1e8a2e1621 | |||
| 7587df831a | |||
|  | e9cc21900f | ||
|  | 0a8faac271 | ||
|  | abc4de0fd2 | ||
| b672717096 | |||
| 284ee194b1 | |||
|  | cfe3cd76d1 | ||
|  | 3fa5e3109f | ||
|  | 8b7049f737 | ||
|  | c85024683e | ||
|  | 1300b0b04b | ||
|  | e6d984b484 | ||
|  | 1d18d95d4f | ||
|  | ae39ec85a3 | ||
|  | b96daf53a0 | ||
|  | 46879e1658 | ||
|  | ae4de94798 | ||
|  | 0ab555b4f5 | ||
|  | 8e9be9f84f | ||
|  | d572170170 | ||
| 81b18f843a | |||
|  | a833f88c32 | ||
|  | 07b2c1b253 | ||
|  | 735cbdb983 | ||
|  | 2ad54c5a02 | ||
|  | 12ccc73cf5 | ||
|  | 3d04dc33c6 | ||
|  | e7564f8330 | ||
|  | 91199a8ea0 | ||
|  | 0494feec98 | ||
|  | a16b1e134e | ||
|  | 769ad578f5 | ||
|  | eaac0044b5 | ||
|  | 56042f002c | ||
|  | 3bfd1f13e6 | ||
|  | 70ab598c96 | ||
|  | 1d0ca65e28 | ||
|  | 2bc4d0a20e | ||
| 2490816297 | |||
| 5f55bca378 | |||
| f6aa82b7f2 | |||
| 22749699a3 | |||
| 0503c028be | |||
|  | 092dcd4e04 | ||
|  | 4a8c4ccfba | ||
|  | 9b44189d5a | ||
|  | 7da4856e8e | ||
|  | aaf1e33a77 | ||
|  | 094c3d091a | ||
|  | 4b98e524a0 | ||
|  | 1a1f6d55f9 | ||
|  | 21421656ab | ||
|  | 6f687a67cd | ||
|  | b30754e762 | ||
|  | 1e429a0d57 | ||
|  | d38a4de36c | ||
|  | ef1b7db374 | ||
|  | 53a9aeb965 | ||
|  | e30fa9f4b8 | ||
|  | 58e8d0a10d | ||
|  | 62cf9cf638 | ||
|  | 0fb458879d | ||
|  | 725c513d94 | ||
| d8648307ff | |||
| 064315c00b | |||
|  | 7c6cc85df6 | ||
|  | a6691ef87c | ||
|  | 8e0ced627a | ||
|  | 0de314870d | ||
|  | ffb91e53d2 | ||
|  | f4e8bf2858 | ||
| a74c34315c | |||
|  | 69470ccc10 | ||
|  | b8b5934193 | ||
|  | 75856f2945 | ||
|  | 3c112a7a25 | ||
|  | ab3596d4d3 | ||
|  | a8c10b1933 | ||
|  | 15e801af3f | ||
|  | 0ffc235741 | ||
|  | 8e19c99c7d | ||
|  | a0bc0ad06f | ||
|  | a8fb2835ca | ||
|  | bc862ce3ab | ||
| 22f4feee7b | |||
| 3f858d6755 | |||
|  | 3267683e22 | ||
|  | f46a67ffb3 | ||
|  | f7b8383ef5 | ||
|  | 10f2872aae | ||
| 35fa3d1dfd | |||
|  | cd73897b8d | ||
|  | c4435e6beb | ||
|  | 7a8f6af5f8 | ||
|  | 49a5d9bac7 | ||
|  | 2b3fdd4a58 | ||
|  | 34502ec471 | ||
|  | 8a43e88b4f | ||
| d1ece74137 | |||
|  | 238df20370 | ||
|  | 97a32a6145 | ||
|  | 655492a443 | ||
|  | 1cab06f6bd | ||
| 43c817cc67 | |||
|  | f8024c262b | ||
|  | 4cc5f01f4a | ||
|  | 9c12c37aaf | ||
|  | 806eaa0530 | ||
|  | 01d0e54594 | ||
|  | 5aafa335fe | ||
|  | 8ba0494485 | ||
|  | d99d98d9fd | ||
|  | 95a017a4ae | ||
|  | 92f92379e6 | ||
|  | 529e78d43f | ||
|  | 4ec746d262 | ||
|  | 51bf1501fc | ||
|  | 66d819c054 | ||
|  | 3f3686f869 | ||
|  | 26bb829f8c | ||
|  | 67cb04fc66 | ||
|  | a40bd68aed | ||
|  | 36495e0fd2 | ||
|  | 93f6c15772 | ||
|  | cb93eeff21 | ||
|  | c7cc7e6101 | ||
|  | c349aa6511 | ||
|  | 3bae0a2d5c | ||
|  | c1c7566089 | ||
|  | 2439999ec8 | ||
|  | 1d96f662e3 | ||
|  | 41d1889941 | ||
|  | 0c3981e0c3 | ||
|  | c727bd4609 | ||
|  | db23749b67 | ||
|  | 751f2b9703 | ||
|  | 741bc836f6 | ||
|  | 697c0603ce | ||
|  | 14bedebb11 | ||
|  | 8546d01a4c | ||
|  | 47b5c07ffb | ||
|  | da86a2bf54 | ||
|  | c1cb60a0b3 | ||
|  | 5ed5b4bfbf | ||
|  | de84aacdfd | ||
|  | 2888003765 | ||
|  | da06bf5b95 | ||
|  | 20999c1370 | ||
|  | 33f0ed1a33 | ||
|  | 50be56433b | ||
|  | 43924007db | ||
|  | 78ef10e60f | ||
| 679ae98b14 | |||
|  | 90f6bc16bb | ||
|  | 9b5b639546 | ||
|  | 945767c6d8 | ||
|  | 422cdf4979 | ||
|  | 38db174f3b | ||
|  | 92e364a35f | ||
| 58299b8ba2 | |||
| 124bf4d829 | |||
| e8e56b3414 | |||
| 89c430136d | |||
| ea9aef7baa | |||
| c9e9e8061d | |||
|  | 453cf2a1c6 | ||
|  | de7bbfa5f9 | ||
| dda8d77c87 | |||
| aa29f4346a | |||
|  | 86116dbed6 | ||
|  | 7bd31e3f7c | ||
|  | 74f451715f | ||
|  | 655be8ed76 | ||
|  | 4063238943 | ||
|  | 3344788fa1 | ||
|  | 99220f6531 | ||
|  | 2a6d093749 | ||
|  | c947947fad | ||
|  | f555b50547 | ||
|  | 738c1a11c2 | ||
|  | f8797e1e3e | ||
|  | fd1eb7de13 | ||
|  | 2ce898efa3 | ||
|  | ab66bac4e6 | ||
|  | 56277a11c8 | ||
|  | 916e9e1d3e | ||
|  | 5b55867a7a | ||
|  | 3accb1ef89 | ||
|  | e3d0e31525 | ||
|  | 5812eb8a8c | ||
|  | 4dd3763294 | ||
|  | c429ace748 | ||
|  | ac58565d0a | ||
|  | 3703b718aa | ||
|  | b722889234 | ||
|  | abba44a837 | ||
|  | f301be94ce | ||
|  | 1d1b225497 | ||
|  | 53a785a3dd | ||
|  | 736bf3c866 | ||
|  | b9bbe5d188 | ||
|  | 3844bcf800 | ||
|  | e1a2319d01 | ||
|  | 180c732b4c | ||
|  | 957a706d0b | ||
|  | d2312e9874 | ||
|  | fc4ab9ccd5 | ||
|  | 4a340aa5ca | ||
|  | 3b7de792d5 | ||
|  | 557c3fa109 | ||
|  | ec18e9f7f6 | ||
|  | a839d5bc55 | ||
|  | de41b84c5c | ||
|  | 8e161152e4 | ||
|  | 3141ebac10 | ||
|  | 7ede696126 | ||
|  | bf516c3b81 | ||
|  | 441a52ee5d | ||
|  | a8db024c92 | ||
|  | a9c22d5f43 | ||
|  | 3ca41458a3 | ||
|  | 9e2d29c644 | ||
|  | 951be75292 | ||
|  | b9113ed310 | ||
| 1407418755 | |||
| a6a0da873f | |||
|  | 42fb49d3fd | ||
|  | 2a54c9aaab | ||
|  | 0957378679 | ||
|  | 2ed6c76fc5 | ||
|  | d3b9a7fa14 | ||
|  | 75ea306ce9 | ||
|  | 4226c633c4 | ||
|  | 5a4eafbf7e | ||
|  | eb8e26018b | ||
|  | db5ea001a3 | ||
|  | 2846f079e5 | ||
|  | 1d502e4ed6 | ||
|  | 73cdf0fffe | ||
|  | 1c25773319 | ||
|  | c38400b26f | ||
|  | 9c3065b860 | ||
|  | 94eb829d08 | ||
|  | 68392ddb5b | ||
|  | cb6b81ae82 | ||
| 90ec6eda0c | |||
| fe8d625694 | |||
| 53e76b41d2 | |||
| 8ef4300412 | |||
| 98a24ebf31 | |||
|  | b12dc89d26 | ||
|  | d80d802f9d | ||
|  | 3d99b09dba | ||
|  | db5f6d3ae3 | ||
|  | 683550f116 | ||
|  | 5e477ec553 | ||
|  | 55d0329624 | ||
|  | 86aaa35294 | ||
|  | 172d3dc93a | ||
|  | 7b03d8d087 | ||
|  | 4b759b8f2a | ||
|  | 8c540333d5 | ||
|  | 5592f7b8c1 | ||
|  | 35da4ece0b | ||
|  | 061b15b9e9 | ||
| ff4e54ef80 | |||
|  | cd1bd921bd | ||
|  | fff5751b1a | ||
|  | 2c81696fdd | ||
|  | c9dc22efa1 | ||
|  | 0ab04a000f | ||
|  | 4c1ea8677e | ||
|  | 120fb59978 | ||
|  | fd56b3ff38 | ||
|  | 0ec6829edc | ||
|  | 18b7845b7b | ||
|  | 3d0fe15374 | ||
|  | 91886068fe | ||
|  | 6d1e9e5f92 | ||
|  | b640230b1e | ||
|  | 038b6ee9cd | ||
|  | 38806343a8 | ||
|  | 831ca4e3bf | ||
|  | b3dede4dd3 | ||
|  | 4e34132f4d | ||
|  | c07cb10247 | ||
|  | d7767a2a62 | ||
|  | ec035983fd | ||
|  | 596dcd85b2 | ||
|  | 7270c6a150 | ||
|  | f8b9ad7d50 | ||
|  | 04a1959895 | ||
|  | 93cc270016 | ||
|  | 29b60f7e1a | ||
|  | 902afcfbaf | ||
|  | 97a6b61551 | ||
|  | f011bdb869 | ||
|  | bafb101e4f | ||
|  | 08fdf05528 | ||
|  | 9e72a6b22e | ||
|  | 1c12c5612c | ||
|  | a8193c4bcb | ||
|  | c3d7ec65fa | ||
|  | 8b6a6c8236 | ||
|  | e0571c872b | ||
|  | c67f41887b | ||
|  | 84687ccf1f | ||
|  | 3274561cf8 | ||
| e08fbb3771 | |||
|  | d7464aa0fe | ||
|  | 00d29153f0 | ||
| 2ce989f220 | |||
|  | d7a1dc85be | ||
|  | fc19503673 | ||
|  | beba824136 | ||
|  | 6ebf8b12b6 | ||
|  | e5a7ed4362 | ||
|  | b9f7ea47c3 | ||
|  | 06f7ee202e | ||
|  | 2b2fc6453f | ||
|  | bdd2765461 | ||
|  | 4a45c06dd7 | ||
|  | d6a7d7d1e0 | ||
|  | 1a122a0dd8 | ||
|  | 20e20733e8 | ||
|  | b7cd1a19e3 | ||
|  | f510002a62 | ||
| eedcaf6470 | |||
|  | 1e257a1251 | ||
|  | 522f6bf91a | ||
|  | d35d87d2c2 | ||
|  | 74a5cda84b | ||
|  | 5be05d85b8 | ||
|  | 35ac85aea8 | ||
|  | fa237401ff | ||
|  | 97053adcb5 | ||
|  | f8fbe4d7a3 | ||
|  | ef31c012bf | ||
|  | 9e9f621d5d | ||
|  | 651e1a7cbc | ||
|  | c4d3672720 | ||
|  | 16be6d378c | ||
|  | f05d0565aa | ||
| b39f0d1fb6 | |||
| 9f1267dfe6 | |||
| 2e90285232 | |||
| e254de982e | |||
| 28d99b5297 | |||
|  | 9bf4108d1f | ||
|  | ee93f0218b | ||
|  | 6929a84c70 | ||
|  | 5c779a789b | ||
| 161ed102a5 | |||
|  | e863a948e3 | ||
|  | f65a585236 | ||
|  | 977f34dca6 | ||
|  | 90ad956340 | ||
|  | 7996f06335 | ||
|  | 7b40a3e3e5 | ||
|  | f7fbbaaca3 | ||
|  | 17629b8d9e | ||
|  | 0baa20d292 | ||
|  | 4571c918a4 | ||
|  | 5251ea4d30 | ||
|  | 7f456b4173 | ||
|  | ae99e99da2 | ||
|  | c291ef77b5 | ||
|  | 7dd2764bb2 | ||
|  | 244f8fb6dc | ||
| f3ca29af6c | |||
| 37988221a8 | |||
|  | 27dfe816fa | ||
|  | af29be2c90 | ||
|  | f96fac0aee | ||
| 7a327a3f28 | |||
|  | 07f2ebea1b | ||
|  | 851f2ad8ef | ||
|  | 23e0561dd6 | ||
|  | 8ae1a95ec6 | ||
|  | 82b7d4eaf0 | ||
|  | 78774fbdc0 | ||
|  | 924130833e | ||
|  | 0157274762 | ||
|  | 87e8aad5a0 | ||
|  | c6f59c2933 | ||
|  | b7f90aa011 | ||
| 92f8950a56 | |||
| 65987a8a58 | |||
| 889d828bc2 | |||
|  | f22b79da8f | ||
|  | 3855673ebf | ||
|  | 4db82da0db | ||
|  | 0cdc3d2fa5 | ||
| ad98b6193d | |||
| fc760016b3 | |||
| 2da86f7dae | |||
|  | 0dfda4bb90 | ||
|  | 1189ebc8b5 | ||
| 97843e2b58 | |||
| 82b3f54697 | |||
|  | 1bb8578173 | ||
| 673994b281 | |||
| bbc0eff078 | |||
| 4c60e31070 | |||
| afbf7d4c37 | |||
| 8c3cc32364 | |||
|  | 5214846341 | ||
| 4c3fd9fa3f | |||
| 17b3a10d46 | |||
| 149a46b92c | |||
|  | ce1a115e0b | ||
| db9c28a773 | |||
| 9ac3ac41df | |||
| 2af9ab9034 | |||
| 6f1ea96293 | |||
| 2e3c5890b6 | |||
| bc6678732f | |||
| b10ae00c8a | |||
|  | 0cd6b1858c | ||
|  | 0bd296dda4 | ||
|  | af0ccdd8e9 | ||
|  | 2fb92dbc6e | ||
|  | 5c74b6028b | ||
|  | e0be2b6e6c | ||
|  | ef72f322d2 | ||
|  | 7bc2065113 | ||
|  | 2bd4233919 | ||
|  | 143c70e29f | ||
|  | b812d5e39c | ||
|  | 01480da0a8 | ||
|  | 6ad73145bc | ||
| f7293f2ddb | |||
|  | 62749d05a6 | ||
|  | 3834feb4b7 | ||
|  | 6b8ee7bae0 | ||
|  | 739c2308b5 | ||
|  | 454302414d | ||
|  | a71b69389b | ||
|  | d49e502f53 | ||
|  | 92ec3404f8 | ||
|  | f4ebea3381 | ||
|  | cf167d0cd1 | ||
|  | 6f8b771a37 | ||
|  | 4e1ffdd17c | ||
|  | a783282b8b | ||
|  | 19b85d8486 | ||
|  | c363bdd784 | ||
|  | c30d96ea50 | ||
|  | 7ffe17ada1 | ||
| 330a9b3f4c | |||
|  | 28ff66a381 | ||
|  | 78c7bcee36 | ||
| 00a7b95631 | |||
| 94d8321d01 | |||
|  | ac24cc9f99 | ||
|  | 1d666771f9 | ||
|  | d50055cd96 | ||
|  | 3ab4c8c0bb | ||
|  | 47c7159177 | ||
|  | f415db583a | ||
|  | f55c16f984 | ||
|  | df67e013ca | ||
|  | 3e990c9d0a | ||
|  | 4b740fc8fd | ||
|  | cccd14b09e | ||
|  | e6acffdfc2 | ||
| 26d124283e | |||
| 0d889b7041 | |||
| ab31ad006a | |||
|  | 392130a537 | ||
|  | deef2673b2 | ||
|  | 977b0a6dd9 | ||
|  | 977d844394 | ||
| 6e4a06e180 | |||
|  | 590675e2ca | ||
|  | 8c65bdf6d3 | ||
|  | 74f1ed3bc5 | ||
|  | 79270ef510 | ||
|  | e250e6b7bb | ||
|  | 261342c15f | ||
|  | eda4dd622e | ||
|  | c68a2b9637 | ||
|  | 293df6cd20 | ||
|  | 65f61bb3bf | ||
|  | 26b9740d53 | ||
|  | 6eb873dd96 | ||
|  | 11b4c80b27 | ||
|  | c065e454c3 | ||
|  | d9b5fbd374 | ||
|  | cfbc1a26b8 | ||
|  | 257f69f931 | ||
|  | e415260961 | ||
|  | 446c768cd3 | 
							
								
								
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -92,6 +92,7 @@ build*/* | ||||
| ##################### | ||||
| *.xcodeproj/* | ||||
| build.sh | ||||
| .vscode | ||||
|  | ||||
| # Eigen source # | ||||
| ################ | ||||
| @@ -106,6 +107,10 @@ lib/fftw/* | ||||
| m4/lt* | ||||
| m4/libtool.m4 | ||||
|  | ||||
| # github pages # | ||||
| ################ | ||||
| gh-pages/ | ||||
|  | ||||
| # Buck files # | ||||
| ############## | ||||
| .buck* | ||||
| @@ -116,4 +121,5 @@ make-bin-BUCK.sh | ||||
| # generated sources # | ||||
| ##################### | ||||
| lib/qcd/spin/gamma-gen/*.h | ||||
| lib/qcd/spin/gamma-gen/*.cc | ||||
| lib/qcd/spin/gamma-gen/*.cc | ||||
|  | ||||
|   | ||||
							
								
								
									
										76
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										76
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -7,64 +7,8 @@ cache: | ||||
| matrix: | ||||
|   include: | ||||
|     - os:        osx | ||||
|       osx_image: xcode7.2 | ||||
|       osx_image: xcode8.3 | ||||
|       compiler: clang | ||||
|     - compiler: gcc | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-4.9 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-4.9 | ||||
|     - compiler: gcc | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-5 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: VERSION=-5 | ||||
|     - compiler: clang | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-4.8 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||
|     - compiler: clang | ||||
|       addons: | ||||
|         apt: | ||||
|           sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|           packages: | ||||
|             - g++-4.8 | ||||
|             - libmpfr-dev | ||||
|             - libgmp-dev | ||||
|             - libmpc-dev | ||||
|             - libopenmpi-dev | ||||
|             - openmpi-bin | ||||
|             - binutils-dev | ||||
|       env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz | ||||
|        | ||||
| before_install: | ||||
|     - export GRIDDIR=`pwd` | ||||
| @@ -73,13 +17,15 @@ before_install: | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi | ||||
|      | ||||
| install: | ||||
|     - export CC=$CC$VERSION | ||||
|     - export CXX=$CXX$VERSION | ||||
|     - echo $PATH | ||||
|     - which autoconf | ||||
|     - autoconf  --version | ||||
|     - which automake | ||||
|     - automake  --version | ||||
|     - which $CC | ||||
|     - $CC  --version | ||||
|     - which $CXX | ||||
| @@ -92,15 +38,9 @@ script: | ||||
|     - cd build | ||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||
|     - make -j4  | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||
|     - echo make clean | ||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||
|     - make -j4 | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 | ||||
|     - echo make clean | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi | ||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto | ||||
|     - make -j4 | ||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||
|  | ||||
|  | ||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals | ||||
|     - make check | ||||
|   | ||||
							
								
								
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								Makefile.am
									
									
									
									
									
								
							| @@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras | ||||
|  | ||||
| include $(top_srcdir)/doxygen.inc | ||||
|  | ||||
| tests: all | ||||
| 	$(MAKE) -C tests tests | ||||
| bin_SCRIPTS=grid-config | ||||
|  | ||||
| .PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||
|  | ||||
| .PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) | ||||
|  | ||||
| tests-local: all | ||||
| bench-local: all | ||||
| check-local: all | ||||
|  | ||||
| AM_CXXFLAGS += -I$(top_builddir)/include | ||||
|  | ||||
| ACLOCAL_AMFLAGS = -I m4 | ||||
|   | ||||
							
								
								
									
										302
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										302
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,41 +1,13 @@ | ||||
| # Grid | ||||
| <table> | ||||
| <tr> | ||||
|     <td>Last stable release</td> | ||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a> | ||||
|     </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td>Development branch</td> | ||||
|     <td><a href="https://travis-ci.org/paboyle/Grid"> | ||||
|     <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a> | ||||
|     </td> | ||||
| </tr> | ||||
| </table> | ||||
| # Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid) | ||||
|  | ||||
| **Data parallel C++ mathematical object library.** | ||||
|  | ||||
| License: GPL v2. | ||||
|  | ||||
| Last update Nov 2016. | ||||
| Last update June 2017. | ||||
|  | ||||
| _Please do not send pull requests to the `master` branch which is reserved for releases._ | ||||
|  | ||||
| ### Bug report | ||||
|  | ||||
| _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ | ||||
|  | ||||
| When you file an issue, please go though the following checklist: | ||||
|  | ||||
| 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  | ||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler. | ||||
| 3. Give the exact `configure` command used. | ||||
| 4. Attach `config.log`. | ||||
| 5. Attach `config.summary`. | ||||
| 6. Attach the output of `make V=1`. | ||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||
|  | ||||
|  | ||||
|  | ||||
| ### Description | ||||
| @@ -58,13 +30,68 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi | ||||
| for most programmers. | ||||
|  | ||||
| The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. | ||||
| Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). | ||||
| Presently SSE4, ARM NEON (128 bits) AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. | ||||
|  | ||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. | ||||
| These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types.  | ||||
| The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. | ||||
|  | ||||
| MPI, OpenMP, and SIMD parallelism are present in the library. | ||||
| Please see https://arxiv.org/abs/1512.03487 for more detail. | ||||
| Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. | ||||
|  | ||||
|  | ||||
| ### Compilers | ||||
|  | ||||
| Intel ICPC v16.0.3 and later | ||||
|  | ||||
| Clang v3.5 and later (need 3.8 and later for OpenMP) | ||||
|  | ||||
| GCC   v4.9.x (recommended) | ||||
|  | ||||
| GCC   v6.3 and later | ||||
|  | ||||
| ### Important:  | ||||
|  | ||||
| Some versions of GCC appear to have a bug under high optimisation (-O2, -O3). | ||||
|  | ||||
| The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates. | ||||
|  | ||||
| GCC   v5.x | ||||
|  | ||||
| GCC   v6.1, v6.2 | ||||
|  | ||||
| ### Bug report | ||||
|  | ||||
| _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ | ||||
|  | ||||
| When you file an issue, please go though the following checklist: | ||||
|  | ||||
| 1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.  | ||||
| 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler. | ||||
| 3. Give the exact `configure` command used. | ||||
| 4. Attach `config.log`. | ||||
| 5. Attach `grid.config.summary`. | ||||
| 6. Attach the output of `make V=1`. | ||||
| 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. | ||||
|  | ||||
| ### Required libraries | ||||
| Grid requires: | ||||
|  | ||||
| [GMP](https://gmplib.org/),  | ||||
|  | ||||
| [MPFR](http://www.mpfr.org/)  | ||||
|  | ||||
| Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. | ||||
|  | ||||
| Grid optionally uses: | ||||
|  | ||||
| [HDF5](https://support.hdfgroup.org/HDF5/)   | ||||
|  | ||||
| [LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support.  | ||||
|  | ||||
| [FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. | ||||
|  | ||||
| LAPACK either generic version or Intel MKL library. | ||||
|  | ||||
|  | ||||
| ### Quick start | ||||
| First, start by cloning the repository: | ||||
| @@ -95,10 +122,10 @@ install Grid. Other options are detailed in the next section, you can also use ` | ||||
| `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to | ||||
| customise the build. | ||||
|  | ||||
| Finally, you can build and install Grid: | ||||
| Finally, you can build, check, and install Grid: | ||||
|  | ||||
| ``` bash | ||||
| make; make install | ||||
| make; make check; make install | ||||
| ``` | ||||
|  | ||||
| To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: | ||||
| @@ -121,7 +148,7 @@ If you want to build all the tests at once just use `make tests`. | ||||
| - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). | ||||
| - `--enable-precision={single|double}`: set the default precision (default: `double`). | ||||
| - `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below. | ||||
| - `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). | ||||
| - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). | ||||
| - `--disable-timers`: disable system dependent high-resolution timers. | ||||
| - `--enable-chroma`: enable Chroma regression tests. | ||||
| - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) | ||||
| @@ -135,7 +162,6 @@ The following options can be use with the `--enable-comms=` option to target dif | ||||
| | `none`         | no communications                                             | | ||||
| | `mpi[-auto]`   | MPI communications                                            | | ||||
| | `mpi3[-auto]`  | MPI communications using MPI 3 shared memory                  | | ||||
| | `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | ||||
| | `shmem `       | Cray SHMEM communications                                     | | ||||
|  | ||||
| For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead.   | ||||
| @@ -153,13 +179,13 @@ The following options can be use with the `--enable-simd=` option to target diff | ||||
| | `AVXFMA4`   | AVX (256 bit) + FMA4                   | | ||||
| | `AVX2`      | AVX 2 (256 bit)                        | | ||||
| | `AVX512`    | AVX 512 bit                            | | ||||
| | `QPX`       | QPX (256 bit)                          | | ||||
| | `NEONv8`    | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit)                     | | ||||
| | `QPX`       | IBM QPX (256 bit)                      | | ||||
|  | ||||
| Alternatively, some CPU codenames can be directly used: | ||||
|  | ||||
| | `<code>`    | Description                            | | ||||
| | ----------- | -------------------------------------- | | ||||
| | `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | | ||||
| | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | ||||
| | `BGQ`       | Blue Gene/Q                            | | ||||
|  | ||||
| @@ -176,21 +202,205 @@ The following configuration is recommended for the Intel Knights Landing platfor | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=KNL        \ | ||||
|              --enable-comms=mpi-auto \ | ||||
|              --with-gmp=<path>        \ | ||||
|              --with-mpfr=<path>       \ | ||||
|              --enable-comms=mpi-auto  \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=icpc MPICXX=mpiicpc | ||||
| ``` | ||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. | ||||
|  | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=KNL        \ | ||||
|              --enable-comms=mpi       \ | ||||
|              --with-gmp=<path>        \ | ||||
|              --with-mpfr=<path>       \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=CC CC=cc | ||||
| ``` | ||||
| ``` | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| Knight's Landing with Intel Omnipath adapters with two adapters per node  | ||||
| presently performs better with use of more than one rank per node, using shared memory  | ||||
| for interior communication. This is the mpi3 communications implementation.  | ||||
| We recommend four ranks per node for best performance, but optimum is local volume dependent. | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=KNL        \ | ||||
|              --enable-comms=mpi3-auto \ | ||||
|              --enable-mkl             \ | ||||
|              CC=icpc MPICXX=mpiicpc  | ||||
| ``` | ||||
|  | ||||
| ### Build setup for Intel Haswell Xeon platform | ||||
|  | ||||
| The following configuration is recommended for the Intel Haswell platform: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX2       \ | ||||
|              --enable-comms=mpi3-auto \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=icpc MPICXX=mpiicpc | ||||
| ``` | ||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX2       \ | ||||
|              --enable-comms=mpi3      \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=CC CC=cc | ||||
| ``` | ||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  | ||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using | ||||
| ``` | ||||
|         export I_MPI_PIN=1 | ||||
| ``` | ||||
| This is the default. | ||||
|  | ||||
| ### Build setup for Intel Skylake Xeon platform | ||||
|  | ||||
| The following configuration is recommended for the Intel Skylake platform: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX512     \ | ||||
|              --enable-comms=mpi3      \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=mpiicpc | ||||
| ``` | ||||
| The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library. | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX512     \ | ||||
|              --enable-comms=mpi3      \ | ||||
|              --enable-mkl             \ | ||||
|              CXX=CC CC=cc | ||||
| ``` | ||||
| Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of  | ||||
| one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using | ||||
| ```  | ||||
|         export I_MPI_PIN=1 | ||||
| ``` | ||||
| This is the default.  | ||||
|  | ||||
| #### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping):  | ||||
|  | ||||
| mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18  | ||||
|  | ||||
| TBA | ||||
|  | ||||
|  | ||||
| ### Build setup for AMD EPYC / RYZEN | ||||
|  | ||||
| The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. | ||||
| So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total | ||||
| are common. Each chip within the module exposes a separate NUMA domain. | ||||
| There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. | ||||
| MPI-3 is recommended with the use of four ranks per socket, | ||||
| and 8 threads per rank.  | ||||
|  | ||||
| The following configuration is recommended for the AMD EPYC platform. | ||||
|  | ||||
| ``` bash | ||||
| ../configure --enable-precision=double\ | ||||
|              --enable-simd=AVX2       \ | ||||
|              --enable-comms=mpi3 \ | ||||
|              CXX=mpicxx  | ||||
| ``` | ||||
|  | ||||
| If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: | ||||
| ``` bash | ||||
|                --with-gmp=<path>        \ | ||||
|                --with-mpfr=<path>       \ | ||||
| ``` | ||||
| where `<path>` is the UNIX prefix where GMP and MPFR are installed.  | ||||
|  | ||||
| Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. | ||||
| This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.  | ||||
|  | ||||
| It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and | ||||
| shared memory to communicate within this node: | ||||
|  | ||||
| mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4  | ||||
|  | ||||
| Where omp_bind.sh does the following: | ||||
| ``` | ||||
| #!/bin/bash | ||||
|  | ||||
| numanode=` expr $PMI_RANK % 8 ` | ||||
| basecore=`expr $numanode \* 16` | ||||
| core0=`expr $basecore + 0 ` | ||||
| core1=`expr $basecore + 2 ` | ||||
| core2=`expr $basecore + 4 ` | ||||
| core3=`expr $basecore + 6 ` | ||||
| core4=`expr $basecore + 8 ` | ||||
| core5=`expr $basecore + 10 ` | ||||
| core6=`expr $basecore + 12 ` | ||||
| core7=`expr $basecore + 14 ` | ||||
|  | ||||
| export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" | ||||
| echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY | ||||
|  | ||||
| $@ | ||||
| ``` | ||||
|  | ||||
| Performance: | ||||
|  | ||||
| #### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping):  | ||||
|  | ||||
| mpirun  -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 | ||||
|  | ||||
| TBA | ||||
|  | ||||
| ### Build setup for BlueGene/Q | ||||
|  | ||||
| To be written... | ||||
|  | ||||
| ### Build setup for ARM Neon | ||||
|  | ||||
| To be written... | ||||
|  | ||||
| ### Build setup for laptops, other compilers, non-cluster builds | ||||
|  | ||||
| Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX), | ||||
| and omit the enable-mkl flag.  | ||||
|  | ||||
| Single node builds are enabled with  | ||||
| ``` | ||||
|             --enable-comms=none | ||||
| ``` | ||||
|  | ||||
| FFTW support that is not in the default search path may then enabled with | ||||
| ``` | ||||
|     --with-fftw=<installpath> | ||||
| ``` | ||||
|  | ||||
| BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. | ||||
|  | ||||
|   | ||||
							
								
								
									
										70
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										70
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,6 +1,35 @@ | ||||
| TODO: | ||||
| --------------- | ||||
|  | ||||
| Large item work list: | ||||
|  | ||||
| 1)- BG/Q port and check ; Andrew says ok. | ||||
| 2)- Christoph's local basis expansion Lanczos | ||||
| -- | ||||
| 3a)- RNG I/O in ILDG/SciDAC (minor) | ||||
| 3b)- Precision conversion and sort out localConvert      <-- partial/easy | ||||
| 3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | ||||
| 4)- Physical propagator interface | ||||
| 5)- Conserved currents | ||||
| 6)- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||
| 7)- HDCR resume | ||||
|  | ||||
| Recent DONE  | ||||
| -- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori | ||||
| -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE | ||||
| -- GaugeFix into central location                      <-- DONE | ||||
| -- Scidac and Ildg metadata handling                   <-- DONE | ||||
| -- Binary I/O MPI2 IO                                  <-- DONE | ||||
| -- Binary I/O speed up & x-strips                      <-- DONE | ||||
| -- Cut down the exterior overhead                      <-- DONE | ||||
| -- Interior legs from SHM comms                        <-- DONE | ||||
| -- Half-precision comms                                <-- DONE | ||||
| -- Merge high precision reduction into develop         <-- DONE | ||||
| -- BlockCG, BCGrQ                                      <-- DONE | ||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE | ||||
|    -- slice* linalg routines for multiRHS, BlockCG     | ||||
|  | ||||
| ----- | ||||
| * Forces; the UdSdU  term in gauge force term is half of what I think it should | ||||
|   be. This is a consequence of taking ONLY the first term in: | ||||
|  | ||||
| @@ -21,16 +50,8 @@ TODO: | ||||
|   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. | ||||
|   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. | ||||
|  | ||||
|  | ||||
| Policies: | ||||
|  | ||||
| * Link smearing/boundary conds; Policy class based implementation ; framework more in place | ||||
|  | ||||
| * Support different boundary conditions (finite temp, chem. potential ... ) | ||||
|  | ||||
| * Support different fermion representations?  | ||||
|   - contained entirely within the integrator presently | ||||
|  | ||||
| - Sign of force term. | ||||
|  | ||||
| - Reversibility test. | ||||
| @@ -41,11 +62,6 @@ Policies: | ||||
|  | ||||
| - Audit oIndex usage for cb behaviour | ||||
|  | ||||
| - Rectangle gauge actions. | ||||
|   Iwasaki, | ||||
|   Symanzik, | ||||
|   ... etc... | ||||
|  | ||||
| - Prepare multigrid for HMC. - Alternate setup schemes. | ||||
|  | ||||
| - Support for ILDG --- ugly, not done | ||||
| @@ -55,9 +71,11 @@ Policies: | ||||
| - FFTnD ? | ||||
|  | ||||
| - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | ||||
|  | ||||
| - Gparity force term; Gparity (R)HMC. | ||||
| - Random number state save restore | ||||
|  | ||||
| - Mobius implementation clean up to rmove #if 0 stale code sequences | ||||
|  | ||||
| - CG -- profile carefully, kernel fusion, whole CG performance measurements. | ||||
|  | ||||
| ================================================================ | ||||
| @@ -90,6 +108,7 @@ Insert/Extract | ||||
| Not sure of status of this -- reverify. Things are working nicely now though. | ||||
|  | ||||
| * Make the Tensor types and Complex etc... play more nicely. | ||||
|  | ||||
|   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > | ||||
|     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I | ||||
|     want to introduce a syntax that does not require this. | ||||
| @@ -112,6 +131,8 @@ Not sure of status of this -- reverify. Things are working nicely now though. | ||||
| RECENT | ||||
| --------------- | ||||
|  | ||||
|   - Support different fermion representations? -- DONE | ||||
|   - contained entirely within the integrator presently | ||||
|   - Clean up HMC                                                             -- DONE | ||||
|   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE | ||||
|   - Simplified the integrators a bit.                                        -- DONE | ||||
| @@ -123,6 +144,26 @@ RECENT | ||||
|   - Parallel io improvements                                  -- DONE | ||||
|   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE | ||||
|  | ||||
|  | ||||
| DONE: | ||||
| - MultiArray -- MultiRHS done | ||||
| - ConjugateGradientMultiShift -- DONE | ||||
| - MCR                         -- DONE | ||||
| - Remez -- Mike or Boost?     -- DONE | ||||
| - Proto (ET)                  -- DONE | ||||
| - uBlas                       -- DONE ; Eigen | ||||
| - Potentially Useful Boost libraries -- DONE ; Eigen | ||||
| - Aligned allocator; memory pool -- DONE | ||||
| - Multiprecision              -- DONE | ||||
| - Serialization               -- DONE | ||||
| - Regex -- Not needed | ||||
| - Tokenize -- Why? | ||||
|  | ||||
| - Random number state save restore -- DONE | ||||
| - Rectangle gauge actions. -- DONE | ||||
|   Iwasaki, | ||||
|   Symanzik, | ||||
|   ... etc... | ||||
| Done: Cayley, Partial , ContFrac force terms. | ||||
|  | ||||
| DONE | ||||
| @@ -207,6 +248,7 @@ Done | ||||
| FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | ||||
| ====================================================================================================== | ||||
|  | ||||
| * Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE | ||||
| * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | ||||
|   user pass these? Is this a QCD specific? | ||||
|  | ||||
|   | ||||
							
								
								
									
										9
									
								
								VERSION
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								VERSION
									
									
									
									
									
								
							| @@ -1,6 +1,5 @@ | ||||
| Version : 0.6.0 | ||||
| Version : 0.7.0 | ||||
|  | ||||
| - AVX512, AVX2, AVX, SSE good | ||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above | ||||
| - MPI and MPI3 | ||||
| - HiRep, Smearing, Generic gauge group | ||||
| - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended | ||||
| - MPI and MPI3 comms optimisations for KNL and OPA finished | ||||
| - Half precision comms | ||||
|   | ||||
							
								
								
									
										800
									
								
								benchmarks/Benchmark_ITT.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										800
									
								
								benchmarks/Benchmark_ITT.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,800 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./benchmarks/Benchmark_memory_bandwidth.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; | ||||
| typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; | ||||
|  | ||||
|  | ||||
| std::vector<int> L_list; | ||||
| std::vector<int> Ls_list; | ||||
| std::vector<double> mflop_list; | ||||
|  | ||||
| double mflop_ref; | ||||
| double mflop_ref_err; | ||||
|  | ||||
| int NN_global; | ||||
|  | ||||
| struct time_statistics{ | ||||
|   double mean; | ||||
|   double err; | ||||
|   double min; | ||||
|   double max; | ||||
|  | ||||
|   void statistics(std::vector<double> v){ | ||||
|       double sum = std::accumulate(v.begin(), v.end(), 0.0); | ||||
|       mean = sum / v.size(); | ||||
|  | ||||
|       std::vector<double> diff(v.size()); | ||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); | ||||
|       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); | ||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); | ||||
|  | ||||
|       auto result = std::minmax_element(v.begin(), v.end()); | ||||
|       min = *result.first; | ||||
|       max = *result.second; | ||||
| } | ||||
| }; | ||||
|  | ||||
| void comms_header(){ | ||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||
| }; | ||||
|  | ||||
| Gamma::Algebra Gmu [] = { | ||||
|   Gamma::Algebra::GammaX, | ||||
|   Gamma::Algebra::GammaY, | ||||
|   Gamma::Algebra::GammaZ, | ||||
|   Gamma::Algebra::GammaT | ||||
| }; | ||||
| struct controls { | ||||
|   int Opt; | ||||
|   int CommsOverlap; | ||||
|   Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; | ||||
|   //  int HugePages; | ||||
| }; | ||||
|  | ||||
| class Benchmark { | ||||
| public: | ||||
|   static void Decomposition (void ) { | ||||
|  | ||||
|     int threads = GridThread::GetThreads(); | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; | ||||
|     std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|   } | ||||
|  | ||||
|   static void Comms(void) | ||||
|   { | ||||
|     int Nloop=200; | ||||
|     int nmu=0; | ||||
|     int maxlat=32; | ||||
|  | ||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); | ||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|     for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||
|  | ||||
|     std::vector<double> t_time(Nloop); | ||||
|     time_statistics timestat; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|     comms_header(); | ||||
|  | ||||
|     for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|       for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
| 	std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
| 	      lat*mpi_layout[1], | ||||
| 	      lat*mpi_layout[2], | ||||
| 	      lat*mpi_layout[3]}); | ||||
|  | ||||
| 	GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
| 	RealD Nrank = Grid._Nprocessors; | ||||
| 	RealD Nnode = Grid.NodeCount(); | ||||
| 	RealD ppn = Nrank/Nnode; | ||||
|  | ||||
| 	std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
| 	std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| 	Grid.ShmBufferFreeAll(); | ||||
| 	for(int d=0;d<8;d++){ | ||||
| 	  xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	  rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	} | ||||
|  | ||||
| 	int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
| 	int ncomm; | ||||
| 	double dbytes; | ||||
| 	std::vector<double> times(Nloop); | ||||
| 	for(int i=0;i<Nloop;i++){ | ||||
|  | ||||
| 	  double start=usecond(); | ||||
|  | ||||
| 	  dbytes=0; | ||||
| 	  ncomm=0; | ||||
|  | ||||
| 	  parallel_for(int dir=0;dir<8;dir++){ | ||||
|  | ||||
| 	    double tbytes; | ||||
| 	    int mu =dir % 4; | ||||
|  | ||||
| 	    if (mpi_layout[mu]>1 ) { | ||||
| 	         | ||||
| 	      int xmit_to_rank; | ||||
| 	      int recv_from_rank; | ||||
| 	      if ( dir == mu ) {  | ||||
| 		int comm_proc=1; | ||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	      } else {  | ||||
| 		int comm_proc = mpi_layout[mu]-1; | ||||
| 		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	      } | ||||
| 	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | ||||
| 						 (void *)&rbuf[dir][0], recv_from_rank, | ||||
| 						 bytes,dir); | ||||
| 	   | ||||
| #ifdef GRID_OMP | ||||
| #pragma omp atomic | ||||
| #endif | ||||
| 	      ncomm++; | ||||
|  | ||||
| #ifdef GRID_OMP | ||||
| #pragma omp atomic | ||||
| #endif | ||||
| 	      dbytes+=tbytes; | ||||
| 	    } | ||||
| 	  } | ||||
| 	  Grid.Barrier(); | ||||
| 	  double stop=usecond(); | ||||
| 	  t_time[i] = stop-start; // microseconds | ||||
| 	} | ||||
|  | ||||
| 	timestat.statistics(t_time); | ||||
| 	//	for(int i=0;i<t_time.size();i++){ | ||||
| 	//	  std::cout << i<<" "<<t_time[i]<<std::endl; | ||||
| 	//	} | ||||
|  | ||||
| 	dbytes=dbytes*ppn; | ||||
| 	double xbytes    = dbytes*0.5; | ||||
| 	double rbytes    = dbytes*0.5; | ||||
| 	double bidibytes = dbytes; | ||||
|  | ||||
| 	std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
| 		 <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
| 		 <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
| 		 <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
| 		 << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
| 		 << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|   | ||||
| 	 | ||||
| 	    } | ||||
|     }     | ||||
|  | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   static void Memory(void) | ||||
|   { | ||||
|     const int Nvec=8; | ||||
|     typedef Lattice< iVector< vReal,Nvec> > LatticeVec; | ||||
|     typedef iVector<vReal,Nvec> Vec; | ||||
|  | ||||
|     std::vector<int> simd_layout = GridDefaultSimd(Nd,vReal::Nsimd()); | ||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|    | ||||
|     uint64_t NP; | ||||
|     uint64_t NN; | ||||
|  | ||||
|  | ||||
|   uint64_t lmax=48; | ||||
| #define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) | ||||
|  | ||||
|     GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|     for(int lat=8;lat<=lmax;lat+=4){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       NP= Grid.RankCount(); | ||||
|       NN =Grid.NodeCount(); | ||||
|  | ||||
|       Vec rn ; random(sRNG,rn); | ||||
|  | ||||
|       LatticeVec z(&Grid); z=rn; | ||||
|       LatticeVec x(&Grid); x=rn; | ||||
|       LatticeVec y(&Grid); y=rn; | ||||
|       double a=2.0; | ||||
|  | ||||
|       uint64_t Nloop=NLOOP; | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	z=a*x-y; | ||||
|         x._odata[0]=z._odata[0]; // force serial dependency to prevent optimise away | ||||
|         y._odata[4]=z._odata[4]; | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|       | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3)  | ||||
| 	       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000. | ||||
| 	       << "\t\t"<< bytes/time/NN <<std::endl; | ||||
|  | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   static double DWF5(int Ls,int L) | ||||
|   { | ||||
|     RealD mass=0.1; | ||||
|     RealD M5  =1.8; | ||||
|  | ||||
|     double mflops; | ||||
|     double mflops_best = 0; | ||||
|     double mflops_worst= 0; | ||||
|     std::vector<double> mflops_all; | ||||
|  | ||||
|     /////////////////////////////////////////////////////// | ||||
|     // Set/Get the layout & grid size | ||||
|     /////////////////////////////////////////////////////// | ||||
|     int threads = GridThread::GetThreads(); | ||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||
|     std::vector<int> local({L,L,L,L}); | ||||
|  | ||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  | ||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     uint64_t NP = TmpGrid->RankCount(); | ||||
|     uint64_t NN = TmpGrid->NodeCount(); | ||||
|     NN_global=NN; | ||||
|     uint64_t SHM=NP/NN; | ||||
|  | ||||
|     std::vector<int> internal; | ||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); | ||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); | ||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); | ||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); | ||||
|     else assert(0); | ||||
|  | ||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); | ||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); | ||||
|  | ||||
|     ///////// Welcome message //////////// | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Benchmark DWF Ls vec on "<<L<<"^4 local volume "<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|     ///////// Lattice Init //////////// | ||||
|     GridCartesian         * UGrid    = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     GridRedBlackCartesian * UrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|     GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); | ||||
|     GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); | ||||
|     GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); | ||||
|     GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|     ///////// RNG Init //////////// | ||||
|     std::vector<int> seeds4({1,2,3,4}); | ||||
|     std::vector<int> seeds5({5,6,7,8}); | ||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|     GridParallelRNG          RNG5(sFGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||
|  | ||||
|     ///////// Source preparation //////////// | ||||
|     LatticeFermion src   (sFGrid); random(RNG5,src); | ||||
|     LatticeFermion tmp   (sFGrid); | ||||
|  | ||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); | ||||
|     src = src*N2; | ||||
|      | ||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  | ||||
|  | ||||
|     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); | ||||
|     LatticeFermion src_e (sFrbGrid); | ||||
|     LatticeFermion src_o (sFrbGrid); | ||||
|     LatticeFermion r_e   (sFrbGrid); | ||||
|     LatticeFermion r_o   (sFrbGrid); | ||||
|     LatticeFermion r_eo  (sFGrid); | ||||
|     LatticeFermion err   (sFGrid); | ||||
|     { | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd,src_o,src); | ||||
|  | ||||
| #if defined(AVX512)  | ||||
|       const int num_cases = 6; | ||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); | ||||
| #else | ||||
|       const int num_cases = 4; | ||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); | ||||
| #endif | ||||
|       controls Cases [] = { | ||||
| #ifdef AVX512 | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| #endif | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } | ||||
|       };  | ||||
|  | ||||
|       for(int c=0;c<num_cases;c++) { | ||||
|  | ||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; | ||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||
|  | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
| 	int nwarm = 100; | ||||
| 	uint64_t ncall = 1000; | ||||
|  | ||||
| 	double t0=usecond(); | ||||
| 	sFGrid->Barrier(); | ||||
| 	for(int i=0;i<nwarm;i++){ | ||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	} | ||||
| 	sFGrid->Barrier(); | ||||
| 	double t1=usecond(); | ||||
|  | ||||
| 	sDw.ZeroCounters(); | ||||
| 	time_statistics timestat; | ||||
| 	std::vector<double> t_time(ncall); | ||||
| 	for(uint64_t i=0;i<ncall;i++){ | ||||
| 	  t0=usecond(); | ||||
| 	  sDw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	  t1=usecond(); | ||||
| 	  t_time[i] = t1-t0; | ||||
| 	} | ||||
| 	sFGrid->Barrier(); | ||||
| 	 | ||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
| 	double flops=(1344.0*volume)/2; | ||||
| 	double mf_hi, mf_lo, mf_err; | ||||
|  | ||||
| 	timestat.statistics(t_time); | ||||
| 	mf_hi = flops/timestat.min; | ||||
| 	mf_lo = flops/timestat.max; | ||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||
|  | ||||
| 	mflops = flops/timestat.mean; | ||||
| 	mflops_all.push_back(mflops); | ||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||
|  | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per rank   "<< mflops/NP<<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"sDeo mflop/s per node   "<< mflops/NN<<std::endl; | ||||
|  | ||||
| 	sDw.Report(); | ||||
|  | ||||
|       } | ||||
|       double robust = mflops_worst/mflops_best;; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " sDeo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage <<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust <<std::endl; | ||||
|       std::cout<<GridLogMessage <<fmt << std::endl; | ||||
|       std::cout<<GridLogMessage; | ||||
|  | ||||
|       for(int i=0;i<mflops_all.size();i++){ | ||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||
|       } | ||||
|       std::cout<<std::endl; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|     } | ||||
|     return mflops_best; | ||||
|   } | ||||
|  | ||||
|   static double DWF(int Ls,int L, double & robust) | ||||
|   { | ||||
|     RealD mass=0.1; | ||||
|     RealD M5  =1.8; | ||||
|  | ||||
|     double mflops; | ||||
|     double mflops_best = 0; | ||||
|     double mflops_worst= 0; | ||||
|     std::vector<double> mflops_all; | ||||
|  | ||||
|     /////////////////////////////////////////////////////// | ||||
|     // Set/Get the layout & grid size | ||||
|     /////////////////////////////////////////////////////// | ||||
|     int threads = GridThread::GetThreads(); | ||||
|     std::vector<int> mpi = GridDefaultMpi(); assert(mpi.size()==4); | ||||
|     std::vector<int> local({L,L,L,L}); | ||||
|  | ||||
|     GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(std::vector<int>({64,64,64,64}),  | ||||
| 								       GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     uint64_t NP = TmpGrid->RankCount(); | ||||
|     uint64_t NN = TmpGrid->NodeCount(); | ||||
|     NN_global=NN; | ||||
|     uint64_t SHM=NP/NN; | ||||
|  | ||||
|     std::vector<int> internal; | ||||
|     if      ( SHM == 1 )   internal = std::vector<int>({1,1,1,1}); | ||||
|     else if ( SHM == 2 )   internal = std::vector<int>({2,1,1,1}); | ||||
|     else if ( SHM == 4 )   internal = std::vector<int>({2,2,1,1}); | ||||
|     else if ( SHM == 8 )   internal = std::vector<int>({2,2,2,1}); | ||||
|     else assert(0); | ||||
|  | ||||
|     std::vector<int> nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); | ||||
|     std::vector<int> latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); | ||||
|  | ||||
|     ///////// Welcome message //////////// | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* MPI ranks      : "<<GridCmdVectorIntToString(mpi)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Intranode      : "<<GridCmdVectorIntToString(internal)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* nodes          : "<<GridCmdVectorIntToString(nodes)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|  | ||||
|     ///////// Lattice Init //////////// | ||||
|     GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|     GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|      | ||||
|     ///////// RNG Init //////////// | ||||
|     std::vector<int> seeds4({1,2,3,4}); | ||||
|     std::vector<int> seeds5({5,6,7,8}); | ||||
|     GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|     GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|     std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||
|  | ||||
|     ///////// Source preparation //////////// | ||||
|     LatticeFermion src   (FGrid); random(RNG5,src); | ||||
|     LatticeFermion ref   (FGrid); | ||||
|     LatticeFermion tmp   (FGrid); | ||||
|  | ||||
|     RealD N2 = 1.0/::sqrt(norm2(src)); | ||||
|     src = src*N2; | ||||
|      | ||||
|     LatticeGaugeField Umu(UGrid);  SU3::HotConfiguration(RNG4,Umu);  | ||||
|  | ||||
|     DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|  | ||||
|     //////////////////////////////////// | ||||
|     // Naive wilson implementation | ||||
|     //////////////////////////////////// | ||||
|     { | ||||
|       LatticeGaugeField Umu5d(FGrid);  | ||||
|       std::vector<LatticeColourMatrix> U(4,FGrid); | ||||
|       for(int ss=0;ss<Umu._grid->oSites();ss++){ | ||||
| 	for(int s=0;s<Ls;s++){ | ||||
| 	  Umu5d._odata[Ls*ss+s] = Umu._odata[ss]; | ||||
| 	} | ||||
|       } | ||||
|       ref = zero; | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
| 	U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu); | ||||
|       } | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
| 	 | ||||
| 	tmp = U[mu]*Cshift(src,mu+1,1); | ||||
| 	ref=ref + tmp - Gamma(Gmu[mu])*tmp; | ||||
| 	 | ||||
| 	tmp =adj(U[mu])*src; | ||||
| 	tmp =Cshift(tmp,mu+1,-1); | ||||
| 	ref=ref + tmp + Gamma(Gmu[mu])*tmp; | ||||
|       } | ||||
|       ref = -0.5*ref; | ||||
|     } | ||||
|  | ||||
|     LatticeFermion src_e (FrbGrid); | ||||
|     LatticeFermion src_o (FrbGrid); | ||||
|     LatticeFermion r_e   (FrbGrid); | ||||
|     LatticeFermion r_o   (FrbGrid); | ||||
|     LatticeFermion r_eo  (FGrid); | ||||
|     LatticeFermion err   (FGrid); | ||||
|     { | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd,src_o,src); | ||||
|  | ||||
| #if defined(AVX512)  | ||||
|       const int num_cases = 6; | ||||
|       std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); | ||||
| #else | ||||
|       const int num_cases = 4; | ||||
|       std::string fmt("U/S ; U/O ; G/S ; G/O "); | ||||
| #endif | ||||
|       controls Cases [] = { | ||||
| #ifdef AVX512 | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| #endif | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential  }, | ||||
| 	{ QCD::WilsonKernelsStatic::OptGeneric   , QCD::WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicySequential  } | ||||
|       };  | ||||
|  | ||||
|       for(int c=0;c<num_cases;c++) { | ||||
|  | ||||
| 	QCD::WilsonKernelsStatic::Comms = Cases[c].CommsOverlap; | ||||
| 	QCD::WilsonKernelsStatic::Opt   = Cases[c].Opt; | ||||
| 	CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch); | ||||
|  | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
| 	if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| 	if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
| 	if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| 	std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
| 	int nwarm = 200; | ||||
| 	double t0=usecond(); | ||||
| 	FGrid->Barrier(); | ||||
| 	for(int i=0;i<nwarm;i++){ | ||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	} | ||||
| 	FGrid->Barrier(); | ||||
| 	double t1=usecond(); | ||||
| 	//	uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); | ||||
| 	//	if (ncall < 500) ncall = 500; | ||||
| 	uint64_t ncall = 1000; | ||||
|  | ||||
| 	FGrid->Broadcast(0,&ncall,sizeof(ncall)); | ||||
|  | ||||
| 	//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; | ||||
| 	Dw.ZeroCounters(); | ||||
|  | ||||
| 	time_statistics timestat; | ||||
| 	std::vector<double> t_time(ncall); | ||||
| 	for(uint64_t i=0;i<ncall;i++){ | ||||
| 	  t0=usecond(); | ||||
| 	  Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	  t1=usecond(); | ||||
| 	  t_time[i] = t1-t0; | ||||
| 	} | ||||
| 	FGrid->Barrier(); | ||||
| 	 | ||||
| 	double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
| 	double flops=(1344.0*volume)/2; | ||||
| 	double mf_hi, mf_lo, mf_err; | ||||
|  | ||||
| 	timestat.statistics(t_time); | ||||
| 	mf_hi = flops/timestat.min; | ||||
| 	mf_lo = flops/timestat.max; | ||||
| 	mf_err= flops/timestat.min * timestat.err/timestat.mean; | ||||
|  | ||||
| 	mflops = flops/timestat.mean; | ||||
| 	mflops_all.push_back(mflops); | ||||
| 	if ( mflops_best == 0   ) mflops_best = mflops; | ||||
| 	if ( mflops_worst== 0   ) mflops_worst= mflops; | ||||
| 	if ( mflops>mflops_best ) mflops_best = mflops; | ||||
| 	if ( mflops<mflops_worst) mflops_worst= mflops; | ||||
|  | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl; | ||||
| 	std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl; | ||||
|  | ||||
| 	Dw.Report(); | ||||
|  | ||||
| 	Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| 	Dw.DhopOE(src_e,r_o,DaggerNo); | ||||
| 	setCheckerboard(r_eo,r_o); | ||||
| 	setCheckerboard(r_eo,r_e); | ||||
| 	err = r_eo-ref;  | ||||
| 	std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||
| 	assert((norm2(err)<1.0e-4)); | ||||
|  | ||||
|       } | ||||
|       robust = mflops_worst/mflops_best; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl; | ||||
|       std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl; | ||||
|       std::cout<<GridLogMessage << std::fixed<<std::setprecision(3)<< L<<"^4 x "<<Ls<< " Performance Robustness   =   "<< robust  <<std::endl; | ||||
|       std::cout<<GridLogMessage <<fmt << std::endl; | ||||
|       std::cout<<GridLogMessage ; | ||||
|  | ||||
|       for(int i=0;i<mflops_all.size();i++){ | ||||
| 	std::cout<<mflops_all[i]/NN<<" ; " ; | ||||
|       } | ||||
|       std::cout<<std::endl; | ||||
|       std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|     } | ||||
|     return mflops_best; | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential); | ||||
| #ifdef KNL | ||||
|   LebesgueOrder::Block = std::vector<int>({8,2,2,2}); | ||||
| #else | ||||
|   LebesgueOrder::Block = std::vector<int>({2,2,2,2}); | ||||
| #endif | ||||
|   Benchmark::Decomposition(); | ||||
|  | ||||
|   int do_memory=1; | ||||
|   int do_comms =1; | ||||
|   int do_su3   =0; | ||||
|   int do_wilson=1; | ||||
|   int do_dwf   =1; | ||||
|  | ||||
|   if ( do_su3 ) { | ||||
|     // empty for now | ||||
|   } | ||||
| #if 1 | ||||
|   int sel=2; | ||||
|   std::vector<int> L_list({8,12,16,24}); | ||||
| #else | ||||
|   int sel=1; | ||||
|   std::vector<int> L_list({8,12}); | ||||
| #endif | ||||
|   int selm1=sel-1; | ||||
|   std::vector<double> robust_list; | ||||
|  | ||||
|   std::vector<double> wilson; | ||||
|   std::vector<double> dwf4; | ||||
|   std::vector<double> dwf5; | ||||
|  | ||||
|   if ( do_wilson ) { | ||||
|     int Ls=1; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Wilson dslash 4D vectorised" <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     for(int l=0;l<L_list.size();l++){ | ||||
|       double robust; | ||||
|       wilson.push_back(Benchmark::DWF(1,L_list[l],robust)); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int Ls=16; | ||||
|   if ( do_dwf ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     for(int l=0;l<L_list.size();l++){ | ||||
|       double robust; | ||||
|       double result = Benchmark::DWF(Ls,L_list[l],robust) ; | ||||
|       dwf4.push_back(result); | ||||
|       robust_list.push_back(robust); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if ( do_dwf ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     for(int l=0;l<L_list.size();l++){ | ||||
|       dwf5.push_back(Benchmark::DWF5(Ls,L_list[l])); | ||||
|     } | ||||
|  | ||||
|   } | ||||
|  | ||||
|   if ( do_dwf ) { | ||||
|  | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl; | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "L \t\t Wilson \t DWF4 \t DWF5 " <<std::endl; | ||||
|   for(int l=0;l<L_list.size();l++){ | ||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t "<<dwf4[l]<<" \t "<<dwf5[l] <<std::endl; | ||||
|   } | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   } | ||||
|  | ||||
|   int NN=NN_global; | ||||
|   if ( do_memory ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     Benchmark::Memory(); | ||||
|   } | ||||
|  | ||||
|   if ( do_comms && (NN>1) ) { | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; | ||||
|     std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|     Benchmark::Comms(); | ||||
|   } | ||||
|  | ||||
|   if ( do_dwf ) { | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl; | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " L \t\t Wilson\t\t DWF4  \t\t DWF5 " <<std::endl; | ||||
|   for(int l=0;l<L_list.size();l++){ | ||||
|     std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]/NN<<" \t "<<dwf4[l]/NN<<" \t "<<dwf5[l] /NN<<std::endl; | ||||
|   } | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Comparison point     result: "  << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl; | ||||
|   std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl; | ||||
|   std::cout<<std::setprecision(3); | ||||
|   std::cout<<GridLogMessage << " Comparison point robustness: "  << robust_list[sel] <<std::endl; | ||||
|   std::cout<<GridLogMessage << "=================================================================================="<<std::endl; | ||||
|  | ||||
|   } | ||||
|  | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
| @@ -31,6 +31,32 @@ using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| struct time_statistics{ | ||||
|   double mean; | ||||
|   double err; | ||||
|   double min; | ||||
|   double max; | ||||
|  | ||||
|   void statistics(std::vector<double> v){ | ||||
|       double sum = std::accumulate(v.begin(), v.end(), 0.0); | ||||
|       mean = sum / v.size(); | ||||
|  | ||||
|       std::vector<double> diff(v.size()); | ||||
|       std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; }); | ||||
|       double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); | ||||
|       err = std::sqrt(sq_sum / (v.size()*(v.size() - 1))); | ||||
|  | ||||
|       auto result = std::minmax_element(v.begin(), v.end()); | ||||
|       min = *result.first; | ||||
|       max = *result.second; | ||||
| } | ||||
| }; | ||||
|  | ||||
| void header(){ | ||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
| @@ -40,17 +66,21 @@ int main (int argc, char ** argv) | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   int Nloop=10; | ||||
|   int Nloop=100; | ||||
|   int nmu=0; | ||||
|   int maxlat=32; | ||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||
|  | ||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; | ||||
|   std::vector<double> t_time(Nloop); | ||||
|   time_statistics timestat; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   int maxlat=24; | ||||
|   header(); | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
| @@ -58,15 +88,23 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	 | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|       for(int mu=0;mu<8;mu++){ | ||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; | ||||
|       } | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| @@ -79,7 +117,6 @@ int main (int argc, char ** argv) | ||||
| 	    int comm_proc=1; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.SendToRecvFromBegin(requests, | ||||
| 				   (void *)&xbuf[mu][0], | ||||
| @@ -102,18 +139,24 @@ int main (int argc, char ** argv) | ||||
| 	} | ||||
| 	Grid.SendToRecvFromComplete(requests); | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       double dbytes    = bytes*ppn; | ||||
|       double xbytes    = dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|     } | ||||
|   }     | ||||
|  | ||||
| @@ -121,25 +164,32 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|  | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > xbuf(8); | ||||
|       std::vector<Vector<HalfSpinColourVectorD> > rbuf(8); | ||||
|  | ||||
|       for(int mu=0;mu<8;mu++){ | ||||
| 	xbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	rbuf[mu].resize(lat*lat*lat*Ls); | ||||
| 	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl; | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       double start=usecond(); | ||||
|      | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| @@ -178,30 +228,37 @@ int main (int argc, char ** argv) | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|  | ||||
|       } | ||||
|  | ||||
|       double stop=usecond(); | ||||
|       timestat.statistics(t_time); | ||||
|        | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double dbytes    = bytes*ppn; | ||||
|       double xbytes    = dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|  | ||||
|       double time = stop-start; | ||||
|     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|        | ||||
|     } | ||||
|   }   | ||||
|  | ||||
|  | ||||
|   Nloop=10; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
| @@ -209,6 +266,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| @@ -216,73 +276,86 @@ int main (int argc, char ** argv) | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	 | ||||
|  | ||||
| 	  if (mpi_layout[mu]>1 ) { | ||||
| 	   | ||||
| 	    ncomm++; | ||||
| 	    int comm_proc=1; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu); | ||||
| 	 | ||||
| 	    comm_proc = mpi_layout[mu]-1; | ||||
| 	   | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu+4][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu+4][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu+4][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu+4][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu+4); | ||||
| 	   | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	Grid.StencilSendToRecvFromComplete(requests,0); | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
| 	 | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|  | ||||
|  | ||||
|   Nloop=100; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
| @@ -290,6 +363,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| @@ -297,16 +373,18 @@ int main (int argc, char ** argv) | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	 | ||||
| @@ -318,44 +396,146 @@ int main (int argc, char ** argv) | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu); | ||||
| 	    requests.resize(0); | ||||
|  | ||||
| 	    comm_proc = mpi_layout[mu]-1; | ||||
| 	   | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu+4][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu+4][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu+4][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu+4][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes,mu+4); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests,mu+4); | ||||
| 	    requests.resize(0); | ||||
| 	   | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
| 	 | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|   | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|  | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=8;Ls*=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0], | ||||
|       				    lat*mpi_layout[1], | ||||
|       				    lat*mpi_layout[2], | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
|       Grid.ShmBufferFreeAll(); | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
|  | ||||
| 	parallel_for(int dir=0;dir<8;dir++){ | ||||
|  | ||||
| 	  double tbytes; | ||||
| 	  int mu =dir % 4; | ||||
|  | ||||
| 	  if (mpi_layout[mu]>1 ) { | ||||
| 	   | ||||
| 	    ncomm++; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	    if ( dir == mu ) {  | ||||
| 	      int comm_proc=1; | ||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    } else {  | ||||
| 	      int comm_proc = mpi_layout[mu]-1; | ||||
| 	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    } | ||||
|  | ||||
| 	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, | ||||
| 					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); | ||||
|  | ||||
| #pragma omp atomic | ||||
| 	    dbytes+=tbytes; | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|       } | ||||
|  | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|   | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= All done; Bye Bye"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|   | ||||
| @@ -1,28 +1,22 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|  /************************************************************************************* | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./benchmarks/Benchmark_dwf.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| @@ -57,7 +51,13 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   std::vector<int> latt4 = GridDefaultLatt(); | ||||
|   const int Ls=16; | ||||
|   int Ls=16; | ||||
|   for(int i=0;i<argc;i++) | ||||
|     if(std::string(argv[i]) == "-Ls"){ | ||||
|       std::stringstream ss(argv[i+1]); ss >> Ls; | ||||
|     } | ||||
|  | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
| @@ -151,9 +151,7 @@ int main (int argc, char ** argv) | ||||
|   RealD M5  =1.8; | ||||
|  | ||||
|   RealD NP = UGrid->_Nprocessors; | ||||
|  | ||||
|   std::cout << GridLogMessage << "Creating action operator " << std::endl; | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   RealD NN = UGrid->NodeCount(); | ||||
|  | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||
| @@ -163,16 +161,22 @@ int main (int argc, char ** argv) | ||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| #endif | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|  | ||||
|   int ncall =1000; | ||||
|   DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   int ncall =500; | ||||
|   if (1) { | ||||
|     FGrid->Barrier(); | ||||
|     Dw.ZeroCounters(); | ||||
|     Dw.Dhop(src,result,0); | ||||
|     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
| @@ -190,6 +194,7 @@ int main (int argc, char ** argv) | ||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     err = ref-result;  | ||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||
|  | ||||
| @@ -206,6 +211,34 @@ int main (int argc, char ** argv) | ||||
|     Dw.Report(); | ||||
|   } | ||||
|  | ||||
|   DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   if (1) { | ||||
|     FGrid->Barrier(); | ||||
|     DwH.ZeroCounters(); | ||||
|     DwH.Dhop(src,result,0); | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
|       DwH.Dhop(src,result,0); | ||||
|       __SSC_STOP; | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|     FGrid->Barrier(); | ||||
|      | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=1344*volume*ncall; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     err = ref-result;  | ||||
|     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl; | ||||
|  | ||||
|     assert (norm2(err)< 1.0e-3 ); | ||||
|     DwH.Report(); | ||||
|   } | ||||
|  | ||||
|   if (1) | ||||
|   { | ||||
|  | ||||
| @@ -214,6 +247,10 @@ int main (int argc, char ** argv) | ||||
|     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||
|     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
|     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| #endif | ||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
|     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
| @@ -245,6 +282,7 @@ int main (int argc, char ** argv) | ||||
|     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; | ||||
|     sDw.Report(); | ||||
|     RealD sum=0; | ||||
| @@ -270,6 +308,7 @@ int main (int argc, char ** argv) | ||||
|       std::cout<< "sD ERR   \n " << err  <<std::endl; | ||||
|     } | ||||
|     assert(sum < 1.0e-4); | ||||
|  | ||||
|      | ||||
|     if(1){ | ||||
|       std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||
| @@ -277,6 +316,10 @@ int main (int argc, char ** argv) | ||||
|       std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; | ||||
|       if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
|       if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| #endif | ||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )  | ||||
| 	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
|       if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)  | ||||
| @@ -316,6 +359,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|       std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||
|       std::cout<<GridLogMessage << "sDeo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; | ||||
|       sDw.Report(); | ||||
|  | ||||
|       sDw.DhopEO(ssrc_o,sr_e,DaggerNo); | ||||
| @@ -344,8 +388,23 @@ int main (int argc, char ** argv) | ||||
|       } | ||||
|       assert(error<1.0e-4); | ||||
|     } | ||||
|  | ||||
|   if(0){ | ||||
|     std::cout << "Single cache warm call to sDw.Dhop " <<std::endl; | ||||
|     for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ | ||||
|       sDw.Dhop(ssrc,sresult,0); | ||||
|       PerformanceCounter Counter(i); | ||||
|       Counter.Start(); | ||||
|       sDw.Dhop(ssrc,sresult,0); | ||||
|       Counter.Stop(); | ||||
|       Counter.Report(); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   } | ||||
|  | ||||
|  | ||||
|  | ||||
|   if (1) | ||||
|   { // Naive wilson dag implementation | ||||
|     ref = zero; | ||||
| @@ -394,14 +453,15 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|  | ||||
|   // S-direction is INNERMOST and takes no part in the parity. | ||||
|   static int Opt;  // these are a temporary hack | ||||
|   static int Comms;  // these are a temporary hack | ||||
|  | ||||
|   std::cout << GridLogMessage<< "*********************************************************" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl; | ||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; | ||||
|   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; | ||||
|   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| #endif | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
| @@ -422,6 +482,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     Dw.Report(); | ||||
|   } | ||||
|   Dw.DhopEO(src_o,r_e,DaggerNo); | ||||
| @@ -448,8 +509,9 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl; | ||||
|   std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl; | ||||
|  | ||||
|   //assert(norm2(src_e)<1.0e-4); | ||||
|   //assert(norm2(src_o)<1.0e-4); | ||||
|  | ||||
|   assert(norm2(src_e)<1.0e-4); | ||||
|   assert(norm2(src_o)<1.0e-4); | ||||
|   Grid_finalize(); | ||||
|   exit(0); | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										190
									
								
								benchmarks/Benchmark_gparity.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										190
									
								
								benchmarks/Benchmark_gparity.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,190 @@ | ||||
| #include <Grid/Grid.h> | ||||
| #include <sstream> | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| template<class d> | ||||
| struct scal { | ||||
|   d internal; | ||||
| }; | ||||
|  | ||||
|   Gamma::Algebra Gmu [] = { | ||||
|     Gamma::Algebra::GammaX, | ||||
|     Gamma::Algebra::GammaY, | ||||
|     Gamma::Algebra::GammaZ, | ||||
|     Gamma::Algebra::GammaT | ||||
|   }; | ||||
|  | ||||
| typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF; | ||||
| typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD; | ||||
|  | ||||
|  | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   int Ls=16; | ||||
|   for(int i=0;i<argc;i++) | ||||
|     if(std::string(argv[i]) == "-Ls"){ | ||||
|       std::stringstream ss(argv[i+1]); ss >> Ls; | ||||
|     } | ||||
|  | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "Ls = " << Ls << std::endl; | ||||
|  | ||||
|   std::vector<int> latt4 = GridDefaultLatt(); | ||||
|  | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|    | ||||
|   std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|   std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; | ||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|   std::cout << GridLogMessage << "Initialised RNGs" << std::endl; | ||||
|  | ||||
|   GparityLatticeFermionF src   (FGrid); random(RNG5,src); | ||||
|   RealD N2 = 1.0/::sqrt(norm2(src)); | ||||
|   src = src*N2; | ||||
|  | ||||
|   GparityLatticeFermionF result(FGrid); result=zero; | ||||
|   GparityLatticeFermionF    ref(FGrid);    ref=zero; | ||||
|   GparityLatticeFermionF    tmp(FGrid); | ||||
|   GparityLatticeFermionF    err(FGrid); | ||||
|  | ||||
|   std::cout << GridLogMessage << "Drawing gauge field" << std::endl; | ||||
|   LatticeGaugeFieldF Umu(UGrid);  | ||||
|   SU3::HotConfiguration(RNG4,Umu);  | ||||
|   std::cout << GridLogMessage << "Random gauge initialised " << std::endl; | ||||
|  | ||||
|   RealD mass=0.1; | ||||
|   RealD M5  =1.8; | ||||
|  | ||||
|   RealD NP = UGrid->_Nprocessors; | ||||
|   RealD NN = UGrid->NodeCount(); | ||||
|  | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "* Benchmarking DomainWallFermion::Dhop                  "<<std::endl; | ||||
|   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl; | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl; | ||||
| #endif | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl; | ||||
|   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl; | ||||
|   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; | ||||
|  | ||||
|  | ||||
|  | ||||
|   std::cout << GridLogMessage<< "* SINGLE/SINGLE"<<std::endl; | ||||
|   GparityDomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   int ncall =1000; | ||||
|   if (1) { | ||||
|     FGrid->Barrier(); | ||||
|     Dw.ZeroCounters(); | ||||
|     Dw.Dhop(src,result,0); | ||||
|     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
|       Dw.Dhop(src,result,0); | ||||
|       __SSC_STOP; | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|     FGrid->Barrier(); | ||||
|      | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=2*1344*volume*ncall; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; | ||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     Dw.Report(); | ||||
|   } | ||||
|  | ||||
|   std::cout << GridLogMessage<< "* SINGLE/HALF"<<std::endl; | ||||
|   GparityDomainWallFermionFH DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   if (1) { | ||||
|     FGrid->Barrier(); | ||||
|     DwH.ZeroCounters(); | ||||
|     DwH.Dhop(src,result,0); | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
|       DwH.Dhop(src,result,0); | ||||
|       __SSC_STOP; | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|     FGrid->Barrier(); | ||||
|      | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=2*1344*volume*ncall; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     DwH.Report(); | ||||
|   } | ||||
|  | ||||
|   GridCartesian         * UGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d); | ||||
|   GridCartesian         * FGrid_d   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_d); | ||||
|   GridRedBlackCartesian * FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_d); | ||||
|  | ||||
|    | ||||
|   std::cout << GridLogMessage<< "* DOUBLE/DOUBLE"<<std::endl; | ||||
|   GparityLatticeFermionD src_d(FGrid_d); | ||||
|   precisionChange(src_d,src); | ||||
|  | ||||
|   LatticeGaugeFieldD Umu_d(UGrid_d);  | ||||
|   precisionChange(Umu_d,Umu); | ||||
|  | ||||
|   GparityLatticeFermionD result_d(FGrid_d); | ||||
|  | ||||
|   GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5); | ||||
|   if (1) { | ||||
|     FGrid_d->Barrier(); | ||||
|     DwD.ZeroCounters(); | ||||
|     DwD.Dhop(src_d,result_d,0); | ||||
|     std::cout<<GridLogMessage<<"Called warmup"<<std::endl; | ||||
|     double t0=usecond(); | ||||
|     for(int i=0;i<ncall;i++){ | ||||
|       __SSC_START; | ||||
|       DwD.Dhop(src_d,result_d,0); | ||||
|       __SSC_STOP; | ||||
|     } | ||||
|     double t1=usecond(); | ||||
|     FGrid_d->Barrier(); | ||||
|      | ||||
|     double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; | ||||
|     double flops=2*1344*volume*ncall; | ||||
|  | ||||
|     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; | ||||
|     //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; | ||||
|     //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl; | ||||
|     std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl; | ||||
|     DwD.Report(); | ||||
|   } | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
|  | ||||
| @@ -55,21 +55,21 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|   uint64_t lmax=44; | ||||
| #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   uint64_t lmax=96; | ||||
| #define NLOOP (10*lmax*lmax*lmax*lmax/vol) | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       uint64_t Nloop=NLOOP; | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       double a=2.0; | ||||
|  | ||||
|  | ||||
| @@ -83,7 +83,7 @@ int main (int argc, char ** argv) | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|        | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       double bytes=3*vol*Nvec*sizeof(Real); | ||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||
|  | ||||
|     } | ||||
| @@ -94,17 +94,17 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|    | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       double a=2.0; | ||||
|  | ||||
|       uint64_t Nloop=NLOOP; | ||||
| @@ -119,7 +119,7 @@ int main (int argc, char ** argv) | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|       | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       double bytes=3*vol*Nvec*sizeof(Real); | ||||
|       double bytes=3.0*vol*Nvec*sizeof(Real); | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||
|  | ||||
|     } | ||||
| @@ -129,20 +129,20 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|  | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       uint64_t Nloop=NLOOP; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       RealD a=2.0; | ||||
|  | ||||
|  | ||||
| @@ -154,7 +154,7 @@ int main (int argc, char ** argv) | ||||
|       double stop=usecond(); | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|        | ||||
|       double bytes=2*vol*Nvec*sizeof(Real); | ||||
|       double bytes=2.0*vol*Nvec*sizeof(Real); | ||||
|       double flops=vol*Nvec*1;// mul | ||||
|       std::cout<<GridLogMessage <<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.<<std::endl; | ||||
|  | ||||
| @@ -166,17 +166,17 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|   for(int lat=8;lat<=lmax;lat+=8){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       uint64_t Nloop=NLOOP; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       LatticeVec z(&Grid); //random(pRNG,z); | ||||
|       LatticeVec x(&Grid); //random(pRNG,x); | ||||
|       LatticeVec y(&Grid); //random(pRNG,y); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|       LatticeVec z(&Grid);// random(pRNG,z); | ||||
|       LatticeVec x(&Grid);// random(pRNG,x); | ||||
|       LatticeVec y(&Grid);// random(pRNG,y); | ||||
|       RealD a=2.0; | ||||
|       Real nn;       | ||||
|       double start=usecond(); | ||||
| @@ -187,7 +187,7 @@ int main (int argc, char ** argv) | ||||
|       double stop=usecond(); | ||||
|       double time = (stop-start)/Nloop*1000; | ||||
|        | ||||
|       double bytes=vol*Nvec*sizeof(Real); | ||||
|       double bytes=1.0*vol*Nvec*sizeof(Real); | ||||
|       double flops=vol*Nvec*2;// mul,add | ||||
|       std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"  \t\t"<<bytes/time<<"\t\t"<<flops/time<< "\t\t"<<(stop-start)/1000./1000.<< "\t\t " <<std::endl; | ||||
|  | ||||
|   | ||||
| @@ -40,7 +40,7 @@ int main (int argc, char ** argv) | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); | ||||
|   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); | ||||
|   GridRedBlackCartesian     RBGrid(&Grid); | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|   | ||||
| @@ -35,13 +35,14 @@ using namespace Grid::QCD; | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
| #define LMAX (64) | ||||
|  | ||||
|   int Nloop=1000; | ||||
|   int64_t Nloop=20; | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   int64_t threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
| @@ -50,19 +51,19 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=2;lat<=32;lat+=2){ | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid);// random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid);// random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid);// random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	x=x*y; | ||||
|       } | ||||
|       double stop=usecond(); | ||||
| @@ -82,20 +83,20 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=2;lat<=32;lat+=2){ | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	z=x*y; | ||||
|       } | ||||
|       double stop=usecond(); | ||||
| @@ -113,20 +114,20 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=2;lat<=32;lat+=2){ | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	mult(z,x,y); | ||||
|       } | ||||
|       double stop=usecond(); | ||||
| @@ -144,20 +145,20 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=2;lat<=32;lat+=2){ | ||||
|   for(int lat=2;lat<=LMAX;lat+=2){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|       int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|       int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}); | ||||
|       GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|  | ||||
|       LatticeColourMatrix z(&Grid); //random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); //random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); //random(pRNG,y); | ||||
|       LatticeColourMatrix z(&Grid); random(pRNG,z); | ||||
|       LatticeColourMatrix x(&Grid); random(pRNG,x); | ||||
|       LatticeColourMatrix y(&Grid); random(pRNG,y); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       for(int64_t i=0;i<Nloop;i++){ | ||||
| 	mac(z,x,y); | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|   | ||||
| @@ -58,7 +58,7 @@ int main (int argc, char ** argv) | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); | ||||
|   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); | ||||
|   GridRedBlackCartesian     RBGrid(&Grid); | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|   | ||||
| @@ -93,7 +93,7 @@ int main (int argc, char ** argv) | ||||
| 	  std::cout << latt_size.back() << "\t\t"; | ||||
|  | ||||
| 	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout); | ||||
| 	  GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); | ||||
| 	  GridRedBlackCartesian RBGrid(&Grid); | ||||
|  | ||||
| 	  GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); | ||||
| 	  LatticeGaugeField Umu(&Grid); random(pRNG,Umu); | ||||
|   | ||||
| @@ -1,11 +1,7 @@ | ||||
| include Make.inc | ||||
|  | ||||
| simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o | ||||
|  | ||||
| EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a | ||||
|  | ||||
| libsimple_su3_test_a_SOURCES = simple_su3_test.cc | ||||
|  | ||||
| libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc | ||||
|  | ||||
| libsimple_simd_test_a_SOURCES = simple_simd_test.cc | ||||
| bench-local: all | ||||
| 	./Benchmark_su3 | ||||
| 	./Benchmark_memory_bandwidth | ||||
| 	./Benchmark_wilson | ||||
| 	./Benchmark_dwf --dslash-unroll | ||||
| @@ -1,6 +1,6 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' | ||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | ||||
|  | ||||
| echo "-- deploying Eigen source..." | ||||
| wget ${EIGEN_URL} --no-check-certificate | ||||
|   | ||||
							
								
								
									
										239
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										239
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -1,16 +1,23 @@ | ||||
| AC_PREREQ([2.63]) | ||||
| AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) | ||||
| AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) | ||||
| AC_CANONICAL_BUILD | ||||
| AC_CANONICAL_HOST | ||||
| AC_CANONICAL_TARGET | ||||
| AM_INIT_AUTOMAKE(subdir-objects) | ||||
| AM_INIT_AUTOMAKE([subdir-objects 1.13]) | ||||
| AM_EXTRA_RECURSIVE_TARGETS([tests bench]) | ||||
| AC_CONFIG_MACRO_DIR([m4]) | ||||
| AC_CONFIG_SRCDIR([lib/Grid.h]) | ||||
| AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) | ||||
| m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) | ||||
|  | ||||
| ############### Checks for programs | ||||
| ################ Get git info | ||||
| #AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])]) | ||||
|  | ||||
| ################ Set flags | ||||
| # do not move! | ||||
| CXXFLAGS="-O3 $CXXFLAGS" | ||||
|  | ||||
| ############### Checks for programs | ||||
| AC_PROG_CXX | ||||
| AC_PROG_RANLIB | ||||
|  | ||||
| @@ -24,12 +31,14 @@ AX_GXX_VERSION | ||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||
|       [version of g++ that will compile the code]) | ||||
|  | ||||
|  | ||||
|  | ||||
| ############### Checks for typedefs, structures, and compiler characteristics | ||||
| AC_TYPE_SIZE_T | ||||
| AC_TYPE_UINT32_T | ||||
| AC_TYPE_UINT64_T | ||||
|  | ||||
| ############### OpenMP  | ||||
| ############### OpenMP | ||||
| AC_OPENMP | ||||
| ac_openmp=no | ||||
| if test "${OPENMP_CXXFLAGS}X" != "X"; then | ||||
| @@ -45,9 +54,14 @@ AC_CHECK_HEADERS(malloc/malloc.h) | ||||
| AC_CHECK_HEADERS(malloc.h) | ||||
| AC_CHECK_HEADERS(endian.h) | ||||
| AC_CHECK_HEADERS(execinfo.h) | ||||
| AC_CHECK_HEADERS(numaif.h) | ||||
| AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) | ||||
| AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) | ||||
|  | ||||
| ############## Standard libraries | ||||
| AC_CHECK_LIB([m],[cos]) | ||||
| AC_CHECK_LIB([stdc++],[abort]) | ||||
|  | ||||
| ############### GMP and MPFR | ||||
| AC_ARG_WITH([gmp], | ||||
|     [AS_HELP_STRING([--with-gmp=prefix], | ||||
| @@ -60,16 +74,23 @@ AC_ARG_WITH([mpfr], | ||||
|     [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] | ||||
|     [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) | ||||
|  | ||||
| ############### FFTW3  | ||||
| AC_ARG_WITH([fftw],     | ||||
| ############### FFTW3 | ||||
| AC_ARG_WITH([fftw], | ||||
|             [AS_HELP_STRING([--with-fftw=prefix], | ||||
|             [try this for a non-standard install prefix of the FFTW3 library])], | ||||
|             [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] | ||||
|             [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) | ||||
|  | ||||
| ############### lapack  | ||||
| ############### LIME | ||||
| AC_ARG_WITH([lime], | ||||
|             [AS_HELP_STRING([--with-lime=prefix], | ||||
|             [try this for a non-standard install prefix of the LIME library])], | ||||
|             [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"] | ||||
|             [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"]) | ||||
|  | ||||
| ############### lapack | ||||
| AC_ARG_ENABLE([lapack], | ||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  | ||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], | ||||
|     [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) | ||||
|  | ||||
| case ${ac_LAPACK} in | ||||
| @@ -83,6 +104,18 @@ case ${ac_LAPACK} in | ||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; | ||||
| esac | ||||
|  | ||||
| ############### FP16 conversions | ||||
| AC_ARG_ENABLE([sfw-fp16], | ||||
|     [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], | ||||
|     [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) | ||||
| case ${ac_SFW_FP16} in | ||||
|     yes) | ||||
|       AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);; | ||||
|     no);; | ||||
|     *) | ||||
|       AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);; | ||||
| esac | ||||
|  | ||||
| ############### MKL | ||||
| AC_ARG_ENABLE([mkl], | ||||
|     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], | ||||
| @@ -108,7 +141,7 @@ AC_ARG_WITH([hdf5], | ||||
|  | ||||
| ############### first-touch | ||||
| AC_ARG_ENABLE([numa], | ||||
|     [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],  | ||||
|     [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], | ||||
|     [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) | ||||
|  | ||||
| case ${ac_NUMA} in | ||||
| @@ -134,8 +167,8 @@ if test "${ac_MKL}x" != "nox"; then | ||||
| fi | ||||
|  | ||||
| AC_SEARCH_LIBS([__gmpf_init], [gmp], | ||||
|                [AC_SEARCH_LIBS([mpfr_init], [mpfr],  | ||||
|                                [AC_DEFINE([HAVE_LIBMPFR], [1],  | ||||
|                [AC_SEARCH_LIBS([mpfr_init], [mpfr], | ||||
|                                [AC_DEFINE([HAVE_LIBMPFR], [1], | ||||
|                                           [Define to 1 if you have the `MPFR' library])] | ||||
|                                [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] | ||||
|                [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] | ||||
| @@ -144,7 +177,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp], | ||||
| if test "${ac_LAPACK}x" != "nox"; then | ||||
|     AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], | ||||
|                    [AC_MSG_ERROR("LAPACK enabled but library not found")]) | ||||
| fi    | ||||
| fi | ||||
|  | ||||
| AC_SEARCH_LIBS([fftw_execute], [fftw3], | ||||
|                [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], | ||||
| @@ -152,6 +185,23 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3], | ||||
|                [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] | ||||
|                [have_fftw=true]) | ||||
|  | ||||
| AC_SEARCH_LIBS([limeCreateReader], [lime], | ||||
|                [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])] | ||||
|                [have_lime=true], | ||||
| 	       [AC_MSG_WARN(C-LIME library was not found in your system. | ||||
| In order to use ILGG file format please install or provide the correct path to your installation | ||||
| Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([crc32], [z], | ||||
|                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] | ||||
|                [have_zlib=true] [LIBS="${LIBS} -lz"], | ||||
| 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([move_pages], [numa], | ||||
|                [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] | ||||
|                [have_libnuma=true] [LIBS="${LIBS} -lnuma"], | ||||
| 	       [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||
|                [have_hdf5=true] | ||||
| @@ -176,19 +226,26 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|     case ${ac_SIMD} in | ||||
|       SSE4) | ||||
|         AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) | ||||
|         SIMD_FLAGS='-msse4.2';; | ||||
| 	case ${ac_SFW_FP16} in | ||||
| 	  yes) | ||||
| 	  SIMD_FLAGS='-msse4.2';; | ||||
| 	  no) | ||||
| 	  SIMD_FLAGS='-msse4.2 -mf16c';; | ||||
| 	  *) | ||||
|           AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);; | ||||
| 	esac;; | ||||
|       AVX) | ||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||
|         SIMD_FLAGS='-mavx';; | ||||
|         SIMD_FLAGS='-mavx -mf16c';; | ||||
|       AVXFMA4) | ||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||
|         SIMD_FLAGS='-mavx -mfma4';; | ||||
|         SIMD_FLAGS='-mavx -mfma4 -mf16c';; | ||||
|       AVXFMA) | ||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||
|         SIMD_FLAGS='-mavx -mfma';; | ||||
|         SIMD_FLAGS='-mavx -mfma -mf16c';; | ||||
|       AVX2) | ||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||
|         SIMD_FLAGS='-mavx2 -mfma';; | ||||
|         SIMD_FLAGS='-mavx2 -mfma -mf16c';; | ||||
|       AVX512) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||
|         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; | ||||
| @@ -197,6 +254,7 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|         SIMD_FLAGS='';; | ||||
|       KNL) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) | ||||
|         SIMD_FLAGS='-march=knl';; | ||||
|       GEN) | ||||
|         AC_DEFINE([GEN],[1],[generic vector code]) | ||||
| @@ -204,6 +262,9 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|                            [generic SIMD vector width (in bytes)]) | ||||
|         SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" | ||||
|         SIMD_FLAGS='';; | ||||
|       NEONv8) | ||||
|         AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) | ||||
|         SIMD_FLAGS='-march=armv8-a';; | ||||
|       QPX|BGQ) | ||||
|         AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) | ||||
|         SIMD_FLAGS='';; | ||||
| @@ -232,6 +293,7 @@ case ${ax_cv_cxx_compiler_vendor} in | ||||
|         SIMD_FLAGS='';; | ||||
|       KNL) | ||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) | ||||
|         AC_DEFINE([KNL],[1],[Knights landing processor]) | ||||
|         SIMD_FLAGS='-xmic-avx512';; | ||||
|       GEN) | ||||
|         AC_DEFINE([GEN],[1],[generic vector code]) | ||||
| @@ -269,8 +331,41 @@ case ${ac_PRECISION} in | ||||
|      double) | ||||
|        AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) | ||||
|      ;; | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| ######################  Shared memory allocation technique under MPI3 | ||||
| AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs], | ||||
|               [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen]) | ||||
|  | ||||
| case ${ac_SHM} in | ||||
|  | ||||
|      shmget) | ||||
|      AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) | ||||
|      ;; | ||||
|  | ||||
|      shmopen) | ||||
|      AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] ) | ||||
|      ;; | ||||
|  | ||||
|      hugetlbfs) | ||||
|      AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] ) | ||||
|      ;; | ||||
|  | ||||
|      *) | ||||
|      AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| ######################  Shared base path for SHMMMAP | ||||
| AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], | ||||
|               [Select SHM mmap base path for hugetlbfs])], | ||||
| 	      [ac_SHMPATH=${enable_shmpath}], | ||||
| 	      [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/]) | ||||
| AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) | ||||
|  | ||||
| ############### communication type selection | ||||
| AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], | ||||
|               [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) | ||||
| @@ -280,14 +375,14 @@ case ${ac_COMMS} in | ||||
|         AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) | ||||
|         comms_type='none' | ||||
|      ;; | ||||
|      mpi3l*) | ||||
|        AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) | ||||
|        comms_type='mpi3l' | ||||
|      ;; | ||||
|      mpi3*) | ||||
|         AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) | ||||
|         comms_type='mpi3' | ||||
|      ;; | ||||
|      mpit) | ||||
|         AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) | ||||
|         comms_type='mpit' | ||||
|      ;; | ||||
|      mpi*) | ||||
|         AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) | ||||
|         comms_type='mpi' | ||||
| @@ -297,7 +392,7 @@ case ${ac_COMMS} in | ||||
|         comms_type='shmem' | ||||
|      ;; | ||||
|      *) | ||||
|         AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);  | ||||
|         AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); | ||||
|      ;; | ||||
| esac | ||||
| case ${ac_COMMS} in | ||||
| @@ -315,7 +410,7 @@ esac | ||||
| AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI,   [ test "${comms_type}X" == "mpiX" ]) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3,  [ test "${comms_type}X" == "mpi3X" ] ) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) | ||||
| AM_CONDITIONAL(BUILD_COMMS_MPIT,  [ test "${comms_type}X" == "mpitX" ] ) | ||||
| AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ]) | ||||
|  | ||||
| ############### RNG selection | ||||
| @@ -334,7 +429,7 @@ case ${ac_RNG} in | ||||
|       AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) | ||||
|      ;; | ||||
|      *) | ||||
|       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);  | ||||
|       AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| @@ -351,7 +446,7 @@ case ${ac_TIMERS} in | ||||
|       AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) | ||||
|      ;; | ||||
|      *) | ||||
|       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);  | ||||
|       AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| @@ -363,7 +458,7 @@ case ${ac_CHROMA} in | ||||
|      yes|no) | ||||
|      ;; | ||||
|      *) | ||||
|        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);  | ||||
|        AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); | ||||
|      ;; | ||||
| esac | ||||
|  | ||||
| @@ -384,12 +479,67 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg]) | ||||
|  | ||||
| ############### Ouput | ||||
| cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} | ||||
| GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" | ||||
| GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS" | ||||
| GRID_LIBS=$LIBS | ||||
| GRID_SHORT_SHA=`git rev-parse --short HEAD` | ||||
| GRID_SHA=`git rev-parse HEAD` | ||||
| GRID_BRANCH=`git rev-parse --abbrev-ref HEAD` | ||||
| AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" | ||||
| AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" | ||||
| AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" | ||||
| AC_SUBST([AM_CFLAGS]) | ||||
| AC_SUBST([AM_CXXFLAGS]) | ||||
| AC_SUBST([AM_LDFLAGS]) | ||||
| AC_SUBST([GRID_CXXFLAGS]) | ||||
| AC_SUBST([GRID_LDFLAGS]) | ||||
| AC_SUBST([GRID_LIBS]) | ||||
| AC_SUBST([GRID_SHA]) | ||||
| AC_SUBST([GRID_BRANCH]) | ||||
|  | ||||
| git_commit=`cd $srcdir && ./scripts/configure.commit` | ||||
|  | ||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| Summary of configuration for $PACKAGE v$VERSION | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| ----- GIT VERSION ------------------------------------- | ||||
| $git_commit | ||||
| ----- PLATFORM ---------------------------------------- | ||||
| architecture (build)        : $build_cpu | ||||
| os (build)                  : $build_os | ||||
| architecture (target)       : $target_cpu | ||||
| os (target)                 : $target_os | ||||
| compiler vendor             : ${ax_cv_cxx_compiler_vendor} | ||||
| compiler version            : ${ax_cv_gxx_version} | ||||
| ----- BUILD OPTIONS ----------------------------------- | ||||
| SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | ||||
| Threading                   : ${ac_openmp} | ||||
| Communications type         : ${comms_type} | ||||
| Shared memory allocator     : ${ac_SHM} | ||||
| Shared memory mmap path     : ${ac_SHMPATH} | ||||
| Default precision           : ${ac_PRECISION} | ||||
| Software FP16 conversion    : ${ac_SFW_FP16} | ||||
| RNG choice                  : ${ac_RNG} | ||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||
| LAPACK                      : ${ac_LAPACK} | ||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||
| LIME (ILDG support)         : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi` | ||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||
| ----- BUILD FLAGS ------------------------------------- | ||||
| CXXFLAGS: | ||||
| `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| LDFLAGS: | ||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| LIBS: | ||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| -------------------------------------------------------" > grid.configure.summary | ||||
|  | ||||
| GRID_SUMMARY="`cat grid.configure.summary`" | ||||
| AM_SUBST_NOTMAKE([GRID_SUMMARY]) | ||||
| AC_SUBST([GRID_SUMMARY]) | ||||
|  | ||||
| AC_CONFIG_FILES([grid-config], [chmod +x grid-config]) | ||||
| AC_CONFIG_FILES(Makefile) | ||||
| AC_CONFIG_FILES(lib/Makefile) | ||||
| AC_CONFIG_FILES(tests/Makefile) | ||||
| @@ -400,6 +550,8 @@ AC_CONFIG_FILES(tests/forces/Makefile) | ||||
| AC_CONFIG_FILES(tests/hadrons/Makefile) | ||||
| AC_CONFIG_FILES(tests/hmc/Makefile) | ||||
| AC_CONFIG_FILES(tests/solver/Makefile) | ||||
| AC_CONFIG_FILES(tests/lanczos/Makefile) | ||||
| AC_CONFIG_FILES(tests/smearing/Makefile) | ||||
| AC_CONFIG_FILES(tests/qdpxx/Makefile) | ||||
| AC_CONFIG_FILES(tests/testu01/Makefile) | ||||
| AC_CONFIG_FILES(benchmarks/Makefile) | ||||
| @@ -407,36 +559,7 @@ AC_CONFIG_FILES(extras/Makefile) | ||||
| AC_CONFIG_FILES(extras/Hadrons/Makefile) | ||||
| AC_OUTPUT | ||||
|  | ||||
| echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| Summary of configuration for $PACKAGE v$VERSION | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| echo "" | ||||
| cat grid.configure.summary | ||||
| echo "" | ||||
|  | ||||
| ----- PLATFORM ---------------------------------------- | ||||
| architecture (build)        : $build_cpu | ||||
| os (build)                  : $build_os | ||||
| architecture (target)       : $target_cpu | ||||
| os (target)                 : $target_os | ||||
| compiler vendor             : ${ax_cv_cxx_compiler_vendor} | ||||
| compiler version            : ${ax_cv_gxx_version} | ||||
| ----- BUILD OPTIONS ----------------------------------- | ||||
| SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG} | ||||
| Threading                   : ${ac_openmp}  | ||||
| Communications type         : ${comms_type} | ||||
| Default precision           : ${ac_PRECISION} | ||||
| RNG choice                  : ${ac_RNG}  | ||||
| GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` | ||||
| LAPACK                      : ${ac_LAPACK} | ||||
| FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` | ||||
| HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi` | ||||
| build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi` | ||||
| ----- BUILD FLAGS ------------------------------------- | ||||
| CXXFLAGS: | ||||
| `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| LDFLAGS: | ||||
| `echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| LIBS: | ||||
| `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'` | ||||
| -------------------------------------------------------" > config.summary | ||||
| echo "" | ||||
| cat config.summary | ||||
| echo "" | ||||
|   | ||||
| @@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName) | ||||
| sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)" | ||||
|  | ||||
| #define DEFINE_MEMPEAK \ | ||||
| auto memPeak = [this](const std::vector<unsigned int> &program)\ | ||||
| GeneticScheduler<unsigned int>::ObjFunc memPeak = \ | ||||
| [this](const std::vector<unsigned int> &program)\ | ||||
| {\ | ||||
|     unsigned int memPeak;\ | ||||
|     bool         msg;\ | ||||
|   | ||||
| @@ -41,9 +41,10 @@ using namespace Hadrons; | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| Environment::Environment(void) | ||||
| { | ||||
|     nd_ = GridDefaultLatt().size(); | ||||
|     dim_ = GridDefaultLatt(); | ||||
|     nd_  = dim_.size(); | ||||
|     grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( | ||||
|         GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||
|         dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), | ||||
|         GridDefaultMpi())); | ||||
|     gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); | ||||
|     auto loc = getGrid()->LocalDimensions(); | ||||
| @@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const | ||||
|     return nd_; | ||||
| } | ||||
|  | ||||
| std::vector<int> Environment::getDim(void) const | ||||
| { | ||||
|     return dim_; | ||||
| } | ||||
|  | ||||
| int Environment::getDim(const unsigned int mu) const | ||||
| { | ||||
|     return dim_[mu]; | ||||
| } | ||||
|  | ||||
| // random number generator ///////////////////////////////////////////////////// | ||||
| void Environment::setSeed(const std::vector<int> &seed) | ||||
| { | ||||
| @@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const | ||||
|     return getModuleType(getModuleAddress(name)); | ||||
| } | ||||
|  | ||||
| std::string Environment::getModuleNamespace(const unsigned int address) const | ||||
| { | ||||
|     std::string type = getModuleType(address), ns; | ||||
|      | ||||
|     auto pos2 = type.rfind("::"); | ||||
|     auto pos1 = type.rfind("::", pos2 - 2); | ||||
|      | ||||
|     return type.substr(pos1 + 2, pos2 - pos1 - 2); | ||||
| } | ||||
|  | ||||
| std::string Environment::getModuleNamespace(const std::string name) const | ||||
| { | ||||
|     return getModuleNamespace(getModuleAddress(name)); | ||||
| } | ||||
|  | ||||
| bool Environment::hasModule(const unsigned int address) const | ||||
| { | ||||
|     return (address < module_.size()); | ||||
| @@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const | ||||
| { | ||||
|     if (hasRegisteredObject(address)) | ||||
|     { | ||||
|         return typeName(object_[address].type); | ||||
|         if (object_[address].type) | ||||
|         { | ||||
|             return typeName(object_[address].type); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return "<no type>"; | ||||
|         } | ||||
|     } | ||||
|     else if (hasObject(address)) | ||||
|     { | ||||
| @@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const | ||||
|     return getObjectSize(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| unsigned int Environment::getObjectModule(const unsigned int address) const | ||||
| { | ||||
|     if (hasObject(address)) | ||||
|     { | ||||
|         return object_[address].module; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| unsigned int Environment::getObjectModule(const std::string name) const | ||||
| { | ||||
|     return getObjectModule(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| unsigned int Environment::getObjectLs(const unsigned int address) const | ||||
| { | ||||
|     if (hasRegisteredObject(address)) | ||||
|   | ||||
| @@ -106,6 +106,8 @@ public: | ||||
|     void                    createGrid(const unsigned int Ls); | ||||
|     GridCartesian *         getGrid(const unsigned int Ls = 1) const; | ||||
|     GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; | ||||
|     std::vector<int>        getDim(void) const; | ||||
|     int                     getDim(const unsigned int mu) const; | ||||
|     unsigned int            getNd(void) const; | ||||
|     // random number generator | ||||
|     void                    setSeed(const std::vector<int> &seed); | ||||
| @@ -131,6 +133,8 @@ public: | ||||
|     std::string             getModuleName(const unsigned int address) const; | ||||
|     std::string             getModuleType(const unsigned int address) const; | ||||
|     std::string             getModuleType(const std::string name) const; | ||||
|     std::string             getModuleNamespace(const unsigned int address) const; | ||||
|     std::string             getModuleNamespace(const std::string name) const; | ||||
|     bool                    hasModule(const unsigned int address) const; | ||||
|     bool                    hasModule(const std::string name) const; | ||||
|     Graph<unsigned int>     makeModuleGraph(void) const; | ||||
| @@ -171,6 +175,8 @@ public: | ||||
|     std::string             getObjectType(const std::string name) const; | ||||
|     Size                    getObjectSize(const unsigned int address) const; | ||||
|     Size                    getObjectSize(const std::string name) const; | ||||
|     unsigned int            getObjectModule(const unsigned int address) const; | ||||
|     unsigned int            getObjectModule(const std::string name) const; | ||||
|     unsigned int            getObjectLs(const unsigned int address) const; | ||||
|     unsigned int            getObjectLs(const std::string name) const; | ||||
|     bool                    hasObject(const unsigned int address) const; | ||||
| @@ -181,6 +187,10 @@ public: | ||||
|     bool                    hasCreatedObject(const std::string name) const; | ||||
|     bool                    isObject5d(const unsigned int address) const; | ||||
|     bool                    isObject5d(const std::string name) const; | ||||
|     template <typename T> | ||||
|     bool                    isObjectOfType(const unsigned int address) const; | ||||
|     template <typename T> | ||||
|     bool                    isObjectOfType(const std::string name) const; | ||||
|     Environment::Size       getTotalSize(void) const; | ||||
|     void                    addOwnership(const unsigned int owner, | ||||
|                                          const unsigned int property); | ||||
| @@ -197,6 +207,7 @@ private: | ||||
|     bool                                   dryRun_{false}; | ||||
|     unsigned int                           traj_, locVol_; | ||||
|     // grids | ||||
|     std::vector<int>                       dim_; | ||||
|     GridPt                                 grid4d_; | ||||
|     std::map<unsigned int, GridPt>         grid5d_; | ||||
|     GridRbPt                               gridRb4d_; | ||||
| @@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const | ||||
|         else | ||||
|         { | ||||
|             HADRON_ERROR("object with address " + std::to_string(address) + | ||||
|                          " does not have type '" + typeid(T).name() + | ||||
|                          " does not have type '" + typeName(&typeid(T)) + | ||||
|                          "' (has type '" + getObjectType(address) + "')"); | ||||
|         } | ||||
|     } | ||||
| @@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name) | ||||
|     return createLattice<T>(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| bool Environment::isObjectOfType(const unsigned int address) const | ||||
| { | ||||
|     if (hasRegisteredObject(address)) | ||||
|     { | ||||
|         if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get())) | ||||
|         { | ||||
|             return true; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return false; | ||||
|         } | ||||
|     } | ||||
|     else if (hasObject(address)) | ||||
|     { | ||||
|         HADRON_ERROR("object with address " + std::to_string(address) + | ||||
|                      " exists but is not registered"); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         HADRON_ERROR("no object with address " + std::to_string(address)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| bool Environment::isObjectOfType(const std::string name) const | ||||
| { | ||||
|     return isObjectOfType<T>(getObjectAddress(name)); | ||||
| } | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Environment_hpp_ | ||||
|   | ||||
| @@ -51,23 +51,43 @@ using Grid::operator<<; | ||||
|  * error with GCC 5 (clang & GCC 6 compile fine without it). | ||||
|  */ | ||||
|  | ||||
| // FIXME: find a way to do that in a more general fashion | ||||
| #ifndef FIMPL | ||||
| #define FIMPL WilsonImplR | ||||
| #endif | ||||
| #ifndef SIMPL | ||||
| #define SIMPL ScalarImplCR | ||||
| #endif | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| // type aliases | ||||
| #define TYPE_ALIASES(FImpl, suffix)\ | ||||
| #define FERM_TYPE_ALIASES(FImpl, suffix)\ | ||||
| typedef FermionOperator<FImpl>                       FMat##suffix;             \ | ||||
| typedef typename FImpl::FermionField                 FermionField##suffix;     \ | ||||
| typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \ | ||||
| typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \ | ||||
| typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\ | ||||
| typedef std::function<void(FermionField##suffix &,                             \ | ||||
| typedef std::vector<typename FImpl::SitePropagator::scalar_object>             \ | ||||
|                                                      SlicedPropagator##suffix; | ||||
|  | ||||
| #define GAUGE_TYPE_ALIASES(FImpl, suffix)\ | ||||
| typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; | ||||
|  | ||||
| #define SCALAR_TYPE_ALIASES(SImpl, suffix)\ | ||||
| typedef typename SImpl::Field ScalarField##suffix;\ | ||||
| typedef typename SImpl::Field PropagatorField##suffix; | ||||
|  | ||||
| #define SOLVER_TYPE_ALIASES(FImpl, suffix)\ | ||||
| typedef std::function<void(FermionField##suffix &,\ | ||||
|                       const FermionField##suffix &)> SolverFn##suffix; | ||||
|  | ||||
| #define SINK_TYPE_ALIASES(suffix)\ | ||||
| typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix; | ||||
|  | ||||
| #define FGS_TYPE_ALIASES(FImpl, suffix)\ | ||||
| FERM_TYPE_ALIASES(FImpl, suffix)\ | ||||
| GAUGE_TYPE_ALIASES(FImpl, suffix)\ | ||||
| SOLVER_TYPE_ALIASES(FImpl, suffix) | ||||
|  | ||||
| // logger | ||||
| class HadronsLogger: public Logger | ||||
| { | ||||
| @@ -145,6 +165,15 @@ std::string typeName(void) | ||||
|     return typeName(typeIdPt<T>()); | ||||
| } | ||||
|  | ||||
| // default writers/readers | ||||
| #ifdef HAVE_HDF5 | ||||
| typedef Hdf5Reader CorrReader; | ||||
| typedef Hdf5Writer CorrWriter; | ||||
| #else | ||||
| typedef XmlReader CorrReader; | ||||
| typedef XmlWriter CorrWriter; | ||||
| #endif | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Global_hpp_ | ||||
|   | ||||
| @@ -1,40 +1,25 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules.hpp | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Hadrons/Modules/MAction/DWF.hpp> | ||||
| #include <Grid/Hadrons/Modules/MAction/Wilson.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/Meson.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||
| #include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/Load.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/Random.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> | ||||
| #include <Grid/Hadrons/Modules/MGauge/Unit.hpp> | ||||
| #include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSink/Point.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/Point.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/Wall.hpp> | ||||
| #include <Grid/Hadrons/Modules/MSource/Z2.hpp> | ||||
| #include <Grid/Hadrons/Modules/Quark.hpp> | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_DWF_hpp_ | ||||
| #define Hadrons_DWF_hpp_ | ||||
| #ifndef Hadrons_MAction_DWF_hpp_ | ||||
| #define Hadrons_MAction_DWF_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -48,14 +48,15 @@ public: | ||||
|                                     std::string, gauge, | ||||
|                                     unsigned int, Ls, | ||||
|                                     double      , mass, | ||||
|                                     double      , M5); | ||||
|                                     double      , M5, | ||||
|                                     std::string , boundary); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TDWF: public Module<DWFPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TDWF(const std::string name); | ||||
| @@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void) | ||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||
|                  << std::endl; | ||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||
|                  << std::endl; | ||||
|     env().createGrid(par().Ls); | ||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||
|     auto &g4     = *env().getGrid(); | ||||
|     auto &grb4   = *env().getRbGrid(); | ||||
|     auto &g5     = *env().getGrid(par().Ls); | ||||
|     auto &grb5   = *env().getRbGrid(par().Ls); | ||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||
|     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); | ||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||
|                                                 par().mass, par().M5); | ||||
|                                                 par().mass, par().M5, | ||||
|                                                 implParams); | ||||
|     env().setObject(getName(), fMatPt); | ||||
| } | ||||
|  | ||||
| @@ -131,4 +137,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_DWF_hpp_ | ||||
| #endif // Hadrons_MAction_DWF_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Wilson_hpp_ | ||||
| #define Hadrons_Wilson_hpp_ | ||||
| #ifndef Hadrons_MAction_Wilson_hpp_ | ||||
| #define Hadrons_MAction_Wilson_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -46,14 +46,15 @@ class WilsonPar: Serializable | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||
|                                     std::string, gauge, | ||||
|                                     double     , mass); | ||||
|                                     double     , mass, | ||||
|                                     std::string, boundary); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TWilson: public Module<WilsonPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TWilson(const std::string name); | ||||
| @@ -112,10 +113,15 @@ void TWilson<FImpl>::execute() | ||||
| { | ||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||
|                  << std::endl; | ||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||
|     auto &grid   = *env().getGrid(); | ||||
|     auto &gridRb = *env().getRbGrid(); | ||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); | ||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||
|     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); | ||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, | ||||
|                                             implParams); | ||||
|     env().setObject(getName(), fMatPt); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Baryon_hpp_ | ||||
| #define Hadrons_Baryon_hpp_ | ||||
| #ifndef Hadrons_MContraction_Baryon_hpp_ | ||||
| #define Hadrons_MContraction_Baryon_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| class TBaryon: public Module<BaryonPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl1, 1); | ||||
|     TYPE_ALIASES(FImpl2, 2); | ||||
|     TYPE_ALIASES(FImpl3, 3); | ||||
|     FERM_TYPE_ALIASES(FImpl1, 1); | ||||
|     FERM_TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(FImpl3, 3); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
| @@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | ||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "', and '" | ||||
|                  << par().q3 << "'" << std::endl; | ||||
|      | ||||
|     XmlWriter             writer(par().output); | ||||
|     CorrWriter             writer(par().output); | ||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q2); | ||||
| @@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void) | ||||
|      | ||||
|     // FIXME: do contractions | ||||
|      | ||||
|     write(writer, "meson", result); | ||||
|     // write(writer, "meson", result); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Baryon_hpp_ | ||||
| #endif // Hadrons_MContraction_Baryon_hpp_ | ||||
|   | ||||
							
								
								
									
										144
									
								
								extras/Hadrons/Modules/MContraction/DiscLoop.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								extras/Hadrons/Modules/MContraction/DiscLoop.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MContraction_DiscLoop_hpp_ | ||||
| #define Hadrons_MContraction_DiscLoop_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                                DiscLoop                                    * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| class DiscLoopPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar, | ||||
|                                     std::string,    q_loop, | ||||
|                                     Gamma::Algebra, gamma, | ||||
|                                     std::string,    output); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TDiscLoop: public Module<DiscLoopPar> | ||||
| { | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||
|                                         Gamma::Algebra, gamma, | ||||
|                                         std::vector<Complex>, corr); | ||||
|     }; | ||||
| public: | ||||
|     // constructor | ||||
|     TDiscLoop(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TDiscLoop(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       TDiscLoop implementation                             * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| TDiscLoop<FImpl>::TDiscLoop(const std::string name) | ||||
| : Module<DiscLoopPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TDiscLoop<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().q_loop}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TDiscLoop<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TDiscLoop<FImpl>::setup(void) | ||||
| { | ||||
|      | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TDiscLoop<FImpl>::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing disconnected loop contraction '" << getName()  | ||||
|                  << "' using '" << par().q_loop << "' with " << par().gamma  | ||||
|                  << " insertion." << std::endl; | ||||
|  | ||||
|     CorrWriter            writer(par().output); | ||||
|     PropagatorField       &q_loop = *env().template getObject<PropagatorField>(par().q_loop); | ||||
|     LatticeComplex        c(env().getGrid()); | ||||
|     Gamma                 gamma(par().gamma); | ||||
|     std::vector<TComplex> buf; | ||||
|     Result                result; | ||||
|  | ||||
|     c = trace(gamma*q_loop); | ||||
|     sliceSum(c, buf, Tp); | ||||
|  | ||||
|     result.gamma = par().gamma; | ||||
|     result.corr.resize(buf.size()); | ||||
|     for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|     { | ||||
|         result.corr[t] = TensorRemove(buf[t]); | ||||
|     } | ||||
|  | ||||
|     write(writer, "disc", result); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MContraction_DiscLoop_hpp_ | ||||
							
								
								
									
										170
									
								
								extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MContraction_Gamma3pt_hpp_ | ||||
| #define Hadrons_MContraction_Gamma3pt_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /* | ||||
|  * 3pt contraction with gamma matrix insertion. | ||||
|  * | ||||
|  * Schematic: | ||||
|  * | ||||
|  *             q2           q3 | ||||
|  *        /----<------*------<----¬ | ||||
|  *       /          gamma          \ | ||||
|  *      /                           \ | ||||
|  *   i *                            * f | ||||
|  *      \                          / | ||||
|  *       \                        / | ||||
|  *        \----------->----------/ | ||||
|  *                   q1 | ||||
|  * | ||||
|  *      trace(g5*q1*adj(q2)*g5*gamma*q3) | ||||
|  */ | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                               Gamma3pt                                     * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| class Gamma3ptPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar, | ||||
|                                     std::string,    q1, | ||||
|                                     std::string,    q2, | ||||
|                                     std::string,    q3, | ||||
|                                     Gamma::Algebra, gamma, | ||||
|                                     std::string,    output); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| class TGamma3pt: public Module<Gamma3ptPar> | ||||
| { | ||||
|     FERM_TYPE_ALIASES(FImpl1, 1); | ||||
|     FERM_TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(FImpl3, 3); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||
|                                         Gamma::Algebra, gamma, | ||||
|                                         std::vector<Complex>, corr); | ||||
|     }; | ||||
| public: | ||||
|     // constructor | ||||
|     TGamma3pt(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TGamma3pt(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       TGamma3pt implementation                             * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name) | ||||
| : Module<Gamma3ptPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void) | ||||
| { | ||||
|      | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl1, typename FImpl2, typename FImpl3> | ||||
| void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing 3pt contractions '" << getName() << "' using" | ||||
|                  << " quarks '" << par().q1 << "', '" << par().q2 << "' and '" | ||||
|                  << par().q3 << "', with " << par().gamma << " insertion."  | ||||
|                  << std::endl; | ||||
|  | ||||
|     CorrWriter            writer(par().output); | ||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||
|     PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q3); | ||||
|     LatticeComplex        c(env().getGrid()); | ||||
|     Gamma                 g5(Gamma::Algebra::Gamma5); | ||||
|     Gamma                 gamma(par().gamma); | ||||
|     std::vector<TComplex> buf; | ||||
|     Result                result; | ||||
|  | ||||
|     c = trace(g5*q1*adj(q2)*(g5*gamma)*q3); | ||||
|     sliceSum(c, buf, Tp); | ||||
|  | ||||
|     result.gamma = par().gamma; | ||||
|     result.corr.resize(buf.size()); | ||||
|     for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|     { | ||||
|         result.corr[t] = TensorRemove(buf[t]); | ||||
|     } | ||||
|  | ||||
|     write(writer, "gamma3pt", result); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MContraction_Gamma3pt_hpp_ | ||||
| @@ -6,8 +6,10 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|         Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| @@ -27,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Meson_hpp_ | ||||
| #define Hadrons_Meson_hpp_ | ||||
| #ifndef Hadrons_MContraction_Meson_hpp_ | ||||
| #define Hadrons_MContraction_Meson_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -36,32 +38,56 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /* | ||||
|   | ||||
|  Meson contractions | ||||
|  ----------------------------- | ||||
|   | ||||
|  * options: | ||||
|  - q1: input propagator 1 (string) | ||||
|  - q2: input propagator 2 (string) | ||||
|  - gammas: gamma products to insert at sink & source, pairs of gamma matrices  | ||||
|            (space-separated strings) in angled brackets (i.e. <g_sink g_src>), | ||||
|            in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>"). | ||||
|  | ||||
|            Special values: "all" - perform all possible contractions. | ||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."), | ||||
|         given as multiples of (2*pi) / L. | ||||
| */ | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                                TMeson                                       * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair; | ||||
|  | ||||
| class MesonPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, | ||||
|                                     std::string,    q1, | ||||
|                                     std::string,    q2, | ||||
|                                     std::string,    output, | ||||
|                                     Gamma::Algebra, gammaSource, | ||||
|                                     Gamma::Algebra, gammaSink); | ||||
|                                     std::string, q1, | ||||
|                                     std::string, q2, | ||||
|                                     std::string, gammas, | ||||
|                                     std::string, sink, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| class TMeson: public Module<MesonPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl1, 1); | ||||
|     TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(FImpl1, 1); | ||||
|     FERM_TYPE_ALIASES(FImpl2, 2); | ||||
|     FERM_TYPE_ALIASES(ScalarImplCR, Scalar); | ||||
|     SINK_TYPE_ALIASES(Scalar); | ||||
|     class Result: Serializable | ||||
|     { | ||||
|     public: | ||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); | ||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result, | ||||
|                                         Gamma::Algebra, gamma_snk, | ||||
|                                         Gamma::Algebra, gamma_src, | ||||
|                                         std::vector<Complex>, corr); | ||||
|     }; | ||||
| public: | ||||
|     // constructor | ||||
| @@ -71,6 +97,7 @@ public: | ||||
|     // dependencies/products | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     virtual void parseGammaString(std::vector<GammaPair> &gammaList); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
| @@ -90,7 +117,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name) | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> input = {par().q1, par().q2}; | ||||
|     std::vector<std::string> input = {par().q1, par().q2, par().sink}; | ||||
|      | ||||
|     return input; | ||||
| } | ||||
| @@ -103,7 +130,35 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | ||||
|     return output; | ||||
| } | ||||
|  | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) | ||||
| { | ||||
|     gammaList.clear(); | ||||
|     // Determine gamma matrices to insert at source/sink. | ||||
|     if (par().gammas.compare("all") == 0) | ||||
|     { | ||||
|         // Do all contractions. | ||||
|         for (unsigned int i = 1; i < Gamma::nGamma; i += 2) | ||||
|         { | ||||
|             for (unsigned int j = 1; j < Gamma::nGamma; j += 2) | ||||
|             { | ||||
|                 gammaList.push_back(std::make_pair((Gamma::Algebra)i,  | ||||
|                                                    (Gamma::Algebra)j)); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         // Parse individual contractions from input string. | ||||
|         gammaList = strToVec<GammaPair>(par().gammas); | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| #define mesonConnected(q1, q2, gSnk, gSrc) \ | ||||
| (g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) | ||||
|  | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| void TMeson<FImpl1, FImpl2>::execute(void) | ||||
| { | ||||
| @@ -111,21 +166,73 @@ void TMeson<FImpl1, FImpl2>::execute(void) | ||||
|                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'" | ||||
|                  << std::endl; | ||||
|      | ||||
|     XmlWriter             writer(par().output); | ||||
|     PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1); | ||||
|     PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2); | ||||
|     LatticeComplex        c(env().getGrid()); | ||||
|     Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink); | ||||
|     Gamma                 g5(Gamma::Algebra::Gamma5); | ||||
|     std::vector<TComplex> buf; | ||||
|     Result                result; | ||||
|     CorrWriter             writer(par().output); | ||||
|     std::vector<TComplex>  buf; | ||||
|     std::vector<Result>    result; | ||||
|     Gamma                  g5(Gamma::Algebra::Gamma5); | ||||
|     std::vector<GammaPair> gammaList; | ||||
|     int                    nt = env().getDim(Tp); | ||||
|      | ||||
|     c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5); | ||||
|     sliceSum(c, buf, Tp); | ||||
|     result.corr.resize(buf.size()); | ||||
|     for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|     parseGammaString(gammaList); | ||||
|     result.resize(gammaList.size()); | ||||
|     for (unsigned int i = 0; i < result.size(); ++i) | ||||
|     { | ||||
|         result.corr[t] = TensorRemove(buf[t]); | ||||
|         result[i].gamma_snk = gammaList[i].first; | ||||
|         result[i].gamma_src = gammaList[i].second; | ||||
|         result[i].corr.resize(nt); | ||||
|     } | ||||
|     if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and | ||||
|         env().template isObjectOfType<SlicedPropagator2>(par().q2)) | ||||
|     { | ||||
|         SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1); | ||||
|         SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2); | ||||
|          | ||||
|         LOG(Message) << "(propagator already sinked)" << std::endl; | ||||
|         for (unsigned int i = 0; i < result.size(); ++i) | ||||
|         { | ||||
|             Gamma gSnk(gammaList[i].first); | ||||
|             Gamma gSrc(gammaList[i].second); | ||||
|              | ||||
|             for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|             { | ||||
|                 result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         PropagatorField1 &q1   = *env().template getObject<PropagatorField1>(par().q1); | ||||
|         PropagatorField2 &q2   = *env().template getObject<PropagatorField2>(par().q2); | ||||
|         LatticeComplex   c(env().getGrid()); | ||||
|          | ||||
|         LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; | ||||
|         for (unsigned int i = 0; i < result.size(); ++i) | ||||
|         { | ||||
|             Gamma       gSnk(gammaList[i].first); | ||||
|             Gamma       gSrc(gammaList[i].second); | ||||
|             std::string ns; | ||||
|                  | ||||
|             ns = env().getModuleNamespace(env().getObjectModule(par().sink)); | ||||
|             if (ns == "MSource") | ||||
|             { | ||||
|                 PropagatorField1 &sink = | ||||
|                     *env().template getObject<PropagatorField1>(par().sink); | ||||
|                  | ||||
|                 c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); | ||||
|                 sliceSum(c, buf, Tp); | ||||
|             } | ||||
|             else if (ns == "MSink") | ||||
|             { | ||||
|                 SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink); | ||||
|                  | ||||
|                 c   = trace(mesonConnected(q1, q2, gSnk, gSrc)); | ||||
|                 buf = sink(c); | ||||
|             } | ||||
|             for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|             { | ||||
|                 result[i].corr[t] = TensorRemove(buf[t]); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     write(writer, "meson", result); | ||||
| } | ||||
| @@ -134,4 +241,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Meson_hpp_ | ||||
| #endif // Hadrons_MContraction_Meson_hpp_ | ||||
|   | ||||
							
								
								
									
										114
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||
| #define Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         WeakHamiltonian                                    * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| /******************************************************************************* | ||||
|  * Utilities for contractions involving the Weak Hamiltonian. | ||||
|  ******************************************************************************/ | ||||
| //// Sum and store correlator. | ||||
| #define MAKE_DIAG(exp, buf, res, n)\ | ||||
| sliceSum(exp, buf, Tp);\ | ||||
| res.name = (n);\ | ||||
| res.corr.resize(buf.size());\ | ||||
| for (unsigned int t = 0; t < buf.size(); ++t)\ | ||||
| {\ | ||||
|     res.corr[t] = TensorRemove(buf[t]);\ | ||||
| } | ||||
|  | ||||
| //// Contraction of mu index: use 'mu' variable in exp. | ||||
| #define SUM_MU(buf,exp)\ | ||||
| buf = zero;\ | ||||
| for (unsigned int mu = 0; mu < ndim; ++mu)\ | ||||
| {\ | ||||
|     buf += exp;\ | ||||
| } | ||||
|  | ||||
| enum  | ||||
| { | ||||
|   i_V = 0, | ||||
|   i_A = 1, | ||||
|   n_i = 2 | ||||
| }; | ||||
|  | ||||
| class WeakHamiltonianPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar, | ||||
|                                     std::string, q1, | ||||
|                                     std::string, q2, | ||||
|                                     std::string, q3, | ||||
|                                     std::string, q4, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| #define MAKE_WEAK_MODULE(modname)\ | ||||
| class T##modname: public Module<WeakHamiltonianPar>\ | ||||
| {\ | ||||
| public:\ | ||||
|     FERM_TYPE_ALIASES(FIMPL,)\ | ||||
|     class Result: Serializable\ | ||||
|     {\ | ||||
|     public:\ | ||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\ | ||||
|                                         std::string, name,\ | ||||
|                                         std::vector<Complex>, corr);\ | ||||
|     };\ | ||||
| public:\ | ||||
|     /* constructor */ \ | ||||
|     T##modname(const std::string name);\ | ||||
|     /* destructor */ \ | ||||
|     virtual ~T##modname(void) = default;\ | ||||
|     /* dependency relation */ \ | ||||
|     virtual std::vector<std::string> getInput(void);\ | ||||
|     virtual std::vector<std::string> getOutput(void);\ | ||||
|     /* setup */ \ | ||||
|     virtual void setup(void);\ | ||||
|     /* execution */ \ | ||||
|     virtual void execute(void);\ | ||||
|     std::vector<std::string> VA_label = {"V", "A"};\ | ||||
| };\ | ||||
| MODULE_REGISTER_NS(modname, T##modname, MContraction); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MContraction_WeakHamiltonian_hpp_ | ||||
							
								
								
									
										137
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MContraction; | ||||
|  | ||||
| /* | ||||
|  * Weak Hamiltonian current-current contractions, Eye-type. | ||||
|  *  | ||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical | ||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). | ||||
|  *  | ||||
|  * Schematics:        q4                 |                   | ||||
|  *                  /-<-¬                |                              | ||||
|  *                 /     \               |             q2           q3 | ||||
|  *                 \     /               |        /----<------*------<----¬                         | ||||
|  *            q2    \   /    q3          |       /          /-*-¬          \ | ||||
|  *       /-----<-----* *-----<----¬      |      /          /     \          \ | ||||
|  *    i *            H_W           * f   |   i *           \     /  q4      * f | ||||
|  *       \                        /      |      \           \->-/          /    | ||||
|  *        \                      /       |       \                        /        | ||||
|  *         \---------->---------/        |        \----------->----------/         | ||||
|  *                   q1                  |                   q1                   | ||||
|  *                                       | | ||||
|  *                Saucer (S)             |                  Eye (E) | ||||
|  *  | ||||
|  * S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2]) | ||||
|  * E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2]) | ||||
|  */ | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                  TWeakHamiltonianEye implementation                        * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name) | ||||
| : Module<WeakHamiltonianPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TWeakHamiltonianEye::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TWeakHamiltonianEye::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TWeakHamiltonianEye::setup(void) | ||||
| { | ||||
|  | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TWeakHamiltonianEye::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"  | ||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  | ||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  | ||||
|                  << "'." << std::endl; | ||||
|  | ||||
|     CorrWriter             writer(par().output); | ||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||
|     LatticeComplex        expbuf(env().getGrid()); | ||||
|     std::vector<TComplex> corrbuf; | ||||
|     std::vector<Result>   result(n_eye_diag); | ||||
|     unsigned int ndim   = env().getNd(); | ||||
|  | ||||
|     PropagatorField              tmp1(env().getGrid()); | ||||
|     LatticeComplex               tmp2(env().getGrid()); | ||||
|     std::vector<PropagatorField> S_body(ndim, tmp1); | ||||
|     std::vector<PropagatorField> S_loop(ndim, tmp1); | ||||
|     std::vector<LatticeComplex>  E_body(ndim, tmp2); | ||||
|     std::vector<LatticeComplex>  E_loop(ndim, tmp2); | ||||
|  | ||||
|     // Setup for S-type contractions. | ||||
|     for (int mu = 0; mu < ndim; ++mu) | ||||
|     { | ||||
|         S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu])); | ||||
|         S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu])); | ||||
|     } | ||||
|  | ||||
|     // Perform S-type contractions.     | ||||
|     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) | ||||
|     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") | ||||
|  | ||||
|     // Recycle sub-expressions for E-type contractions. | ||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||
|     { | ||||
|         E_body[mu] = trace(S_body[mu]); | ||||
|         E_loop[mu] = trace(S_loop[mu]); | ||||
|     } | ||||
|  | ||||
|     // Perform E-type contractions. | ||||
|     SUM_MU(expbuf, E_body[mu]*E_loop[mu]) | ||||
|     MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E") | ||||
|  | ||||
|     write(writer, "HW_Eye", result); | ||||
| } | ||||
							
								
								
									
										58
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||
| #define Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         WeakHamiltonianEye                                 * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| enum | ||||
| { | ||||
|     S_diag = 0, | ||||
|     E_diag = 1, | ||||
|     n_eye_diag = 2 | ||||
| }; | ||||
|  | ||||
| // Saucer and Eye subdiagram contractions. | ||||
| #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma) | ||||
| #define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma) | ||||
|  | ||||
| MAKE_WEAK_MODULE(WeakHamiltonianEye) | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ | ||||
							
								
								
									
										139
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,139 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MContraction; | ||||
|  | ||||
| /* | ||||
|  * Weak Hamiltonian current-current contractions, Non-Eye-type. | ||||
|  *  | ||||
|  * These contractions are generated by the Q1 and Q2 operators in the physical | ||||
|  * basis (see e.g. Fig 3 of arXiv:1507.03094). | ||||
|  *  | ||||
|  * Schematic:      | ||||
|  *            q2             q3          |           q2              q3 | ||||
|  *          /--<--¬       /--<--¬        |        /--<--¬         /--<--¬        | ||||
|  *         /       \     /       \       |       /       \       /       \       | ||||
|  *        /         \   /         \      |      /         \     /         \      | ||||
|  *       /           \ /           \     |     /           \   /           \     | ||||
|  *    i *             * H_W         *  f |  i *             * * H_W         * f  | ||||
|  *      \             *             |    |     \           /   \           / | ||||
|  *       \           / \           /     |      \         /     \         /     | ||||
|  *        \         /   \         /      |       \       /       \       /   | ||||
|  *         \       /     \       /       |        \-->--/         \-->--/       | ||||
|  *          \-->--/       \-->--/        |          q1               q4  | ||||
|  *            q1             q4          | | ||||
|  *                Connected (C)          |                 Wing (W) | ||||
|  * | ||||
|  * C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu]) | ||||
|  * W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu]) | ||||
|  *  | ||||
|  */ | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                  TWeakHamiltonianNonEye implementation                     * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name) | ||||
| : Module<WeakHamiltonianPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TWeakHamiltonianNonEye::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TWeakHamiltonianNonEye::setup(void) | ||||
| { | ||||
|  | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TWeakHamiltonianNonEye::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"  | ||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  | ||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  | ||||
|                  << "'." << std::endl; | ||||
|      | ||||
|     CorrWriter             writer(par().output); | ||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||
|     LatticeComplex        expbuf(env().getGrid()); | ||||
|     std::vector<TComplex> corrbuf; | ||||
|     std::vector<Result>   result(n_noneye_diag);  | ||||
|     unsigned int ndim   = env().getNd(); | ||||
|  | ||||
|     PropagatorField              tmp1(env().getGrid()); | ||||
|     LatticeComplex               tmp2(env().getGrid()); | ||||
|     std::vector<PropagatorField> C_i_side_loop(ndim, tmp1); | ||||
|     std::vector<PropagatorField> C_f_side_loop(ndim, tmp1); | ||||
|     std::vector<LatticeComplex>  W_i_side_loop(ndim, tmp2); | ||||
|     std::vector<LatticeComplex>  W_f_side_loop(ndim, tmp2); | ||||
|  | ||||
|     // Setup for C-type contractions. | ||||
|     for (int mu = 0; mu < ndim; ++mu) | ||||
|     { | ||||
|         C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu])); | ||||
|         C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu])); | ||||
|     } | ||||
|  | ||||
|     // Perform C-type contractions.     | ||||
|     SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu])) | ||||
|     MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C") | ||||
|  | ||||
|     // Recycle sub-expressions for W-type contractions. | ||||
|     for (unsigned int mu = 0; mu < ndim; ++mu) | ||||
|     { | ||||
|         W_i_side_loop[mu] = trace(C_i_side_loop[mu]); | ||||
|         W_f_side_loop[mu] = trace(C_f_side_loop[mu]); | ||||
|     } | ||||
|  | ||||
|     // Perform W-type contractions. | ||||
|     SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu]) | ||||
|     MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W") | ||||
|  | ||||
|     write(writer, "HW_NonEye", result); | ||||
| } | ||||
| @@ -0,0 +1,57 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||
| #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         WeakHamiltonianNonEye                              * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| enum | ||||
| { | ||||
|     W_diag = 0, | ||||
|     C_diag = 1, | ||||
|     n_noneye_diag = 2 | ||||
| }; | ||||
|  | ||||
| // Wing and Connected subdiagram contractions | ||||
| #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma) | ||||
|  | ||||
| MAKE_WEAK_MODULE(WeakHamiltonianNonEye) | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ | ||||
							
								
								
									
										135
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MContraction; | ||||
|  | ||||
| /* | ||||
|  * Weak Hamiltonian + current contractions, disconnected topology for neutral  | ||||
|  * mesons. | ||||
|  *  | ||||
|  * These contractions are generated by operators Q_1,...,10 of the dS=1 Weak | ||||
|  * Hamiltonian in the physical basis and an additional current J (see e.g.  | ||||
|  * Fig 11 of arXiv:1507.03094). | ||||
|  *  | ||||
|  * Schematic: | ||||
|  *                         | ||||
|  *           q2          q4             q3 | ||||
|  *       /--<--¬     /---<--¬       /---<--¬ | ||||
|  *     /         \ /         \     /        \ | ||||
|  *  i *           * H_W      |  J *          * f | ||||
|  *     \         / \         /     \        / | ||||
|  *      \--->---/   \-------/       \------/ | ||||
|  *          q1  | ||||
|  *  | ||||
|  * options | ||||
|  * - q1: input propagator 1 (string) | ||||
|  * - q2: input propagator 2 (string) | ||||
|  * - q3: input propagator 3 (string), assumed to be sequential propagator  | ||||
|  * - q4: input propagator 4 (string), assumed to be a loop | ||||
|  *  | ||||
|  * type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5) | ||||
|  * type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5) | ||||
|  */ | ||||
|  | ||||
| /******************************************************************************* | ||||
|  *                  TWeakNeutral4ptDisc implementation                         * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name) | ||||
| : Module<WeakHamiltonianPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TWeakNeutral4ptDisc::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TWeakNeutral4ptDisc::setup(void) | ||||
| { | ||||
|  | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TWeakNeutral4ptDisc::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"  | ||||
|                  << getName() << "' using quarks '" << par().q1 << "', '"  | ||||
|                  << par().q2 << ", '" << par().q3 << "' and '" << par().q4  | ||||
|                  << "'." << std::endl; | ||||
|  | ||||
|     CorrWriter             writer(par().output); | ||||
|     PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1); | ||||
|     PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2); | ||||
|     PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3); | ||||
|     PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4); | ||||
|     Gamma g5            = Gamma(Gamma::Algebra::Gamma5); | ||||
|     LatticeComplex        expbuf(env().getGrid()); | ||||
|     std::vector<TComplex> corrbuf; | ||||
|     std::vector<Result>   result(n_neut_disc_diag); | ||||
|     unsigned int ndim   = env().getNd(); | ||||
|  | ||||
|     PropagatorField              tmp(env().getGrid()); | ||||
|     std::vector<PropagatorField> meson(ndim, tmp); | ||||
|     std::vector<PropagatorField> loop(ndim, tmp); | ||||
|     LatticeComplex               curr(env().getGrid()); | ||||
|  | ||||
|     // Setup for type 1 contractions. | ||||
|     for (int mu = 0; mu < ndim; ++mu) | ||||
|     { | ||||
|         meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu])); | ||||
|         loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu])); | ||||
|     } | ||||
|     curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5)); | ||||
|  | ||||
|     // Perform type 1 contractions.     | ||||
|     SUM_MU(expbuf, trace(meson[mu]*loop[mu])) | ||||
|     expbuf *= curr; | ||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1") | ||||
|  | ||||
|     // Perform type 2 contractions. | ||||
|     SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu])) | ||||
|     expbuf *= curr; | ||||
|     MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2") | ||||
|  | ||||
|     write(writer, "HW_disc0", result); | ||||
| } | ||||
							
								
								
									
										59
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson    <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||
| #define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         WeakNeutral4ptDisc                                 * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MContraction) | ||||
|  | ||||
| enum | ||||
| { | ||||
|     neut_disc_1_diag = 0, | ||||
|     neut_disc_2_diag = 1, | ||||
|     n_neut_disc_diag = 2 | ||||
| }; | ||||
|  | ||||
| // Neutral 4pt disconnected subdiagram contractions. | ||||
| #define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma) | ||||
| #define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma) | ||||
| #define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma)) | ||||
|  | ||||
| MAKE_WEAK_MODULE(WeakNeutral4ptDisc) | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ | ||||
| @@ -1,34 +1,5 @@ | ||||
| /*************************************************************************************
 | ||||
| 
 | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
| 
 | ||||
| Source file: extras/Hadrons/Modules/Quark.hpp | ||||
| 
 | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
| 
 | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
| 
 | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| 
 | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| 
 | ||||
| #ifndef Hadrons_Quark_hpp_ | ||||
| #define Hadrons_Quark_hpp_ | ||||
| #ifndef Hadrons_MFermion_GaugeProp_hpp_ | ||||
| #define Hadrons_MFermion_GaugeProp_hpp_ | ||||
| 
 | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
| 
 | ||||
| /******************************************************************************
 | ||||
|  *                               TQuark                                       * | ||||
|  *                                GaugeProp                                   * | ||||
|  ******************************************************************************/ | ||||
| class QuarkPar: Serializable | ||||
| BEGIN_MODULE_NAMESPACE(MFermion) | ||||
| 
 | ||||
| class GaugePropPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, | ||||
|                                     std::string, source, | ||||
|                                     std::string, solver); | ||||
| }; | ||||
| 
 | ||||
| template <typename FImpl> | ||||
| class TQuark: public Module<QuarkPar> | ||||
| class TGaugeProp: public Module<GaugePropPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor
 | ||||
|     TQuark(const std::string name); | ||||
|     TGaugeProp(const std::string name); | ||||
|     // destructor
 | ||||
|     virtual ~TQuark(void) = default; | ||||
|     // dependencies/products
 | ||||
|     virtual ~TGaugeProp(void) = default; | ||||
|     // dependency relation
 | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup
 | ||||
| @@ -69,20 +42,20 @@ private: | ||||
|     SolverFn     *solver_{nullptr}; | ||||
| }; | ||||
| 
 | ||||
| MODULE_REGISTER(Quark, TQuark<FIMPL>); | ||||
| MODULE_REGISTER_NS(GaugeProp, TGaugeProp<FIMPL>, MFermion); | ||||
| 
 | ||||
| /******************************************************************************
 | ||||
|  *                          TQuark implementation                             * | ||||
|  *                      TGaugeProp implementation                             * | ||||
|  ******************************************************************************/ | ||||
| // constructor /////////////////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| TQuark<FImpl>::TQuark(const std::string name) | ||||
| : Module(name) | ||||
| TGaugeProp<FImpl>::TGaugeProp(const std::string name) | ||||
| : Module<GaugePropPar>(name) | ||||
| {} | ||||
| 
 | ||||
| // dependencies/products ///////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||
| std::vector<std::string> TGaugeProp<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().source, par().solver}; | ||||
|      | ||||
| @@ -90,7 +63,7 @@ std::vector<std::string> TQuark<FImpl>::getInput(void) | ||||
| } | ||||
| 
 | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||
| std::vector<std::string> TGaugeProp<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName(), getName() + "_5d"}; | ||||
|      | ||||
| @@ -99,7 +72,7 @@ std::vector<std::string> TQuark<FImpl>::getOutput(void) | ||||
| 
 | ||||
| // setup ///////////////////////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| void TQuark<FImpl>::setup(void) | ||||
| void TGaugeProp<FImpl>::setup(void) | ||||
| { | ||||
|     Ls_ = env().getObjectLs(par().solver); | ||||
|     env().template registerLattice<PropagatorField>(getName()); | ||||
| @@ -111,13 +84,13 @@ void TQuark<FImpl>::setup(void) | ||||
| 
 | ||||
| // execution ///////////////////////////////////////////////////////////////////
 | ||||
| template <typename FImpl> | ||||
| void TQuark<FImpl>::execute(void) | ||||
| void TGaugeProp<FImpl>::execute(void) | ||||
| { | ||||
|     LOG(Message) << "Computing quark propagator '" << getName() << "'" | ||||
|                  << std::endl; | ||||
|     << std::endl; | ||||
|      | ||||
|     FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), | ||||
|                     tmp(env().getGrid()); | ||||
|     tmp(env().getGrid()); | ||||
|     std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d"); | ||||
|     PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName); | ||||
|     PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source); | ||||
| @@ -128,7 +101,7 @@ void TQuark<FImpl>::execute(void) | ||||
|     } | ||||
|      | ||||
|     LOG(Message) << "Inverting using solver '" << par().solver | ||||
|                  << "' on source '" << par().source << "'" << std::endl; | ||||
|     << "' on source '" << par().source << "'" << std::endl; | ||||
|     for (unsigned int s = 0; s < Ns; ++s) | ||||
|     for (unsigned int c = 0; c < Nc; ++c) | ||||
|     { | ||||
| @@ -170,16 +143,18 @@ void TQuark<FImpl>::execute(void) | ||||
|         if (Ls_ > 1) | ||||
|         { | ||||
|             PropagatorField &p4d = | ||||
|                 *env().template getObject<PropagatorField>(getName()); | ||||
|             *env().template getObject<PropagatorField>(getName()); | ||||
|              | ||||
|             axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); | ||||
|             axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); | ||||
|             axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); | ||||
|             ExtractSlice(tmp, sol, 0, 0); | ||||
|             FermToProp(p4d, tmp, s, c); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| END_MODULE_NAMESPACE | ||||
| 
 | ||||
| END_HADRONS_NAMESPACE | ||||
| 
 | ||||
| #endif // Hadrons_Quark_hpp_
 | ||||
| #endif // Hadrons_MFermion_GaugeProp_hpp_
 | ||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TLoad::execute(void) | ||||
| { | ||||
|     NerscField  header; | ||||
|     FieldMetaData  header; | ||||
|     std::string fileName = par().file + "." | ||||
|                            + std::to_string(env().getTrajectory()); | ||||
|      | ||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | ||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||
|     NerscIO::readConfiguration(U, header, fileName); | ||||
|     LOG(Message) << "NERSC header:" << std::endl; | ||||
|     dump_nersc_header(header, LOG(Message)); | ||||
|     dump_meta_data(header, LOG(Message)); | ||||
| } | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Load_hpp_ | ||||
| #define Hadrons_Load_hpp_ | ||||
| #ifndef Hadrons_MGauge_Load_hpp_ | ||||
| #define Hadrons_MGauge_Load_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Load_hpp_ | ||||
| #endif // Hadrons_MGauge_Load_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Random_hpp_ | ||||
| #define Hadrons_Random_hpp_ | ||||
| #ifndef Hadrons_MGauge_Random_hpp_ | ||||
| #define Hadrons_MGauge_Random_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Random_hpp_ | ||||
| #endif // Hadrons_MGauge_Random_hpp_ | ||||
|   | ||||
							
								
								
									
										88
									
								
								extras/Hadrons/Modules/MGauge/StochEm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								extras/Hadrons/Modules/MGauge/StochEm.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.cc | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
|  | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MGauge; | ||||
|  | ||||
| /****************************************************************************** | ||||
| *                  TStochEm implementation                             * | ||||
| ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TStochEm::TStochEm(const std::string name) | ||||
| : Module<StochEmPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TStochEm::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TStochEm::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TStochEm::setup(void) | ||||
| { | ||||
|     if (!env().hasRegisteredObject("_" + getName() + "_weight")) | ||||
|     { | ||||
|         env().registerLattice<EmComp>("_" + getName() + "_weight"); | ||||
|     } | ||||
|     env().registerLattice<EmField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TStochEm::execute(void) | ||||
| { | ||||
|     PhotonR photon(par().gauge, par().zmScheme); | ||||
|     EmField &a = *env().createLattice<EmField>(getName()); | ||||
|     EmComp  *w; | ||||
|      | ||||
|     if (!env().hasCreatedObject("_" + getName() + "_weight")) | ||||
|     { | ||||
|         LOG(Message) << "Caching stochatic EM potential weight (gauge: " | ||||
|                      << par().gauge << ", zero-mode scheme: " | ||||
|                      << par().zmScheme << ")..." << std::endl; | ||||
|         w = env().createLattice<EmComp>("_" + getName() + "_weight"); | ||||
|         photon.StochasticWeight(*w); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         w = env().getObject<EmComp>("_" + getName() + "_weight"); | ||||
|     } | ||||
|     LOG(Message) << "Generating stochatic EM potential..." << std::endl; | ||||
|     photon.StochasticField(a, *env().get4dRng(), *w); | ||||
| } | ||||
							
								
								
									
										75
									
								
								extras/Hadrons/Modules/MGauge/StochEm.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								extras/Hadrons/Modules/MGauge/StochEm.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
|  | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef Hadrons_MGauge_StochEm_hpp_ | ||||
| #define Hadrons_MGauge_StochEm_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         StochEm                                 * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MGauge) | ||||
|  | ||||
| class StochEmPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, | ||||
|                                     PhotonR::Gauge,    gauge, | ||||
|                                     PhotonR::ZmScheme, zmScheme); | ||||
| }; | ||||
|  | ||||
| class TStochEm: public Module<StochEmPar> | ||||
| { | ||||
| public: | ||||
|     typedef PhotonR::GaugeField     EmField; | ||||
|     typedef PhotonR::GaugeLinkField EmComp; | ||||
| public: | ||||
|     // constructor | ||||
|     TStochEm(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TStochEm(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MGauge_StochEm_hpp_ | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Unit_hpp_ | ||||
| #define Hadrons_Unit_hpp_ | ||||
| #ifndef Hadrons_MGauge_Unit_hpp_ | ||||
| #define Hadrons_MGauge_Unit_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Unit_hpp_ | ||||
| #endif // Hadrons_MGauge_Unit_hpp_ | ||||
|   | ||||
							
								
								
									
										132
									
								
								extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,132 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp | ||||
|  | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MLoop_NoiseLoop_hpp_ | ||||
| #define Hadrons_MLoop_NoiseLoop_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /* | ||||
|   | ||||
|  Noise loop propagator | ||||
|  ----------------------------- | ||||
|  * loop_x = q_x * adj(eta_x) | ||||
|   | ||||
|  * options: | ||||
|  - q = Result of inversion on noise source. | ||||
|  - eta = noise source. | ||||
|  | ||||
|  */ | ||||
|  | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         NoiseLoop                                          * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MLoop) | ||||
|  | ||||
| class NoiseLoopPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar, | ||||
|                                     std::string, q, | ||||
|                                     std::string, eta); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TNoiseLoop: public Module<NoiseLoopPar> | ||||
| { | ||||
| public: | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TNoiseLoop(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TNoiseLoop(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                 TNoiseLoop implementation                                  * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| TNoiseLoop<FImpl>::TNoiseLoop(const std::string name) | ||||
| : Module<NoiseLoopPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TNoiseLoop<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().q, par().eta}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TNoiseLoop<FImpl>::setup(void) | ||||
| { | ||||
|     env().template registerLattice<PropagatorField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TNoiseLoop<FImpl>::execute(void) | ||||
| { | ||||
|     PropagatorField &loop = *env().template createLattice<PropagatorField>(getName()); | ||||
|     PropagatorField &q    = *env().template getObject<PropagatorField>(par().q); | ||||
|     PropagatorField &eta  = *env().template getObject<PropagatorField>(par().eta); | ||||
|     loop = q*adj(eta); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MLoop_NoiseLoop_hpp_ | ||||
							
								
								
									
										226
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										226
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,226 @@ | ||||
| #include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MScalar; | ||||
|  | ||||
| /****************************************************************************** | ||||
| *                     TChargedProp implementation                             * | ||||
| ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TChargedProp::TChargedProp(const std::string name) | ||||
| : Module<ChargedPropPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TChargedProp::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().source, par().emField}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TChargedProp::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TChargedProp::setup(void) | ||||
| { | ||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); | ||||
|     phaseName_.clear(); | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         phaseName_.push_back("_shiftphase_" + std::to_string(mu)); | ||||
|     } | ||||
|     GFSrcName_ = "_" + getName() + "_DinvSrc"; | ||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) | ||||
|     { | ||||
|         env().registerLattice<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     if (!env().hasRegisteredObject(phaseName_[0])) | ||||
|     { | ||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|         { | ||||
|             env().registerLattice<ScalarField>(phaseName_[mu]); | ||||
|         } | ||||
|     } | ||||
|     if (!env().hasRegisteredObject(GFSrcName_)) | ||||
|     { | ||||
|         env().registerLattice<ScalarField>(GFSrcName_); | ||||
|     } | ||||
|     env().registerLattice<ScalarField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TChargedProp::execute(void) | ||||
| { | ||||
|     // CACHING ANALYTIC EXPRESSIONS | ||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); | ||||
|     Complex     ci(0.0,1.0); | ||||
|     FFT         fft(env().getGrid()); | ||||
|      | ||||
|     // cache free scalar propagator | ||||
|     if (!env().hasCreatedObject(freeMomPropName_)) | ||||
|     { | ||||
|         LOG(Message) << "Caching momentum space free scalar propagator" | ||||
|                      << " (mass= " << par().mass << ")..." << std::endl; | ||||
|         freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); | ||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     // cache G*F*src | ||||
|     if (!env().hasCreatedObject(GFSrcName_)) | ||||
|          | ||||
|     { | ||||
|         GFSrc_ = env().createLattice<ScalarField>(GFSrcName_); | ||||
|         fft.FFT_all_dim(*GFSrc_, source, FFT::forward); | ||||
|         *GFSrc_ = (*freeMomProp_)*(*GFSrc_); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         GFSrc_ = env().getObject<ScalarField>(GFSrcName_); | ||||
|     } | ||||
|     // cache phases | ||||
|     if (!env().hasCreatedObject(phaseName_[0])) | ||||
|     { | ||||
|         std::vector<int> &l = env().getGrid()->_fdimensions; | ||||
|          | ||||
|         LOG(Message) << "Caching shift phases..." << std::endl; | ||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|         { | ||||
|             Real    twoPiL = M_PI*2./l[mu]; | ||||
|              | ||||
|             phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu])); | ||||
|             LatticeCoordinate(*(phase_[mu]), mu); | ||||
|             *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|         { | ||||
|             phase_.push_back(env().getObject<ScalarField>(phaseName_[mu])); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // PROPAGATOR CALCULATION | ||||
|     LOG(Message) << "Computing charged scalar propagator" | ||||
|                  << " (mass= " << par().mass | ||||
|                  << ", charge= " << par().charge << ")..." << std::endl; | ||||
|      | ||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); | ||||
|     ScalarField buf(env().getGrid()); | ||||
|     ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; | ||||
|     double      q = par().charge; | ||||
|      | ||||
|     // G*F*Src | ||||
|     prop = GFSrc; | ||||
|  | ||||
|     // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) | ||||
|     buf = GFSrc; | ||||
|     momD1(buf, fft); | ||||
|     buf = G*buf; | ||||
|     prop = prop - q*buf; | ||||
|  | ||||
|     // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) | ||||
|     momD1(buf, fft); | ||||
|     prop = prop + q*q*G*buf; | ||||
|  | ||||
|     // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) | ||||
|     buf = GFSrc; | ||||
|     momD2(buf, fft); | ||||
|     prop = prop - q*q*G*buf; | ||||
|  | ||||
|     // final FT | ||||
|     fft.FFT_all_dim(prop, prop, FFT::backward); | ||||
|      | ||||
|     // OUTPUT IF NECESSARY | ||||
|     if (!par().output.empty()) | ||||
|     { | ||||
|         std::string           filename = par().output + "." + | ||||
|                                          std::to_string(env().getTrajectory()); | ||||
|          | ||||
|         LOG(Message) << "Saving zero-momentum projection to '" | ||||
|                      << filename << "'..." << std::endl; | ||||
|          | ||||
|         CorrWriter            writer(filename); | ||||
|         std::vector<TComplex> vecBuf; | ||||
|         std::vector<Complex>  result; | ||||
|          | ||||
|         sliceSum(prop, vecBuf, Tp); | ||||
|         result.resize(vecBuf.size()); | ||||
|         for (unsigned int t = 0; t < vecBuf.size(); ++t) | ||||
|         { | ||||
|             result[t] = TensorRemove(vecBuf[t]); | ||||
|         } | ||||
|         write(writer, "charge", q); | ||||
|         write(writer, "prop", result); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void TChargedProp::momD1(ScalarField &s, FFT &fft) | ||||
| { | ||||
|     EmField     &A = *env().getObject<EmField>(par().emField); | ||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), | ||||
|                 Amu(env().getGrid()); | ||||
|     Complex     ci(0.0,1.0); | ||||
|  | ||||
|     result = zero; | ||||
|  | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu); | ||||
|         buf = (*phase_[mu])*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); | ||||
|         buf = Amu*buf; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result - ci*buf; | ||||
|     } | ||||
|     fft.FFT_all_dim(s, s, FFT::backward); | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu); | ||||
|         buf = Amu*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result + ci*adj(*phase_[mu])*buf; | ||||
|     } | ||||
|  | ||||
|     s = result; | ||||
| } | ||||
|  | ||||
| void TChargedProp::momD2(ScalarField &s, FFT &fft) | ||||
| { | ||||
|     EmField     &A = *env().getObject<EmField>(par().emField); | ||||
|     ScalarField buf(env().getGrid()), result(env().getGrid()), | ||||
|                 Amu(env().getGrid()); | ||||
|  | ||||
|     result = zero; | ||||
|      | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu); | ||||
|         buf = (*phase_[mu])*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::backward); | ||||
|         buf = Amu*Amu*buf; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result + .5*buf; | ||||
|     } | ||||
|     fft.FFT_all_dim(s, s, FFT::backward); | ||||
|     for (unsigned int mu = 0; mu < env().getNd(); ++mu) | ||||
|     { | ||||
|         Amu = peekLorentz(A, mu);         | ||||
|         buf = Amu*Amu*s; | ||||
|         fft.FFT_all_dim(buf, buf, FFT::forward); | ||||
|         result = result + .5*adj(*phase_[mu])*buf; | ||||
|     } | ||||
|  | ||||
|     s = result; | ||||
| } | ||||
							
								
								
									
										61
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								extras/Hadrons/Modules/MScalar/ChargedProp.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| #ifndef Hadrons_MScalar_ChargedProp_hpp_ | ||||
| #define Hadrons_MScalar_ChargedProp_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       Charged scalar propagator                            * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MScalar) | ||||
|  | ||||
| class ChargedPropPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, | ||||
|                                     std::string, emField, | ||||
|                                     std::string, source, | ||||
|                                     double,      mass, | ||||
|                                     double,      charge, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| class TChargedProp: public Module<ChargedPropPar> | ||||
| { | ||||
| public: | ||||
|     SCALAR_TYPE_ALIASES(SIMPL,); | ||||
|     typedef PhotonR::GaugeField     EmField; | ||||
|     typedef PhotonR::GaugeLinkField EmComp; | ||||
| public: | ||||
|     // constructor | ||||
|     TChargedProp(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TChargedProp(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| private: | ||||
|     void momD1(ScalarField &s, FFT &fft); | ||||
|     void momD2(ScalarField &s, FFT &fft); | ||||
| private: | ||||
|     std::string                freeMomPropName_, GFSrcName_; | ||||
|     std::vector<std::string>   phaseName_; | ||||
|     ScalarField                *freeMomProp_, *GFSrc_; | ||||
|     std::vector<ScalarField *> phase_; | ||||
|     EmField                    *A; | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MScalar_ChargedProp_hpp_ | ||||
							
								
								
									
										79
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,79 @@ | ||||
| #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> | ||||
| #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Hadrons; | ||||
| using namespace MScalar; | ||||
|  | ||||
| /****************************************************************************** | ||||
| *                        TFreeProp implementation                             * | ||||
| ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| TFreeProp::TFreeProp(const std::string name) | ||||
| : Module<FreePropPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| std::vector<std::string> TFreeProp::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in = {par().source}; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| std::vector<std::string> TFreeProp::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| void TFreeProp::setup(void) | ||||
| { | ||||
|     freeMomPropName_ = FREEMOMPROP(par().mass); | ||||
|      | ||||
|     if (!env().hasRegisteredObject(freeMomPropName_)) | ||||
|     { | ||||
|         env().registerLattice<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     env().registerLattice<ScalarField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TFreeProp::execute(void) | ||||
| { | ||||
|     ScalarField &prop   = *env().createLattice<ScalarField>(getName()); | ||||
|     ScalarField &source = *env().getObject<ScalarField>(par().source); | ||||
|     ScalarField *freeMomProp; | ||||
|  | ||||
|     if (!env().hasCreatedObject(freeMomPropName_)) | ||||
|     { | ||||
|         LOG(Message) << "Caching momentum space free scalar propagator" | ||||
|                      << " (mass= " << par().mass << ")..." << std::endl; | ||||
|         freeMomProp = env().createLattice<ScalarField>(freeMomPropName_); | ||||
|         SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         freeMomProp = env().getObject<ScalarField>(freeMomPropName_); | ||||
|     } | ||||
|     LOG(Message) << "Computing free scalar propagator..." << std::endl; | ||||
|     SIMPL::FreePropagator(source, prop, *freeMomProp); | ||||
|      | ||||
|     if (!par().output.empty()) | ||||
|     { | ||||
|         TextWriter            writer(par().output + "." + | ||||
|                                      std::to_string(env().getTrajectory())); | ||||
|         std::vector<TComplex> buf; | ||||
|         std::vector<Complex>  result; | ||||
|          | ||||
|         sliceSum(prop, buf, Tp); | ||||
|         result.resize(buf.size()); | ||||
|         for (unsigned int t = 0; t < buf.size(); ++t) | ||||
|         { | ||||
|             result[t] = TensorRemove(buf[t]); | ||||
|         } | ||||
|         write(writer, "prop", result); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										50
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								extras/Hadrons/Modules/MScalar/FreeProp.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| #ifndef Hadrons_MScalar_FreeProp_hpp_ | ||||
| #define Hadrons_MScalar_FreeProp_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                               FreeProp                                     * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MScalar) | ||||
|  | ||||
| class FreePropPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, | ||||
|                                     std::string, source, | ||||
|                                     double,      mass, | ||||
|                                     std::string, output); | ||||
| }; | ||||
|  | ||||
| class TFreeProp: public Module<FreePropPar> | ||||
| { | ||||
| public: | ||||
|     SCALAR_TYPE_ALIASES(SIMPL,); | ||||
| public: | ||||
|     // constructor | ||||
|     TFreeProp(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TFreeProp(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| private: | ||||
|     std::string freeMomPropName_; | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MScalar_FreeProp_hpp_ | ||||
							
								
								
									
										6
									
								
								extras/Hadrons/Modules/MScalar/Scalar.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								extras/Hadrons/Modules/MScalar/Scalar.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| #ifndef Hadrons_Scalar_hpp_ | ||||
| #define Hadrons_Scalar_hpp_ | ||||
|  | ||||
| #define FREEMOMPROP(m) "_scalar_mom_prop_" + std::to_string(m) | ||||
|  | ||||
| #endif // Hadrons_Scalar_hpp_ | ||||
							
								
								
									
										114
									
								
								extras/Hadrons/Modules/MSink/Point.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								extras/Hadrons/Modules/MSink/Point.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | ||||
| #ifndef Hadrons_MSink_Point_hpp_ | ||||
| #define Hadrons_MSink_Point_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                                   Point                                    * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MSink) | ||||
|  | ||||
| class PointPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, | ||||
|                                     std::string, mom); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TPoint: public Module<PointPar> | ||||
| { | ||||
| public: | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
|     SINK_TYPE_ALIASES(); | ||||
| public: | ||||
|     // constructor | ||||
|     TPoint(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TPoint(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSink); | ||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                          TPoint implementation                             * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| TPoint<FImpl>::TPoint(const std::string name) | ||||
| : Module<PointPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TPoint<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TPoint<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TPoint<FImpl>::setup(void) | ||||
| { | ||||
|     unsigned int size; | ||||
|      | ||||
|     size = env().template lattice4dSize<LatticeComplex>(); | ||||
|     env().registerObject(getName(), size); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TPoint<FImpl>::execute(void) | ||||
| { | ||||
|     std::vector<Real> p = strToVec<Real>(par().mom); | ||||
|     LatticeComplex    ph(env().getGrid()), coor(env().getGrid()); | ||||
|     Complex           i(0.0,1.0); | ||||
|      | ||||
|     LOG(Message) << "Setting up point sink function for momentum [" | ||||
|                  << par().mom << "]" << std::endl; | ||||
|     ph = zero; | ||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||
|     { | ||||
|         LatticeCoordinate(coor, mu); | ||||
|         ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; | ||||
|     } | ||||
|     ph = exp((Real)(2*M_PI)*i*ph); | ||||
|     auto sink = [ph](const PropagatorField &field) | ||||
|     { | ||||
|         SlicedPropagator res; | ||||
|         PropagatorField  tmp = ph*field; | ||||
|          | ||||
|         sliceSum(tmp, res, Tp); | ||||
|          | ||||
|         return res; | ||||
|     }; | ||||
|     env().setObject(getName(), new SinkFn(sink)); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MSink_Point_hpp_ | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_RBPrecCG_hpp_ | ||||
| #define Hadrons_RBPrecCG_hpp_ | ||||
| #ifndef Hadrons_MSolver_RBPrecCG_hpp_ | ||||
| #define Hadrons_MSolver_RBPrecCG_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -53,7 +53,7 @@ template <typename FImpl> | ||||
| class TRBPrecCG: public Module<RBPrecCGPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TRBPrecCG(const std::string name); | ||||
| @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_RBPrecCG_hpp_ | ||||
| #endif // Hadrons_MSolver_RBPrecCG_hpp_ | ||||
|   | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Point_hpp_ | ||||
| #define Hadrons_Point_hpp_ | ||||
| #ifndef Hadrons_MSource_Point_hpp_ | ||||
| #define Hadrons_MSource_Point_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -63,7 +63,7 @@ template <typename FImpl> | ||||
| class TPoint: public Module<PointPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TPoint(const std::string name); | ||||
| @@ -78,7 +78,8 @@ public: | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); | ||||
| MODULE_REGISTER_NS(Point,       TPoint<FIMPL>,        MSource); | ||||
| MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       TPoint template implementation                       * | ||||
| @@ -132,4 +133,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Point_hpp_ | ||||
| #endif // Hadrons_MSource_Point_hpp_ | ||||
|   | ||||
| @@ -6,6 +6,7 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp | ||||
|  | ||||
| Copyright (C) 2015 | ||||
| Copyright (C) 2016 | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Antonin Portelli <antonin.portelli@me.com> | ||||
|  | ||||
| @@ -27,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_SeqGamma_hpp_ | ||||
| #define Hadrons_SeqGamma_hpp_ | ||||
| #ifndef Hadrons_MSource_SeqGamma_hpp_ | ||||
| #define Hadrons_MSource_SeqGamma_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -71,7 +72,7 @@ template <typename FImpl> | ||||
| class TSeqGamma: public Module<SeqGammaPar> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FGS_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TSeqGamma(const std::string name); | ||||
| @@ -149,9 +150,9 @@ void TSeqGamma<FImpl>::execute(void) | ||||
|     for(unsigned int mu = 0; mu < env().getNd(); mu++) | ||||
|     { | ||||
|         LatticeCoordinate(coor, mu); | ||||
|         ph = ph + p[mu]*coor; | ||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||
|     } | ||||
|     ph = exp(i*ph); | ||||
|     ph = exp((Real)(2*M_PI)*i*ph); | ||||
|     LatticeCoordinate(t, Tp); | ||||
|     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); | ||||
| } | ||||
| @@ -160,4 +161,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_SeqGamma_hpp_ | ||||
| #endif // Hadrons_MSource_SeqGamma_hpp_ | ||||
|   | ||||
							
								
								
									
										147
									
								
								extras/Hadrons/Modules/MSource/Wall.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								extras/Hadrons/Modules/MSource/Wall.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,147 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
| Source file: extras/Hadrons/Modules/MSource/Wall.hpp | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Andrew Lawson <andrew.lawson1991@gmail.com> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_MSource_WallSource_hpp_ | ||||
| #define Hadrons_MSource_WallSource_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| #include <Grid/Hadrons/ModuleFactory.hpp> | ||||
|  | ||||
| BEGIN_HADRONS_NAMESPACE | ||||
|  | ||||
| /* | ||||
|   | ||||
|  Wall source | ||||
|  ----------------------------- | ||||
|  * src_x = delta(x_3 - tW) * exp(i x.mom) | ||||
|   | ||||
|  * options: | ||||
|  - tW: source timeslice (integer) | ||||
|  - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.") | ||||
|   | ||||
|  */ | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                         Wall                                               * | ||||
|  ******************************************************************************/ | ||||
| BEGIN_MODULE_NAMESPACE(MSource) | ||||
|  | ||||
| class WallPar: Serializable | ||||
| { | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar, | ||||
|                                     unsigned int, tW, | ||||
|                                     std::string, mom); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| class TWall: public Module<WallPar> | ||||
| { | ||||
| public: | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TWall(const std::string name); | ||||
|     // destructor | ||||
|     virtual ~TWall(void) = default; | ||||
|     // dependency relation | ||||
|     virtual std::vector<std::string> getInput(void); | ||||
|     virtual std::vector<std::string> getOutput(void); | ||||
|     // setup | ||||
|     virtual void setup(void); | ||||
|     // execution | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                 TWall implementation                                       * | ||||
|  ******************************************************************************/ | ||||
| // constructor ///////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| TWall<FImpl>::TWall(const std::string name) | ||||
| : Module<WallPar>(name) | ||||
| {} | ||||
|  | ||||
| // dependencies/products /////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TWall<FImpl>::getInput(void) | ||||
| { | ||||
|     std::vector<std::string> in; | ||||
|      | ||||
|     return in; | ||||
| } | ||||
|  | ||||
| template <typename FImpl> | ||||
| std::vector<std::string> TWall<FImpl>::getOutput(void) | ||||
| { | ||||
|     std::vector<std::string> out = {getName()}; | ||||
|      | ||||
|     return out; | ||||
| } | ||||
|  | ||||
| // setup /////////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TWall<FImpl>::setup(void) | ||||
| { | ||||
|     env().template registerLattice<PropagatorField>(getName()); | ||||
| } | ||||
|  | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| template <typename FImpl> | ||||
| void TWall<FImpl>::execute(void) | ||||
| {     | ||||
|     LOG(Message) << "Generating wall source at t = " << par().tW  | ||||
|                  << " with momentum " << par().mom << std::endl; | ||||
|      | ||||
|     PropagatorField &src = *env().template createLattice<PropagatorField>(getName()); | ||||
|     Lattice<iScalar<vInteger>> t(env().getGrid()); | ||||
|     LatticeComplex             ph(env().getGrid()), coor(env().getGrid()); | ||||
|     std::vector<Real>          p; | ||||
|     Complex                    i(0.0,1.0); | ||||
|      | ||||
|     p  = strToVec<Real>(par().mom); | ||||
|     ph = zero; | ||||
|     for(unsigned int mu = 0; mu < Nd; mu++) | ||||
|     { | ||||
|         LatticeCoordinate(coor, mu); | ||||
|         ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); | ||||
|     } | ||||
|     ph = exp((Real)(2*M_PI)*i*ph); | ||||
|     LatticeCoordinate(t, Tp); | ||||
|     src = 1.; | ||||
|     src = where((t == par().tW), src*ph, 0.*src); | ||||
| } | ||||
|  | ||||
| END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_MSource_WallSource_hpp_ | ||||
| @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef Hadrons_Z2_hpp_ | ||||
| #define Hadrons_Z2_hpp_ | ||||
| #ifndef Hadrons_MSource_Z2_hpp_ | ||||
| #define Hadrons_MSource_Z2_hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -67,7 +67,7 @@ template <typename FImpl> | ||||
| class TZ2: public Module<Z2Par> | ||||
| { | ||||
| public: | ||||
|     TYPE_ALIASES(FImpl,); | ||||
|     FERM_TYPE_ALIASES(FImpl,); | ||||
| public: | ||||
|     // constructor | ||||
|     TZ2(const std::string name); | ||||
| @@ -82,7 +82,8 @@ public: | ||||
|     virtual void execute(void); | ||||
| }; | ||||
|  | ||||
| MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); | ||||
| MODULE_REGISTER_NS(Z2,       TZ2<FIMPL>,        MSource); | ||||
| MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource); | ||||
|  | ||||
| /****************************************************************************** | ||||
|  *                       TZ2 template implementation                          * | ||||
| @@ -148,4 +149,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons_Z2_hpp_ | ||||
| #endif // Hadrons_MSource_Z2_hpp_ | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| #ifndef Hadrons____FILEBASENAME____hpp_ | ||||
| #define Hadrons____FILEBASENAME____hpp_ | ||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons____FILEBASENAME____hpp_ | ||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| #ifndef Hadrons____FILEBASENAME____hpp_ | ||||
| #define Hadrons____FILEBASENAME____hpp_ | ||||
| #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
| #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|  | ||||
| #include <Grid/Hadrons/Global.hpp> | ||||
| #include <Grid/Hadrons/Module.hpp> | ||||
| @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE | ||||
|  | ||||
| END_HADRONS_NAMESPACE | ||||
|  | ||||
| #endif // Hadrons____FILEBASENAME____hpp_ | ||||
| #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ | ||||
|   | ||||
| @@ -1,19 +1,38 @@ | ||||
| modules_cc =\ | ||||
|   Modules/MContraction/WeakHamiltonianEye.cc \ | ||||
|   Modules/MContraction/WeakHamiltonianNonEye.cc \ | ||||
|   Modules/MContraction/WeakNeutral4ptDisc.cc \ | ||||
|   Modules/MGauge/Load.cc \ | ||||
|   Modules/MGauge/Random.cc \ | ||||
|   Modules/MGauge/Unit.cc | ||||
|   Modules/MGauge/StochEm.cc \ | ||||
|   Modules/MGauge/Unit.cc \ | ||||
|   Modules/MScalar/ChargedProp.cc \ | ||||
|   Modules/MScalar/FreeProp.cc | ||||
|  | ||||
| modules_hpp =\ | ||||
|   Modules/MAction/DWF.hpp \ | ||||
|   Modules/MAction/Wilson.hpp \ | ||||
|   Modules/MContraction/Baryon.hpp \ | ||||
|   Modules/MContraction/DiscLoop.hpp \ | ||||
|   Modules/MContraction/Gamma3pt.hpp \ | ||||
|   Modules/MContraction/Meson.hpp \ | ||||
|   Modules/MContraction/WeakHamiltonian.hpp \ | ||||
|   Modules/MContraction/WeakHamiltonianEye.hpp \ | ||||
|   Modules/MContraction/WeakHamiltonianNonEye.hpp \ | ||||
|   Modules/MContraction/WeakNeutral4ptDisc.hpp \ | ||||
|   Modules/MFermion/GaugeProp.hpp \ | ||||
|   Modules/MGauge/Load.hpp \ | ||||
|   Modules/MGauge/Random.hpp \ | ||||
|   Modules/MGauge/StochEm.hpp \ | ||||
|   Modules/MGauge/Unit.hpp \ | ||||
|   Modules/MLoop/NoiseLoop.hpp \ | ||||
|   Modules/MScalar/ChargedProp.hpp \ | ||||
|   Modules/MScalar/FreeProp.hpp \ | ||||
|   Modules/MScalar/Scalar.hpp \ | ||||
|   Modules/MSink/Point.hpp \ | ||||
|   Modules/MSolver/RBPrecCG.hpp \ | ||||
|   Modules/MSource/Point.hpp \ | ||||
|   Modules/MSource/SeqGamma.hpp \ | ||||
|   Modules/MSource/Z2.hpp \ | ||||
|   Modules/Quark.hpp | ||||
|   Modules/MSource/Wall.hpp \ | ||||
|   Modules/MSource/Z2.hpp | ||||
|  | ||||
|   | ||||
							
								
								
									
										11
									
								
								extras/qed-fvol/Global.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								extras/qed-fvol/Global.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| #include <qed-fvol/Global.hpp> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace QCD; | ||||
| using namespace QedFVol; | ||||
|  | ||||
| QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); | ||||
| QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); | ||||
| QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); | ||||
| QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); | ||||
| QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); | ||||
							
								
								
									
										42
									
								
								extras/qed-fvol/Global.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								extras/qed-fvol/Global.hpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| #ifndef QedFVol_Global_hpp_ | ||||
| #define QedFVol_Global_hpp_ | ||||
|  | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| #define BEGIN_QEDFVOL_NAMESPACE \ | ||||
| namespace Grid {\ | ||||
| using namespace QCD;\ | ||||
| namespace QedFVol {\ | ||||
| using Grid::operator<<; | ||||
| #define END_QEDFVOL_NAMESPACE }} | ||||
|  | ||||
| /* the 'using Grid::operator<<;' statement prevents a very nasty compilation | ||||
|  * error with GCC (clang compiles fine without it). | ||||
|  */ | ||||
|  | ||||
| BEGIN_QEDFVOL_NAMESPACE | ||||
|  | ||||
| class QedFVolLogger: public Logger | ||||
| { | ||||
| public: | ||||
|     QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, | ||||
|                                                   GridLogColours, "BLACK"){}; | ||||
| }; | ||||
|  | ||||
| #define LOG(channel) std::cout << QedFVolLog##channel | ||||
| #define QEDFVOL_ERROR(msg)\ | ||||
| LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ | ||||
|            << __LINE__ << ")" << std::endl;\ | ||||
| abort(); | ||||
|  | ||||
| #define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; | ||||
|  | ||||
| extern QedFVolLogger QedFVolLogError; | ||||
| extern QedFVolLogger QedFVolLogWarning; | ||||
| extern QedFVolLogger QedFVolLogMessage; | ||||
| extern QedFVolLogger QedFVolLogIterative; | ||||
| extern QedFVolLogger QedFVolLogDebug; | ||||
|  | ||||
| END_QEDFVOL_NAMESPACE | ||||
|  | ||||
| #endif // QedFVol_Global_hpp_ | ||||
							
								
								
									
										9
									
								
								extras/qed-fvol/Makefile.am
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								extras/qed-fvol/Makefile.am
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| AM_CXXFLAGS += -I$(top_srcdir)/extras | ||||
|  | ||||
| bin_PROGRAMS = qed-fvol | ||||
|  | ||||
| qed_fvol_SOURCES =   \ | ||||
|     qed-fvol.cc      \ | ||||
|     Global.cc | ||||
|  | ||||
| qed_fvol_LDADD   = -lGrid | ||||
							
								
								
									
										265
									
								
								extras/qed-fvol/WilsonLoops.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										265
									
								
								extras/qed-fvol/WilsonLoops.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,265 @@ | ||||
| #ifndef QEDFVOL_WILSONLOOPS_H | ||||
| #define QEDFVOL_WILSONLOOPS_H | ||||
|  | ||||
| #include <Global.hpp> | ||||
|  | ||||
| BEGIN_QEDFVOL_NAMESPACE | ||||
|  | ||||
| template <class Gimpl> class NewWilsonLoops : public Gimpl { | ||||
| public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; | ||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; | ||||
|  | ||||
|   ////////////////////////////////////////////////// | ||||
|   // directed plaquette oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, | ||||
|                            const int mu, const int nu) { | ||||
|     // Annoyingly, must use either scope resolution to find dependent base | ||||
|     // class, | ||||
|     // or this-> ; there is no "this" in a static method. This forces explicit | ||||
|     // Gimpl scope | ||||
|     // resolution throughout the usage in this file, and rather defeats the | ||||
|     // purpose of deriving | ||||
|     // from Gimpl. | ||||
|     plaq = Gimpl::CovShiftBackward( | ||||
|         U[mu], mu, Gimpl::CovShiftBackward( | ||||
|                        U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // trace of directed plaquette oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void traceDirPlaquette(LatticeComplex &plaq, | ||||
|                                 const std::vector<GaugeMat> &U, const int mu, | ||||
|                                 const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
|     dirPlaquette(sp, U, mu, nu); | ||||
|     plaq = trace(sp); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void sitePlaquette(LatticeComplex &Plaq, | ||||
|                             const std::vector<GaugeMat> &U) { | ||||
|     LatticeComplex sitePlaq(U[0]._grid); | ||||
|     Plaq = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceDirPlaquette(sitePlaq, U, mu, nu); | ||||
|         Plaq = Plaq + sitePlaq; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumPlaquette(const GaugeLorentz &Umu) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Plaq(Umu._grid); | ||||
|  | ||||
|     sitePlaquette(Plaq, U); | ||||
|  | ||||
|     TComplex Tp = sum(Plaq); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgPlaquette(const GaugeLorentz &Umu) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumplaq = sumPlaquette(Umu); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; | ||||
|     return sumplaq / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////////////////////// | ||||
|   // Wilson loop of size (R1, R2), oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void wilsonLoop(GaugeMat &wl, const std::vector<GaugeMat> &U, | ||||
|                            const int Rmu, const int Rnu, | ||||
|                            const int mu, const int nu) { | ||||
|     wl = U[nu]; | ||||
|  | ||||
|     for(int i = 0; i < Rnu-1; i++){ | ||||
|       wl = Gimpl::CovShiftForward(U[nu], nu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rmu; i++){ | ||||
|       wl = Gimpl::CovShiftForward(U[mu], mu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rnu; i++){ | ||||
|       wl = Gimpl::CovShiftBackward(U[nu], nu, wl); | ||||
|     } | ||||
|  | ||||
|     for(int i = 0; i < Rmu; i++){ | ||||
|       wl = Gimpl::CovShiftBackward(U[mu], mu, wl); | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // trace of Wilson Loop oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void traceWilsonLoop(LatticeComplex &wl, | ||||
|                                 const std::vector<GaugeMat> &U, | ||||
|                                 const int Rmu, const int Rnu, | ||||
|                                 const int mu, const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
|     wilsonLoop(sp, U, Rmu, Rnu, mu, nu); | ||||
|     wl = trace(sp); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|     Wl = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over planes of Wilson loop with length R1 | ||||
|   // in the time direction | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteTimelikeWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|  | ||||
|     int ndim = U[0]._grid->_ndimension; | ||||
|  | ||||
|     Wl = zero; | ||||
|     for (int nu = 0; nu < ndim - 1; nu++) { | ||||
|       traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); | ||||
|       Wl = Wl + siteWl; | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum Wilson loop over all planes orthogonal to the time direction | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void siteSpatialWilsonLoop(LatticeComplex &Wl, | ||||
|                             const std::vector<GaugeMat> &U, | ||||
|                             const int R1, const int R2) { | ||||
|     LatticeComplex siteWl(U[0]._grid); | ||||
|  | ||||
|     Wl = zero; | ||||
|     for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
|         traceWilsonLoop(siteWl, U, R1, R2, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|         traceWilsonLoop(siteWl, U, R2, R1, mu, nu); | ||||
|         Wl = Wl + siteWl; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of timelike Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteTimelikeWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all x,y,z,t and over all planes of spatial Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     std::vector<GaugeMat> U(4, Umu._grid); | ||||
|  | ||||
|     for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Wl(Umu._grid); | ||||
|  | ||||
|     siteSpatialWilsonLoop(Wl, U, R1, R2); | ||||
|  | ||||
|     TComplex Tp = sum(Wl); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * ndim * (ndim - 1); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of timelike Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * (ndim - 1); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   // average over all x,y,z,t and over all planes of spatial Wilson loop | ||||
|   ////////////////////////////////////////////////// | ||||
|   static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, | ||||
|                             const int R1, const int R2) { | ||||
|     int ndim = Umu._grid->_ndimension; | ||||
|     Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); | ||||
|     Real vol = Umu._grid->gSites(); | ||||
|     Real faces = 1.0 * (ndim - 1) * (ndim - 2); | ||||
|     return sumWl / vol / faces / Nc; // Nc dependent... FIXME | ||||
|   } | ||||
| }; | ||||
|  | ||||
| END_QEDFVOL_NAMESPACE | ||||
|  | ||||
| #endif // QEDFVOL_WILSONLOOPS_H | ||||
							
								
								
									
										88
									
								
								extras/qed-fvol/qed-fvol.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								extras/qed-fvol/qed-fvol.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| #include <Global.hpp> | ||||
| #include <WilsonLoops.h> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace QCD; | ||||
| using namespace QedFVol; | ||||
|  | ||||
| typedef PeriodicGaugeImpl<QedGimplR>    QedPeriodicGimplR; | ||||
| typedef PhotonR::GaugeField             EmField; | ||||
| typedef PhotonR::GaugeLinkField         EmComp; | ||||
|  | ||||
| const int NCONFIGS = 10; | ||||
| const int NWILSON = 10; | ||||
|  | ||||
| int main(int argc, char *argv[]) | ||||
| { | ||||
|     // parse command line | ||||
|     std::string parameterFileName; | ||||
|      | ||||
|     if (argc < 2) | ||||
|     { | ||||
|         std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]"; | ||||
|         std::cerr << std::endl; | ||||
|         std::exit(EXIT_FAILURE); | ||||
|     } | ||||
|     parameterFileName = argv[1]; | ||||
|      | ||||
|     // initialization | ||||
|     Grid_init(&argc, &argv); | ||||
|     QedFVolLogError.Active(GridLogError.isActive()); | ||||
|     QedFVolLogWarning.Active(GridLogWarning.isActive()); | ||||
|     QedFVolLogMessage.Active(GridLogMessage.isActive()); | ||||
|     QedFVolLogIterative.Active(GridLogIterative.isActive()); | ||||
|     QedFVolLogDebug.Active(GridLogDebug.isActive()); | ||||
|     LOG(Message) << "Grid initialized" << std::endl; | ||||
|      | ||||
|     // QED stuff | ||||
|     std::vector<int> latt_size   = GridDefaultLatt(); | ||||
|     std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); | ||||
|     std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|     GridCartesian    grid(latt_size,simd_layout,mpi_layout); | ||||
|     GridParallelRNG  pRNG(&grid); | ||||
|     PhotonR          photon(PhotonR::Gauge::feynman, | ||||
|                             PhotonR::ZmScheme::qedL); | ||||
|     EmField          a(&grid); | ||||
|     EmField          expA(&grid); | ||||
|  | ||||
|     Complex imag_unit(0, 1); | ||||
|  | ||||
|     Real wlA; | ||||
|     std::vector<Real> logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); | ||||
|  | ||||
|     pRNG.SeedRandomDevice(); | ||||
|  | ||||
|     LOG(Message) << "Wilson loop calculation beginning" << std::endl; | ||||
|     for(int ic = 0; ic < NCONFIGS; ic++){ | ||||
|         LOG(Message) << "Configuration " << ic <<std::endl; | ||||
|         photon.StochasticField(a, pRNG); | ||||
|  | ||||
|         // Exponentiate photon field | ||||
|         expA = exp(imag_unit*a); | ||||
|  | ||||
|         // Calculate Wilson loops | ||||
|         for(int iw=1; iw<=NWILSON; iw++){ | ||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgWilsonLoop(expA, iw, iw) * 3; | ||||
|             logWlAvg[iw-1] -= 2*log(wlA); | ||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgTimelikeWilsonLoop(expA, iw, iw) * 3; | ||||
|             logWlTime[iw-1] -= 2*log(wlA); | ||||
|             wlA = NewWilsonLoops<QedPeriodicGimplR>::avgSpatialWilsonLoop(expA, iw, iw) * 3; | ||||
|             logWlSpace[iw-1] -= 2*log(wlA); | ||||
|         } | ||||
|     } | ||||
|     LOG(Message) << "Wilson loop calculation completed" << std::endl; | ||||
|      | ||||
|     // Calculate Wilson loops | ||||
|     for(int iw=1; iw<=10; iw++){ | ||||
|         LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; | ||||
|         LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; | ||||
|         LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; | ||||
|         LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; | ||||
|     } | ||||
|  | ||||
|     // epilogue | ||||
|     LOG(Message) << "Grid is finalizing now" << std::endl; | ||||
|     Grid_finalize(); | ||||
|      | ||||
|     return EXIT_SUCCESS; | ||||
| } | ||||
| @@ -20,4 +20,17 @@ The simple testcase in this directory is the submitted bug report that encapsula | ||||
| problem. The test case works with icpc and with clang++, but fails consistently on g++ | ||||
| current variants. | ||||
|  | ||||
| Peter | ||||
| Peter | ||||
|  | ||||
|  | ||||
| ************ | ||||
|  | ||||
| Second GCC bug reported, see Issue 100. | ||||
|  | ||||
| https://wandbox.org/permlink/tzssJza6R9XnqANw | ||||
| https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652 | ||||
|  | ||||
| Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the | ||||
| CI test suite. The limitations of Travis runtime limits & weak cores are being shown. | ||||
|  | ||||
| Travis uses 5.4.1 for g++-5. | ||||
|   | ||||
							
								
								
									
										86
									
								
								grid-config.in
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										86
									
								
								grid-config.in
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| #! /bin/sh | ||||
|  | ||||
| prefix=@prefix@ | ||||
| exec_prefix=@exec_prefix@ | ||||
| includedir=@includedir@ | ||||
|  | ||||
| usage() | ||||
| { | ||||
|   cat <<EOF | ||||
| Usage: grid-config [OPTION] | ||||
|  | ||||
| Known values for OPTION are: | ||||
|  | ||||
|   --prefix     show Grid installation prefix | ||||
|   --cxxflags   print pre-processor and compiler flags | ||||
|   --ldflags    print library linking flags | ||||
|   --libs       print library linking information | ||||
|   --summary    print full build summary | ||||
|   --help       display this help and exit | ||||
|   --version    output version information | ||||
|   --git        print git revision | ||||
|  | ||||
| EOF | ||||
|    | ||||
|   exit $1 | ||||
| } | ||||
|  | ||||
| if test $# -eq 0; then | ||||
|   usage 1 | ||||
| fi | ||||
|  | ||||
| cflags=false | ||||
| libs=false | ||||
|  | ||||
| while test $# -gt 0; do | ||||
|   case "$1" in | ||||
|     -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; | ||||
|     *) optarg= ;; | ||||
|   esac | ||||
|    | ||||
|   case "$1" in | ||||
|     --prefix) | ||||
|       echo $prefix | ||||
|     ;; | ||||
|      | ||||
|     --version) | ||||
|       echo @VERSION@ | ||||
|       exit 0 | ||||
|     ;; | ||||
|      | ||||
|     --git) | ||||
|       echo "@GRID_BRANCH@ @GRID_SHA@" | ||||
|       exit 0 | ||||
|     ;; | ||||
|      | ||||
|     --help) | ||||
|       usage 0 | ||||
|     ;; | ||||
|      | ||||
|     --cxxflags) | ||||
|       echo @GRID_CXXFLAGS@ | ||||
|     ;; | ||||
|      | ||||
|     --ldflags) | ||||
|       echo @GRID_LDFLAGS@ | ||||
|     ;; | ||||
|      | ||||
|     --libs) | ||||
|       echo @GRID_LIBS@ | ||||
|     ;; | ||||
|      | ||||
|     --summary) | ||||
|       echo "" | ||||
|       echo "@GRID_SUMMARY@" | ||||
|       echo "" | ||||
|     ;; | ||||
|      | ||||
|     *) | ||||
|       usage | ||||
|       exit 1 | ||||
|     ;; | ||||
|   esac | ||||
|   shift | ||||
| done | ||||
|  | ||||
| exit 0 | ||||
							
								
								
									
										37
									
								
								lib/DisableWarnings.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								lib/DisableWarnings.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/DisableWarnings.h | ||||
|  | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef DISABLE_WARNINGS_H | ||||
| #define DISABLE_WARNINGS_H | ||||
|  | ||||
|  //disables and intel compiler specific warning (in json.hpp) | ||||
| #pragma warning disable 488   | ||||
|  | ||||
|  | ||||
| #endif | ||||
| @@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/GridCore.h> | ||||
| #include <Grid/GridQCDcore.h> | ||||
| #include <Grid/qcd/action/Action.h> | ||||
| #include <Grid/qcd/utils/GaugeFix.h> | ||||
| #include <Grid/qcd/smearing/Smearing.h> | ||||
| #include <Grid/parallelIO/MetaData.h> | ||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -38,28 +38,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_BASE_H | ||||
| #define GRID_BASE_H | ||||
|  | ||||
| /////////////////// | ||||
| // Std C++ dependencies | ||||
| /////////////////// | ||||
| #include <cassert> | ||||
| #include <complex> | ||||
| #include <vector> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <random> | ||||
| #include <functional> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <stdio.h> | ||||
| #include <signal.h> | ||||
| #include <ctime> | ||||
| #include <sys/time.h> | ||||
| #include <chrono> | ||||
|  | ||||
| /////////////////// | ||||
| // Grid headers | ||||
| /////////////////// | ||||
| #include "Config.h" | ||||
| #include <Grid/GridStd.h> | ||||
|  | ||||
| #include <Grid/perfmon/Timer.h> | ||||
| #include <Grid/perfmon/PerfCount.h> | ||||
|   | ||||
							
								
								
									
										29
									
								
								lib/GridStd.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								lib/GridStd.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
| #ifndef GRID_STD_H | ||||
| #define GRID_STD_H | ||||
|  | ||||
| /////////////////// | ||||
| // Std C++ dependencies | ||||
| /////////////////// | ||||
| #include <cassert> | ||||
| #include <complex> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <random> | ||||
| #include <functional> | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <stdio.h> | ||||
| #include <signal.h> | ||||
| #include <ctime> | ||||
| #include <sys/time.h> | ||||
| #include <chrono> | ||||
| #include <zlib.h> | ||||
|  | ||||
| /////////////////// | ||||
| // Grid config | ||||
| /////////////////// | ||||
| #include "Config.h" | ||||
|  | ||||
| #endif /* GRID_STD_H */ | ||||
							
								
								
									
										9
									
								
								lib/Grid_Eigen_Dense.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								lib/Grid_Eigen_Dense.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| #pragma once | ||||
| #if defined __GNUC__ | ||||
| #pragma GCC diagnostic push | ||||
| #pragma GCC diagnostic ignored "-Wdeprecated-declarations" | ||||
| #endif | ||||
| #include <Grid/Eigen/Dense> | ||||
| #if defined __GNUC__ | ||||
| #pragma GCC diagnostic pop | ||||
| #endif | ||||
| @@ -10,8 +10,8 @@ if BUILD_COMMS_MPI3 | ||||
|   extra_sources+=communicator/Communicator_base.cc | ||||
| endif | ||||
|  | ||||
| if BUILD_COMMS_MPI3L | ||||
|   extra_sources+=communicator/Communicator_mpi3_leader.cc | ||||
| if BUILD_COMMS_MPIT | ||||
|   extra_sources+=communicator/Communicator_mpit.cc | ||||
|   extra_sources+=communicator/Communicator_base.cc | ||||
| endif | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|     Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
|     Source file: ./lib/Algorithms.h | ||||
|  | ||||
| @@ -37,6 +37,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/algorithms/approx/Chebyshev.h> | ||||
| #include <Grid/algorithms/approx/Remez.h> | ||||
| #include <Grid/algorithms/approx/MultiShiftFunction.h> | ||||
| #include <Grid/algorithms/approx/Forecast.h> | ||||
|  | ||||
| #include <Grid/algorithms/iterative/ConjugateGradient.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateResidual.h> | ||||
| @@ -44,30 +45,16 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||
|  | ||||
| // Lanczos support | ||||
| #include <Grid/algorithms/iterative/MatrixUtils.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | ||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | ||||
| #include <Grid/algorithms/FFT.h> | ||||
|  | ||||
| // Eigen/lanczos | ||||
| // EigCg | ||||
| // MCR | ||||
| // Pcg | ||||
| // Multishift CG | ||||
| // Hdcg | ||||
| // GCR | ||||
| // etc.. | ||||
|  | ||||
| // integrator/Leapfrog | ||||
| // integrator/Omelyan | ||||
| // integrator/ForceGradient | ||||
|  | ||||
| // montecarlo/hmc | ||||
| // montecarlo/rhmc | ||||
| // montecarlo/metropolis | ||||
| // etc... | ||||
|  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -103,29 +103,32 @@ namespace Grid { | ||||
|     GridBase *CoarseGrid; | ||||
|     GridBase *FineGrid; | ||||
|     std::vector<Lattice<Fobj> > subspace; | ||||
|     int checkerboard; | ||||
|  | ||||
|     Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :  | ||||
|       CoarseGrid(_CoarseGrid), | ||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  | ||||
|     CoarseGrid(_CoarseGrid), | ||||
|       FineGrid(_FineGrid), | ||||
|       subspace(nbasis,_FineGrid) | ||||
|       subspace(nbasis,_FineGrid), | ||||
|       checkerboard(_checkerboard) | ||||
| 	{ | ||||
| 	}; | ||||
|    | ||||
|     void Orthogonalise(void){ | ||||
|       CoarseScalar InnerProd(CoarseGrid);  | ||||
|       std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl; | ||||
|       blockOrthogonalise(InnerProd,subspace); | ||||
|       std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl; | ||||
|       blockOrthogonalise(InnerProd,subspace); | ||||
|       //      std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl; | ||||
|       //      CheckOrthogonal(); | ||||
|     }  | ||||
|     void CheckOrthogonal(void){ | ||||
|       CoarseVector iProj(CoarseGrid);  | ||||
|       CoarseVector eProj(CoarseGrid);  | ||||
|       Lattice<CComplex> pokey(CoarseGrid); | ||||
|  | ||||
|        | ||||
|       for(int i=0;i<nbasis;i++){ | ||||
| 	blockProject(iProj,subspace[i],subspace); | ||||
|  | ||||
| 	eProj=zero;  | ||||
| 	for(int ss=0;ss<CoarseGrid->oSites();ss++){ | ||||
| 	parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){ | ||||
| 	  eProj._odata[ss](i)=CComplex(1.0); | ||||
| 	} | ||||
| 	eProj=eProj - iProj; | ||||
| @@ -137,6 +140,7 @@ namespace Grid { | ||||
|       blockProject(CoarseVec,FineVec,subspace); | ||||
|     } | ||||
|     void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){ | ||||
|       FineVec.checkerboard = subspace[0].checkerboard; | ||||
|       blockPromote(CoarseVec,FineVec,subspace); | ||||
|     } | ||||
|     void CreateSubspaceRandom(GridParallelRNG &RNG){ | ||||
| @@ -147,6 +151,7 @@ namespace Grid { | ||||
|       Orthogonalise(); | ||||
|     } | ||||
|  | ||||
|     /* | ||||
|     virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)  | ||||
|     { | ||||
|       // Run a Lanczos with sloppy convergence | ||||
| @@ -195,7 +200,7 @@ namespace Grid { | ||||
| 	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl; | ||||
| 	} | ||||
|     } | ||||
|  | ||||
|     */ | ||||
|     virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { | ||||
|  | ||||
|       RealD scale; | ||||
|   | ||||
| @@ -230,6 +230,7 @@ namespace Grid { | ||||
|       // Barrel shift and collect global pencil | ||||
|       std::vector<int> lcoor(Nd), gcoor(Nd); | ||||
|       result = source; | ||||
|       int pc = processor_coor[dim]; | ||||
|       for(int p=0;p<processors[dim];p++) { | ||||
|         PARALLEL_REGION | ||||
|         { | ||||
| @@ -240,7 +241,8 @@ namespace Grid { | ||||
|           for(int idx=0;idx<sgrid->lSites();idx++) { | ||||
|             sgrid->LocalIndexToLocalCoor(idx,cbuf); | ||||
|             peekLocalSite(s,result,cbuf); | ||||
|             cbuf[dim]+=p*L; | ||||
| 	    cbuf[dim]+=((pc+p) % processors[dim])*L; | ||||
| 	    //            cbuf[dim]+=p*L; | ||||
|             pokeLocalSite(s,pgbuf,cbuf); | ||||
|           } | ||||
|         } | ||||
| @@ -278,7 +280,6 @@ namespace Grid { | ||||
|       flops+= flops_call*NN; | ||||
|        | ||||
|       // writing out result | ||||
|       int pc = processor_coor[dim]; | ||||
|       PARALLEL_REGION | ||||
|       { | ||||
|         std::vector<int> clbuf(Nd), cgbuf(Nd); | ||||
|   | ||||
| @@ -162,15 +162,10 @@ namespace Grid { | ||||
| 	_Mat.M(in,out); | ||||
|       } | ||||
|       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
| 	ComplexD dot; | ||||
|  | ||||
| 	_Mat.M(in,out); | ||||
| 	 | ||||
| 	dot= innerProduct(in,out); | ||||
| 	n1=real(dot); | ||||
|  | ||||
| 	dot = innerProduct(out,out); | ||||
| 	n2=real(dot); | ||||
| 	ComplexD dot= innerProduct(in,out); n1=real(dot); | ||||
| 	n2=norm2(out); | ||||
|       } | ||||
|       void HermOp(const Field &in, Field &out){ | ||||
| 	_Mat.M(in,out); | ||||
| @@ -192,10 +187,10 @@ namespace Grid { | ||||
| 	ni=Mpc(in,tmp); | ||||
| 	no=MpcDag(tmp,out); | ||||
|       } | ||||
|       void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
|       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
| 	MpcDagMpc(in,out,n1,n2); | ||||
|       } | ||||
|       void HermOp(const Field &in, Field &out){ | ||||
|       virtual void HermOp(const Field &in, Field &out){ | ||||
| 	RealD n1,n2; | ||||
| 	HermOpAndNorm(in,out,n1,n2); | ||||
|       } | ||||
| @@ -212,7 +207,6 @@ namespace Grid { | ||||
|       void OpDir  (const Field &in, Field &out,int dir,int disp) { | ||||
| 	assert(0); | ||||
|       } | ||||
|  | ||||
|     }; | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { | ||||
| @@ -235,7 +229,7 @@ namespace Grid { | ||||
| 	Field tmp(in._grid); | ||||
|  | ||||
| 	_Mat.MeooeDag(in,tmp); | ||||
| 	_Mat.MooeeInvDag(tmp,out); | ||||
|         _Mat.MooeeInvDag(tmp,out); | ||||
| 	_Mat.MeooeDag(out,tmp); | ||||
|  | ||||
| 	_Mat.MooeeDag(in,out); | ||||
| @@ -270,7 +264,6 @@ namespace Grid { | ||||
| 	return axpy_norm(out,-1.0,tmp,in); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurDiagTwoOperator :  public SchurOperatorBase<Field> { | ||||
|     protected: | ||||
| @@ -299,6 +292,45 @@ namespace Grid { | ||||
| 	return axpy_norm(out,-1.0,tmp,in); | ||||
|       } | ||||
|     }; | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta | ||||
|     // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ; | ||||
|     template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ; | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     //  Staggered use | ||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     template<class Matrix,class Field> | ||||
|       class SchurStaggeredOperator :  public SchurOperatorBase<Field> { | ||||
|     protected: | ||||
|       Matrix &_Mat; | ||||
|     public: | ||||
|       SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){}; | ||||
|       virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
| 	n2 = Mpc(in,out); | ||||
| 	ComplexD dot= innerProduct(in,out); | ||||
| 	n1 = real(dot); | ||||
|       } | ||||
|       virtual void HermOp(const Field &in, Field &out){ | ||||
| 	Mpc(in,out); | ||||
|       } | ||||
|       virtual  RealD Mpc      (const Field &in, Field &out) { | ||||
| 	Field tmp(in._grid); | ||||
| 	_Mat.Meooe(in,tmp); | ||||
| 	_Mat.MooeeInv(tmp,out); | ||||
| 	_Mat.Meooe(out,tmp); | ||||
| 	_Mat.Mooee(in,out); | ||||
|         return axpy_norm(out,-1.0,tmp,out); | ||||
|       } | ||||
|       virtual  RealD MpcDag   (const Field &in, Field &out){ | ||||
| 	return Mpc(in,out); | ||||
|       } | ||||
|       virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { | ||||
| 	assert(0);// Never need with staggered | ||||
|       } | ||||
|     }; | ||||
|     template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>; | ||||
|  | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
| @@ -314,6 +346,14 @@ namespace Grid { | ||||
|       virtual void operator() (const Field &in, Field &out) = 0; | ||||
|     }; | ||||
|  | ||||
|     template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||
|     public: | ||||
|       void operator() (const Field &in, Field &out){ | ||||
| 	out = in; | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Base classes for Multishift solvers for operators | ||||
|     ///////////////////////////////////////////////////////////// | ||||
| @@ -336,6 +376,64 @@ namespace Grid { | ||||
|      }; | ||||
|     */ | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Hermitian operator Linear function and operator function | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     template<class Field> | ||||
|       class HermOpOperatorFunction : public OperatorFunction<Field> { | ||||
|       void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
| 	Linop.HermOp(in,out); | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|     template<typename Field> | ||||
|       class PlainHermOp : public LinearFunction<Field> { | ||||
|     public: | ||||
|       LinearOperatorBase<Field> &_Linop; | ||||
|        | ||||
|       PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)  | ||||
|       {} | ||||
|        | ||||
|       void operator()(const Field& in, Field& out) { | ||||
| 	_Linop.HermOp(in,out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<typename Field> | ||||
|     class FunctionHermOp : public LinearFunction<Field> { | ||||
|     public: | ||||
|       OperatorFunction<Field>   & _poly; | ||||
|       LinearOperatorBase<Field> &_Linop; | ||||
|        | ||||
|       FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)  | ||||
| 	: _poly(poly), _Linop(linop) {}; | ||||
|        | ||||
|       void operator()(const Field& in, Field& out) { | ||||
| 	_poly(_Linop,in,out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|   template<class Field> | ||||
|   class Polynomial : public OperatorFunction<Field> { | ||||
|   private: | ||||
|     std::vector<RealD> Coeffs; | ||||
|   public: | ||||
|     Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; | ||||
|  | ||||
|     // Implement the required interface | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|  | ||||
|       Field AtoN(in._grid); | ||||
|       Field Mtmp(in._grid); | ||||
|       AtoN = in; | ||||
|       out = AtoN*Coeffs[0]; | ||||
|       for(int n=1;n<Coeffs.size();n++){ | ||||
| 	Mtmp = AtoN; | ||||
| 	Linop.HermOp(Mtmp,AtoN); | ||||
| 	out=out+AtoN*Coeffs[n]; | ||||
|       } | ||||
|     }; | ||||
|   }; | ||||
|  | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -8,6 +8,7 @@ | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Christoph Lehner <clehner@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
| @@ -33,41 +34,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Simple general polynomial with user supplied coefficients | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> | ||||
|   class HermOpOperatorFunction : public OperatorFunction<Field> { | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|       Linop.HermOp(in,out); | ||||
|     }; | ||||
|   }; | ||||
|  | ||||
|   template<class Field> | ||||
|   class Polynomial : public OperatorFunction<Field> { | ||||
|   private: | ||||
|     std::vector<RealD> Coeffs; | ||||
|   public: | ||||
|     Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; | ||||
|  | ||||
|     // Implement the required interface | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|  | ||||
|       Field AtoN(in._grid); | ||||
|       Field Mtmp(in._grid); | ||||
|       AtoN = in; | ||||
|       out = AtoN*Coeffs[0]; | ||||
| //            std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl; | ||||
| //            std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl; | ||||
|       for(int n=1;n<Coeffs.size();n++){ | ||||
| 	Mtmp = AtoN; | ||||
| 	Linop.HermOp(Mtmp,AtoN); | ||||
| 	out=out+AtoN*Coeffs[n]; | ||||
| //            std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl; | ||||
| //		std::cout << n<<" " <<norm2(out)<<std::endl; | ||||
|       } | ||||
|     }; | ||||
|   }; | ||||
| struct ChebyParams : Serializable { | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams, | ||||
| 				  RealD, alpha,   | ||||
| 				  RealD, beta,    | ||||
| 				  int, Npoly); | ||||
| }; | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Generic Chebyshev approximations | ||||
| @@ -82,8 +54,10 @@ namespace Grid { | ||||
|  | ||||
|   public: | ||||
|     void csv(std::ostream &out){ | ||||
| 	RealD diff = hi-lo; | ||||
|       for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) { | ||||
|       RealD diff = hi-lo; | ||||
|       RealD delta = (hi-lo)*1.0e-9; | ||||
|       for (RealD x=lo; x<hi; x+=delta) { | ||||
| 	delta*=1.1; | ||||
| 	RealD f = approx(x); | ||||
| 	out<< x<<" "<<f<<std::endl; | ||||
|       } | ||||
| @@ -99,6 +73,7 @@ namespace Grid { | ||||
|     }; | ||||
|  | ||||
|     Chebyshev(){}; | ||||
|     Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);}; | ||||
|     Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);}; | ||||
|     Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);}; | ||||
|  | ||||
| @@ -193,12 +168,54 @@ namespace Grid { | ||||
|       return sum; | ||||
|     }; | ||||
|  | ||||
|     RealD approxD(RealD x) | ||||
|     { | ||||
|       RealD Un; | ||||
|       RealD Unm; | ||||
|       RealD Unp; | ||||
|        | ||||
|       RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo)); | ||||
|        | ||||
|       RealD U0=1; | ||||
|       RealD U1=2*y; | ||||
|        | ||||
|       RealD sum; | ||||
|       sum = Coeffs[1]*U0; | ||||
|       sum+= Coeffs[2]*U1*2.0; | ||||
|        | ||||
|       Un =U1; | ||||
|       Unm=U0; | ||||
|       for(int i=2;i<order-1;i++){ | ||||
| 	Unp=2*y*Un-Unm; | ||||
| 	Unm=Un; | ||||
| 	Un =Unp; | ||||
| 	sum+= Un*Coeffs[i+1]*(i+1.0); | ||||
|       } | ||||
|       return sum/(0.5*(hi-lo)); | ||||
|     }; | ||||
|      | ||||
|     RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) { | ||||
|       RealD x = x0; | ||||
|       RealD eps; | ||||
|        | ||||
|       int i; | ||||
|       for (i=0;i<maxiter;i++) { | ||||
| 	eps = approx(x) - z; | ||||
| 	if (fabs(eps / z) < resid) | ||||
| 	  return x; | ||||
| 	x = x - eps / approxD(x); | ||||
|       } | ||||
|        | ||||
|       return std::numeric_limits<double>::quiet_NaN(); | ||||
|     } | ||||
|      | ||||
|     // Implement the required interface | ||||
|     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { | ||||
|  | ||||
|       GridBase *grid=in._grid; | ||||
| //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||
| //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||
|  | ||||
|       // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; | ||||
|       //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; | ||||
|  | ||||
|       int vol=grid->gSites(); | ||||
|  | ||||
|   | ||||
							
								
								
									
										152
									
								
								lib/algorithms/approx/Forecast.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										152
									
								
								lib/algorithms/approx/Forecast.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,152 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/approx/Forecast.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: David Murphy <dmurphy@phys.columbia.edu> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #ifndef INCLUDED_FORECAST_H | ||||
| #define INCLUDED_FORECAST_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   // Abstract base class. | ||||
|   // Takes a matrix (Mat), a source (phi), and a vector of Fields (chi) | ||||
|   // and returns a forecasted solution to the system D*psi = phi (psi). | ||||
|   template<class Matrix, class Field> | ||||
|   class Forecast | ||||
|   { | ||||
|     public: | ||||
|       virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0; | ||||
|   }; | ||||
|  | ||||
|   // Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012), | ||||
|   // used to forecast solutions across poles of the EOFA heatbath. | ||||
|   // | ||||
|   // Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C) | ||||
|   template<class Matrix, class Field> | ||||
|   class ChronoForecast : public Forecast<Matrix,Field> | ||||
|   { | ||||
|     public: | ||||
|       Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns) | ||||
|       { | ||||
|         int degree = prev_solns.size(); | ||||
|         Field chi(phi); // forecasted solution | ||||
|  | ||||
|         // Trivial cases | ||||
|         if(degree == 0){ chi = zero; return chi; } | ||||
|         else if(degree == 1){ return prev_solns[0]; } | ||||
|  | ||||
|         RealD dot; | ||||
|         ComplexD xp; | ||||
|         Field r(phi); // residual | ||||
|         Field Mv(phi); | ||||
|         std::vector<Field> v(prev_solns); // orthonormalized previous solutions | ||||
|         std::vector<Field> MdagMv(degree,phi); | ||||
|  | ||||
|         // Array to hold the matrix elements | ||||
|         std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree)); | ||||
|  | ||||
|         // Solution and source vectors | ||||
|         std::vector<ComplexD> a(degree); | ||||
|         std::vector<ComplexD> b(degree); | ||||
|  | ||||
|         // Orthonormalize the vector basis | ||||
|         for(int i=0; i<degree; i++){ | ||||
|           v[i] *= 1.0/std::sqrt(norm2(v[i])); | ||||
|           for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; } | ||||
|         } | ||||
|  | ||||
|         // Perform sparse matrix multiplication and construct rhs | ||||
|         for(int i=0; i<degree; i++){ | ||||
|           b[i] = innerProduct(v[i],phi); | ||||
|           Mat.M(v[i],Mv); | ||||
|           Mat.Mdag(Mv,MdagMv[i]); | ||||
|           G[i][i] = innerProduct(v[i],MdagMv[i]); | ||||
|         } | ||||
|  | ||||
|         // Construct the matrix | ||||
|         for(int j=0; j<degree; j++){ | ||||
|         for(int k=j+1; k<degree; k++){ | ||||
|           G[j][k] = innerProduct(v[j],MdagMv[k]); | ||||
|           G[k][j] = std::conj(G[j][k]); | ||||
|         }} | ||||
|  | ||||
|         // Gauss-Jordan elimination with partial pivoting | ||||
|         for(int i=0; i<degree; i++){ | ||||
|  | ||||
|           // Perform partial pivoting | ||||
|           int k = i; | ||||
|           for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } } | ||||
|           if(k != i){ | ||||
|             xp = b[k]; | ||||
|             b[k] = b[i]; | ||||
|             b[i] = xp; | ||||
|             for(int j=0; j<degree; j++){ | ||||
|               xp = G[k][j]; | ||||
|               G[k][j] = G[i][j]; | ||||
|               G[i][j] = xp; | ||||
|             } | ||||
|           } | ||||
|  | ||||
|           // Convert matrix to upper triangular form | ||||
|           for(int j=i+1; j<degree; j++){ | ||||
|             xp = G[j][i]/G[i][i]; | ||||
|             b[j] -= xp * b[i]; | ||||
|             for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; } | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         // Use Gaussian elimination to solve equations and calculate initial guess | ||||
|         chi = zero; | ||||
|         r = phi; | ||||
|         for(int i=degree-1; i>=0; i--){ | ||||
|           a[i] = 0.0; | ||||
|           for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; } | ||||
|           a[i] = (b[i]-a[i])/G[i][i]; | ||||
|           chi += a[i]*v[i]; | ||||
|           r -= a[i]*MdagMv[i]; | ||||
|         } | ||||
|  | ||||
|         RealD true_r(0.0); | ||||
|         ComplexD tmp; | ||||
|         for(int i=0; i<degree; i++){ | ||||
|           tmp = -b[i]; | ||||
|           for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; } | ||||
|           tmp = std::conj(tmp)*tmp; | ||||
|           true_r += std::sqrt(tmp.real()); | ||||
|         } | ||||
|  | ||||
|         RealD error = std::sqrt(norm2(r)/norm2(phi)); | ||||
|         std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl; | ||||
|  | ||||
|         return chi; | ||||
|       }; | ||||
|   }; | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
| @@ -16,7 +16,7 @@ | ||||
| #define INCLUDED_ALG_REMEZ_H | ||||
|  | ||||
| #include <stddef.h> | ||||
| #include <Config.h> | ||||
| #include <Grid/GridStd.h> | ||||
|  | ||||
| #ifdef HAVE_LIBGMP | ||||
| #include "bigfloat.h" | ||||
|   | ||||
							
								
								
									
										606
									
								
								lib/algorithms/iterative/BlockConjugateGradient.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										606
									
								
								lib/algorithms/iterative/BlockConjugateGradient.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,606 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h | ||||
|  | ||||
| Copyright (C) 2017 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H | ||||
| #define GRID_BLOCK_CONJUGATE_GRADIENT_H | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient. Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| template <class Field> | ||||
| class BlockConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|  | ||||
|  | ||||
|   typedef typename Field::scalar_type scomplex; | ||||
|  | ||||
|   int blockDim ; | ||||
|   int Nblock; | ||||
|  | ||||
|   BlockCGtype CGtype; | ||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|    | ||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) | ||||
|   {}; | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Thin QR factorisation (google it) | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
| 		 Eigen::MatrixXcd &C, | ||||
| 		 Eigen::MatrixXcd &Cinv, | ||||
| 		 Field & Q, | ||||
| 		 const Field & R) | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   //Dimensions | ||||
|   // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock | ||||
|   // | ||||
|   // Rdag R = m_rr = Herm = L L^dag        <-- Cholesky decomposition (LLT routine in Eigen) | ||||
|   // | ||||
|   //   Q  C = R => Q = R C^{-1} | ||||
|   // | ||||
|   // Want  Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}  | ||||
|   // | ||||
|   // Set C = L^{dag}, and then Q^dag Q = ident  | ||||
|   // | ||||
|   // Checks: | ||||
|   // Cdag C = Rdag R ; passes. | ||||
|   // QdagQ  = 1      ; passes | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|  | ||||
|   // Force manifest hermitian to avoid rounding related | ||||
|   m_rr = 0.5*(m_rr+m_rr.adjoint()); | ||||
|  | ||||
| #if 0 | ||||
|   std::cout << " Calling Cholesky  ldlt on m_rr "  << m_rr <<std::endl; | ||||
|   Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();  | ||||
|   std::cout << " Called Cholesky  ldlt on m_rr "  << L_ldlt <<std::endl; | ||||
|   auto  D_ldlt = m_rr.ldlt().vectorD();  | ||||
|   std::cout << " Called Cholesky  ldlt on m_rr "  << D_ldlt <<std::endl; | ||||
| #endif | ||||
|  | ||||
|   //  std::cout << " Calling Cholesky  llt on m_rr "  <<std::endl; | ||||
|   Eigen::MatrixXcd L    = m_rr.llt().matrixL();  | ||||
|   //  std::cout << " Called Cholesky  llt on m_rr "  << L <<std::endl; | ||||
|   C    = L.adjoint(); | ||||
|   Cinv = C.inverse(); | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Q = R C^{-1} | ||||
|   // | ||||
|   // Q_j  = R_i Cinv(i,j)  | ||||
|   // | ||||
|   // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceMulMatrix(Q,Cinv,R,Orthog); | ||||
| } | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Call one of several implementations | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   if ( CGtype == BlockCGrQ ) { | ||||
|     BlockCGrQsolve(Linop,Src,Psi); | ||||
|   } else if (CGtype == BlockCG ) { | ||||
|     BlockCGsolve(Linop,Src,Psi); | ||||
|   } else if (CGtype == CGmultiRHS ) { | ||||
|     CGmultiRHSsolve(Linop,Src,Psi); | ||||
|   } else { | ||||
|     assert(0); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| // BlockCGrQ implementation: | ||||
| //-------------------------- | ||||
| // X is guess/Solution | ||||
| // B is RHS | ||||
| // Solve A X_i = B_i    ;        i refers to Nblock index | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = B._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   X.checkerboard = B.checkerboard; | ||||
|   conformable(X, B); | ||||
|  | ||||
|   Field tmp(B); | ||||
|   Field Q(B); | ||||
|   Field D(B); | ||||
|   Field Z(B); | ||||
|   Field AD(B); | ||||
|  | ||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|  | ||||
|   // Initial residual computation & set up | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   sliceNorm(ssq,B,Orthog); | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   sliceNorm(residuals,B,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   sliceNorm(residuals,X,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   /************************************************************************ | ||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) | ||||
|    ************************************************************************ | ||||
|    * Dimensions: | ||||
|    * | ||||
|    *   X,B==(Nferm x Nblock) | ||||
|    *   A==(Nferm x Nferm) | ||||
|    *   | ||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site | ||||
|    *  | ||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|    * for k:  | ||||
|    *   Z  = AD | ||||
|    *   M  = [D^dag Z]^{-1} | ||||
|    *   X  = X + D MC | ||||
|    *   QS = Q - ZM | ||||
|    *   D  = Q + D S^dag | ||||
|    *   C  = S C | ||||
|    */ | ||||
|   /////////////////////////////////////// | ||||
|   // Initial block: initial search dir is guess | ||||
|   /////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; | ||||
|  | ||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|  | ||||
|   Linop.HermOp(X, AD); | ||||
|   tmp = B - AD;   | ||||
|   //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl; | ||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||
|   //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl; | ||||
|   //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl; | ||||
|   //std::cout << GridLogMessage << " m_C " << m_C<<std::endl; | ||||
|   //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl; | ||||
|   D=Q; | ||||
|  | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////// | ||||
|   // Timers | ||||
|   /////////////////////////////////////// | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch QRTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|   SolverTimer.Start(); | ||||
|  | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     //3. Z  = AD | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(D, Z);       | ||||
|     MatrixTimer.Stop(); | ||||
|     //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl; | ||||
|  | ||||
|     //4. M  = [D^dag Z]^{-1} | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_M       = m_DZ.inverse(); | ||||
|     //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl; | ||||
|      | ||||
|     //5. X  = X + D MC | ||||
|     m_tmp     = m_M * m_C; | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //6. QS = Q - ZM | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); | ||||
|     sliceMaddTimer.Stop(); | ||||
|     QRTimer.Start(); | ||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); | ||||
|     QRTimer.Stop(); | ||||
|      | ||||
|     //7. D  = Q + D S^dag | ||||
|     m_tmp = m_S.adjoint(); | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //8. C  = S C | ||||
|     m_C = m_S*m_C; | ||||
|      | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      */ | ||||
|     m_rr = m_C.adjoint() * m_C; | ||||
|  | ||||
|     RealD max_resid=0; | ||||
|     RealD rrsum=0; | ||||
|     RealD rr; | ||||
|  | ||||
|     for(int b=0;b<Nblock;b++) { | ||||
|       rrsum+=real(m_rr(b,b)); | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||
| 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; | ||||
|  | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; | ||||
|  | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(X, AD); | ||||
|       AD = AD-B; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; | ||||
| 	     | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   Psi.checkerboard = Src.checkerboard; | ||||
|   conformable(Psi, Src); | ||||
|  | ||||
|   Field P(Src); | ||||
|   Field AP(Src); | ||||
|   Field R(Src); | ||||
|    | ||||
|   Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   // Initial residual computation & set up | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   sliceNorm(ssq,Src,Orthog); | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   sliceNorm(residuals,Src,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   sliceNorm(residuals,Psi,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   // Initial search dir is guess | ||||
|   Linop.HermOp(Psi, AP); | ||||
|    | ||||
|  | ||||
|   /************************************************************************ | ||||
|    * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) | ||||
|    ************************************************************************ | ||||
|    * O'Leary : R = B - A X | ||||
|    * O'Leary : P = M R ; preconditioner M = 1 | ||||
|    * O'Leary : alpha = PAP^{-1} RMR | ||||
|    * O'Leary : beta  = RMR^{-1}_old RMR_new | ||||
|    * O'Leary : X=X+Palpha | ||||
|    * O'Leary : R_new=R_old-AP alpha | ||||
|    * O'Leary : P=MR_new+P beta | ||||
|    */ | ||||
|  | ||||
|   R = Src - AP;   | ||||
|   P = R; | ||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|  | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|   SolverTimer.Start(); | ||||
|  | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     RealD rrsum=0; | ||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b)); | ||||
|  | ||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(P, AP); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     // Alpha | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_pAp,P,AP,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_pAp_inv = m_pAp.inverse(); | ||||
|     m_alpha   = m_pAp_inv * m_rr ; | ||||
|  | ||||
|     // Psi, R update | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi | ||||
|     sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     // Beta | ||||
|     m_rr_inv = m_rr.inverse(); | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_rr,R,R,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_beta = m_rr_inv *m_rr; | ||||
|  | ||||
|     // Search update | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(AP,m_beta,P,R,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
|     P= AP; | ||||
|  | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      */ | ||||
|     RealD max_resid=0; | ||||
|     RealD rr; | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|      | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(Psi, AP); | ||||
|       AP = AP-Src; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
| 	     | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||
| // Use this for spread out across nodes | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   Psi.checkerboard = Src.checkerboard; | ||||
|   conformable(Psi, Src); | ||||
|  | ||||
|   Field P(Src); | ||||
|   Field AP(Src); | ||||
|   Field R(Src); | ||||
|    | ||||
|   std::vector<ComplexD> v_pAp(Nblock); | ||||
|   std::vector<RealD> v_rr (Nblock); | ||||
|   std::vector<RealD> v_rr_inv(Nblock); | ||||
|   std::vector<RealD> v_alpha(Nblock); | ||||
|   std::vector<RealD> v_beta(Nblock); | ||||
|  | ||||
|   // Initial residual computation & set up | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   sliceNorm(ssq,Src,Orthog); | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   sliceNorm(residuals,Src,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   sliceNorm(residuals,Psi,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   // Initial search dir is guess | ||||
|   Linop.HermOp(Psi, AP); | ||||
|  | ||||
|   R = Src - AP;   | ||||
|   P = R; | ||||
|   sliceNorm(v_rr,R,Orthog); | ||||
|  | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch sliceNormTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|  | ||||
|   SolverTimer.Start(); | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     RealD rrsum=0; | ||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]); | ||||
|  | ||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; | ||||
|  | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(P, AP); | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     // Alpha | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); | ||||
|     } | ||||
|  | ||||
|     // Psi, R update | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi | ||||
|     sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     // Beta | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       v_rr_inv[b] = 1.0/v_rr[b]; | ||||
|     } | ||||
|     sliceNormTimer.Start(); | ||||
|     sliceNorm(v_rr,R,Orthog); | ||||
|     sliceNormTimer.Stop(); | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       v_beta[b] = v_rr_inv[b] *v_rr[b]; | ||||
|     } | ||||
|  | ||||
|     // Search update | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddVector(P,v_beta,P,R,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      */ | ||||
|     RealD max_resid=0; | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       RealD rr = v_rr[b]/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|      | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(Psi, AP); | ||||
|       AP = AP-Src; | ||||
|       std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
|  | ||||
|  | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
|  | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
| @@ -52,8 +52,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|         MaxIterations(maxit), | ||||
|         ErrorOnNoConverge(err_on_no_conv){}; | ||||
|  | ||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, | ||||
|                   Field &psi) { | ||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) { | ||||
|  | ||||
|     psi.checkerboard = src.checkerboard; | ||||
|     conformable(psi, src); | ||||
|  | ||||
| @@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|     cp = a; | ||||
|     ssq = norm2(src); | ||||
|  | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:     p " << a << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:     p " << a << std::endl; | ||||
|  | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
| @@ -98,9 +92,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq | ||||
|               << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(8) | ||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|     GridStopWatch LinalgTimer; | ||||
|     GridStopWatch MatrixTimer; | ||||
| @@ -130,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|       p = p * b + r; | ||||
|  | ||||
|       LinalgTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||
|                 << " residual " << cp << " target " << rsq << std::endl; | ||||
|       std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; | ||||
|       std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; | ||||
|  | ||||
|       // Stopping condition | ||||
|       if (cp <= rsq) { | ||||
| @@ -139,32 +135,33 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|         Linop.HermOpAndNorm(psi, mmp, d, qq); | ||||
|         p = mmp - src; | ||||
|  | ||||
|         RealD mmpnorm = sqrt(norm2(mmp)); | ||||
|         RealD psinorm = sqrt(norm2(psi)); | ||||
|         RealD srcnorm = sqrt(norm2(src)); | ||||
|         RealD resnorm = sqrt(norm2(p)); | ||||
|         RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
|         std::cout << GridLogMessage | ||||
|                   << "ConjugateGradient: Converged on iteration " << k << std::endl; | ||||
|         std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq) | ||||
|                   << " true residual " << true_residual << " target " | ||||
|                   << Tolerance << std::endl; | ||||
|         std::cout << GridLogMessage << "Time elapsed: Iterations " | ||||
|                   << SolverTimer.Elapsed() << " Matrix  " | ||||
|                   << MatrixTimer.Elapsed() << " Linalg " | ||||
|                   << LinalgTimer.Elapsed(); | ||||
|         std::cout << std::endl; | ||||
|         std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; | ||||
|         std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; | ||||
|  | ||||
|         std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||
|  | ||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||
|  | ||||
| 	IterationsToComplete = k;	 | ||||
|  | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|     std::cout << GridLogMessage << "ConjugateGradient did NOT converge" | ||||
|               << std::endl; | ||||
|  | ||||
|     if (ErrorOnNoConverge) assert(0); | ||||
|     IterationsToComplete = k; | ||||
|  | ||||
|   } | ||||
| }; | ||||
| } | ||||
|   | ||||
							
								
								
									
										256
									
								
								lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										256
									
								
								lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,256 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Christopher Kelly <ckelly@phys.columbia.edu> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H | ||||
| #define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  | ||||
|   class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> { | ||||
|   public: | ||||
|     bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|     // Defaults true. | ||||
|     RealD Tolerance; | ||||
|     Integer MaxIterations; | ||||
|     Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|     Integer ReliableUpdatesPerformed; | ||||
|  | ||||
|     bool DoFinalCleanup; //Final DP cleanup, defaults to true | ||||
|     Integer IterationsToCleanup; //Final DP cleanup step iterations | ||||
|      | ||||
|     LinearOperatorBase<FieldF> &Linop_f; | ||||
|     LinearOperatorBase<FieldD> &Linop_d; | ||||
|     GridBase* SinglePrecGrid; | ||||
|     RealD Delta; //reliable update parameter | ||||
|  | ||||
|     //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single | ||||
|     LinearOperatorBase<FieldF> *Linop_fallback; | ||||
|     RealD fallback_transition_tol; | ||||
|  | ||||
|      | ||||
|     ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true) | ||||
|       : Tolerance(tol), | ||||
|         MaxIterations(maxit), | ||||
| 	Delta(_delta), | ||||
| 	Linop_f(_Linop_f), | ||||
| 	Linop_d(_Linop_d), | ||||
| 	SinglePrecGrid(_sp_grid), | ||||
|         ErrorOnNoConverge(err_on_no_conv), | ||||
| 	DoFinalCleanup(true), | ||||
| 	Linop_fallback(NULL) | ||||
|     {}; | ||||
|  | ||||
|     void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){ | ||||
|       Linop_fallback = &_Linop_fallback; | ||||
|       fallback_transition_tol = _fallback_transition_tol;       | ||||
|     } | ||||
|      | ||||
|     void operator()(const FieldD &src, FieldD &psi) { | ||||
|       LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f; | ||||
|       bool using_fallback = false; | ||||
|        | ||||
|       psi.checkerboard = src.checkerboard; | ||||
|       conformable(psi, src); | ||||
|  | ||||
|       RealD cp, c, a, d, b, ssq, qq, b_pred; | ||||
|  | ||||
|       FieldD p(src); | ||||
|       FieldD mmp(src); | ||||
|       FieldD r(src); | ||||
|  | ||||
|       // Initial residual computation & set up | ||||
|       RealD guess = norm2(psi); | ||||
|       assert(std::isnan(guess) == 0); | ||||
|      | ||||
|       Linop_d.HermOpAndNorm(psi, mmp, d, b); | ||||
|      | ||||
|       r = src - mmp; | ||||
|       p = r; | ||||
|  | ||||
|       a = norm2(p); | ||||
|       cp = a; | ||||
|       ssq = norm2(src); | ||||
|  | ||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl; | ||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   src " << ssq << std::endl; | ||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:    mp " << d << std::endl; | ||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   mmp " << b << std::endl; | ||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:  cp,r " << cp << std::endl; | ||||
|       std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:     p " << a << std::endl; | ||||
|  | ||||
|       RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
|       // Check if guess is really REALLY good :) | ||||
|       if (cp <= rsq) { | ||||
| 	std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n"; | ||||
| 	std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|       //Single prec initialization | ||||
|       FieldF r_f(SinglePrecGrid); | ||||
|       r_f.checkerboard = r.checkerboard; | ||||
|       precisionChange(r_f, r); | ||||
|  | ||||
|       FieldF psi_f(r_f); | ||||
|       psi_f = zero; | ||||
|  | ||||
|       FieldF p_f(r_f); | ||||
|       FieldF mmp_f(r_f); | ||||
|  | ||||
|       RealD MaxResidSinceLastRelUp = cp; //initial residual     | ||||
|      | ||||
|       std::cout << GridLogIterative << std::setprecision(4) | ||||
| 		<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; | ||||
|  | ||||
|       GridStopWatch LinalgTimer; | ||||
|       GridStopWatch MatrixTimer; | ||||
|       GridStopWatch SolverTimer; | ||||
|  | ||||
|       SolverTimer.Start(); | ||||
|       int k = 0; | ||||
|       int l = 0; | ||||
|      | ||||
|       for (k = 1; k <= MaxIterations; k++) { | ||||
| 	c = cp; | ||||
|  | ||||
| 	MatrixTimer.Start(); | ||||
| 	Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq); | ||||
| 	MatrixTimer.Stop(); | ||||
|  | ||||
| 	LinalgTimer.Start(); | ||||
|  | ||||
| 	a = c / d; | ||||
| 	b_pred = a * (a * qq - d) / c; | ||||
|  | ||||
| 	cp = axpy_norm(r_f, -a, mmp_f, r_f); | ||||
| 	b = cp / c; | ||||
|  | ||||
| 	// Fuse these loops ; should be really easy | ||||
| 	psi_f = a * p_f + psi_f; | ||||
| 	//p_f = p_f * b + r_f; | ||||
|  | ||||
| 	LinalgTimer.Stop(); | ||||
|  | ||||
| 	std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k | ||||
| 		  << " residual " << cp << " target " << rsq << std::endl; | ||||
| 	std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl; | ||||
| 	std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl; | ||||
|  | ||||
| 	if(cp > MaxResidSinceLastRelUp){ | ||||
| 	  std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl; | ||||
| 	  MaxResidSinceLastRelUp = cp; | ||||
| 	} | ||||
| 	   | ||||
| 	// Stopping condition | ||||
| 	if (cp <= rsq) { | ||||
| 	  //Although not written in the paper, I assume that I have to add on the final solution | ||||
| 	  precisionChange(mmp, psi_f); | ||||
| 	  psi = psi + mmp; | ||||
| 	 | ||||
| 	 | ||||
| 	  SolverTimer.Stop(); | ||||
| 	  Linop_d.HermOpAndNorm(psi, mmp, d, qq); | ||||
| 	  p = mmp - src; | ||||
|  | ||||
| 	  RealD srcnorm = sqrt(norm2(src)); | ||||
| 	  RealD resnorm = sqrt(norm2(p)); | ||||
| 	  RealD true_residual = resnorm / srcnorm; | ||||
|  | ||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl; | ||||
| 	  std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; | ||||
| 	  std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; | ||||
| 	  std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; | ||||
|  | ||||
| 	  std::cout << GridLogMessage << "Time breakdown "<<std::endl; | ||||
| 	  std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; | ||||
| 	  std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; | ||||
| 	  std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; | ||||
|  | ||||
| 	  IterationsToComplete = k;	 | ||||
| 	  ReliableUpdatesPerformed = l; | ||||
| 	   | ||||
| 	  if(DoFinalCleanup){ | ||||
| 	    //Do a final CG to cleanup | ||||
| 	    std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n"; | ||||
| 	    ConjugateGradient<FieldD> CG(Tolerance,MaxIterations); | ||||
| 	    CG.ErrorOnNoConverge = ErrorOnNoConverge; | ||||
| 	    CG(Linop_d,src,psi); | ||||
| 	    IterationsToCleanup = CG.IterationsToComplete; | ||||
| 	  } | ||||
| 	  else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||
|  | ||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n"; | ||||
| 	  return; | ||||
| 	} | ||||
| 	else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update | ||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate " | ||||
| 		    << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n"; | ||||
| 	  precisionChange(mmp, psi_f); | ||||
| 	  psi = psi + mmp; | ||||
|  | ||||
| 	  Linop_d.HermOpAndNorm(psi, mmp, d, qq); | ||||
| 	  r = src - mmp; | ||||
|  | ||||
| 	  psi_f = zero; | ||||
| 	  precisionChange(r_f, r); | ||||
| 	  cp = norm2(r); | ||||
| 	  MaxResidSinceLastRelUp = cp; | ||||
|  | ||||
| 	  b = cp/c; | ||||
| 	   | ||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl; | ||||
| 	   | ||||
| 	  l = l+1; | ||||
| 	} | ||||
|  | ||||
| 	p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence | ||||
|  | ||||
| 	if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){ | ||||
| 	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl; | ||||
| 	  Linop_f_use = Linop_fallback; | ||||
| 	  using_fallback = true; | ||||
| 	} | ||||
|  | ||||
| 	 | ||||
|       } | ||||
|       std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge" | ||||
| 		<< std::endl; | ||||
|        | ||||
|       if (ErrorOnNoConverge) assert(0); | ||||
|       IterationsToComplete = k; | ||||
|       ReliableUpdatesPerformed = l;       | ||||
|     }     | ||||
|   }; | ||||
|  | ||||
|  | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| #endif | ||||
| @@ -1,137 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/DenseMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_DENSE_MATRIX_H | ||||
| #define GRID_DENSE_MATRIX_H | ||||
|  | ||||
| namespace Grid { | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Matrix untils | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|  | ||||
| template<class T> using DenseVector = std::vector<T>; | ||||
| template<class T> using DenseMatrix = DenseVector<DenseVector<T> >; | ||||
|  | ||||
| template<class T> void Size(DenseVector<T> & vec, int &N)  | ||||
| {  | ||||
|   N= vec.size(); | ||||
| } | ||||
| template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)  | ||||
| {  | ||||
|   N= mat.size(); | ||||
|   M= mat[0].size(); | ||||
| } | ||||
|  | ||||
| template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)  | ||||
| {  | ||||
|   int M; Size(mat,N,M); | ||||
|   assert(N==M); | ||||
| } | ||||
|  | ||||
| template<class T> void Resize(DenseVector<T > & mat, int N) {  | ||||
|   mat.resize(N); | ||||
| } | ||||
| template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {  | ||||
|   mat.resize(N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     mat[i].resize(M); | ||||
|   } | ||||
| } | ||||
| template<class T> void Fill(DenseMatrix<T> & mat, T&val) {  | ||||
|   int N,M; | ||||
|   Size(mat,N,M); | ||||
|   for(int i=0;i<N;i++){ | ||||
|   for(int j=0;j<M;j++){ | ||||
|     mat[i][j] = val; | ||||
|   }} | ||||
| } | ||||
|  | ||||
| /** Transpose of a matrix **/ | ||||
| template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){ | ||||
|   int N,M; | ||||
|   Size(mat,N,M); | ||||
|   DenseMatrix<T> C; Resize(C,M,N); | ||||
|   for(int i=0;i<M;i++){ | ||||
|   for(int j=0;j<N;j++){ | ||||
|     C[i][j] = mat[j][i]; | ||||
|   }}  | ||||
|   return C; | ||||
| } | ||||
| /** Set DenseMatrix to unit matrix **/ | ||||
| template<class T> void Unity(DenseMatrix<T> &A){ | ||||
|   int N;  SizeSquare(A,N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     for(int j=0;j<N;j++){ | ||||
|       if ( i==j ) A[i][j] = 1; | ||||
|       else        A[i][j] = 0; | ||||
|     }  | ||||
|   }  | ||||
| } | ||||
|  | ||||
| /** Add C * I to matrix **/ | ||||
| template<class T> | ||||
| void PlusUnit(DenseMatrix<T> & A,T c){ | ||||
|   int dim;  SizeSquare(A,dim); | ||||
|   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  | ||||
| } | ||||
|  | ||||
| /** return the Hermitian conjugate of matrix **/ | ||||
| template<class T> | ||||
| DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){ | ||||
|  | ||||
|   int dim; SizeSquare(mat,dim); | ||||
|  | ||||
|   DenseMatrix<T> C; Resize(C,dim,dim); | ||||
|  | ||||
|   for(int i=0;i<dim;i++){ | ||||
|     for(int j=0;j<dim;j++){ | ||||
|       C[i][j] = conj(mat[j][i]); | ||||
|     }  | ||||
|   }  | ||||
|   return C; | ||||
| } | ||||
| /**Get a square submatrix**/ | ||||
| template <class T> | ||||
| DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end) | ||||
| { | ||||
|   DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st); | ||||
|  | ||||
|   for(int i = row_st; i<row_end; i++){ | ||||
|   for(int j = col_st; j<col_end; j++){ | ||||
|     H[i-row_st][j-col_st]=A[i][j]; | ||||
|   }} | ||||
|   return H; | ||||
| } | ||||
|  | ||||
| } | ||||
|  | ||||
| #include "Householder.h" | ||||
| #include "Francis.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @@ -1,81 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/EigenSort.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_EIGENSORT_H | ||||
| #define GRID_EIGENSORT_H | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Eigen sorter to begin with | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|  | ||||
| template<class Field> | ||||
| class SortEigen { | ||||
|  private: | ||||
|    | ||||
| //hacking for testing for now | ||||
|  private: | ||||
|   static bool less_lmd(RealD left,RealD right){ | ||||
|     return left > right; | ||||
|   }   | ||||
|   static bool less_pair(std::pair<RealD,Field const*>& left, | ||||
|                         std::pair<RealD,Field const*>& right){ | ||||
|     return left.first > (right.first); | ||||
|   }   | ||||
|    | ||||
|    | ||||
|  public: | ||||
|  | ||||
|   void push(DenseVector<RealD>& lmd, | ||||
|             DenseVector<Field>& evec,int N) { | ||||
|     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); | ||||
|     for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; | ||||
|      | ||||
|     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());     | ||||
|     for(int i=0;i<lmd.size();++i) | ||||
|       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); | ||||
|  | ||||
|     partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); | ||||
|  | ||||
|     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); | ||||
|     for(int i=0;i<N;++i){ | ||||
|       lmd[i]=it->first; | ||||
|       evec[i]=*(it->second); | ||||
|       ++it; | ||||
|     } | ||||
|   } | ||||
|   void push(DenseVector<RealD>& lmd,int N) { | ||||
|     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); | ||||
|   } | ||||
|   bool saturated(RealD lmd, RealD thrs) { | ||||
|     return fabs(lmd) > fabs(thrs); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
| @@ -1,525 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Francis.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef FRANCIS_H | ||||
| #define FRANCIS_H | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <stdexcept> | ||||
| #include <fstream> | ||||
| #include <complex> | ||||
| #include <algorithm> | ||||
|  | ||||
| //#include <timer.h> | ||||
| //#include <lapacke.h> | ||||
| //#include <Eigen/Dense> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||
| template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||
|  | ||||
| /** | ||||
|   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. | ||||
| H = | ||||
|       x  x  x  x  x  x  x  x  x | ||||
|       x  x  x  x  x  x  x  x  x | ||||
|       0  x  x  x  x  x  x  x  x | ||||
|       0  0  x  x  x  x  x  x  x | ||||
|       0  0  0  x  x  x  x  x  x | ||||
|       0  0  0  0  x  x  x  x  x | ||||
|       0  0  0  0  0  x  x  x  x | ||||
|       0  0  0  0  0  0  x  x  x | ||||
|       0  0  0  0  0  0  0  x  x | ||||
| Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. | ||||
| **/ | ||||
| template <class T> | ||||
| int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||
| { | ||||
|   DenseMatrix<T> H = Hin;  | ||||
|  | ||||
|   int N ; SizeSquare(H,N); | ||||
|   int M = N; | ||||
|  | ||||
|   Fill(evals,0); | ||||
|   Fill(evecs,0); | ||||
|  | ||||
|   T s,t,x=0,y=0,z=0; | ||||
|   T u,d; | ||||
|   T apd,amd,bc; | ||||
|   DenseVector<T> p(N,0); | ||||
|   T nrm = Norm(H);    ///DenseMatrix Norm | ||||
|   int n, m; | ||||
|   int e = 0; | ||||
|   int it = 0; | ||||
|   int tot_it = 0; | ||||
|   int l = 0; | ||||
|   int r = 0; | ||||
|   DenseMatrix<T> P; Resize(P,N,N); Unity(P); | ||||
|   DenseVector<int> trows(N,0); | ||||
|  | ||||
|   /// Check if the matrix is really hessenberg, if not abort | ||||
|   RealD sth = 0; | ||||
|   for(int j=0;j<N;j++){ | ||||
|     for(int i=j+2;i<N;i++){ | ||||
|       sth = abs(H[i][j]); | ||||
|       if(sth > small){ | ||||
| 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; | ||||
| 	exit(1); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   do{ | ||||
|     std::cout << "Francis QR Step N = " << N << std::endl; | ||||
|     /** Check for convergence | ||||
|       x  x  x  x  x | ||||
|       0  x  x  x  x | ||||
|       0  0  x  x  x | ||||
|       0  0  x  x  x | ||||
|       0  0  0  0  x | ||||
|       for this matrix l = 4 | ||||
|      **/ | ||||
|     do{ | ||||
|       l = Chop_subdiag(H,nrm,e,small); | ||||
|       r = 0;    ///May have converged on more than one eval | ||||
|       ///Single eval | ||||
|       if(l == N-1){ | ||||
|         evals[e] = H[l][l]; | ||||
|         N--; e++; r++; it = 0; | ||||
|       } | ||||
|       ///RealD eval | ||||
|       if(l == N-2){ | ||||
|         trows[l+1] = 1;    ///Needed for UTSolve | ||||
|         apd = H[l][l] + H[l+1][l+1]; | ||||
|         amd = H[l][l] - H[l+1][l+1]; | ||||
|         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; | ||||
|         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); | ||||
|         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); | ||||
|         N-=2; e+=2; r++; it = 0; | ||||
|       } | ||||
|     } while(r>0); | ||||
|  | ||||
|     if(N ==0) break; | ||||
|  | ||||
|     DenseVector<T > ck; Resize(ck,3); | ||||
|     DenseVector<T> v;   Resize(v,3); | ||||
|  | ||||
|     for(int m = N-3; m >= l; m--){ | ||||
|       ///Starting vector essentially random shift. | ||||
|       if(it%10 == 0 && N >= 3 && it > 0){ | ||||
|         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||
|         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||
|         z = H[m+1][m]*H[m+2][m+1]; | ||||
|       } | ||||
|       ///Starting vector implicit Q theorem | ||||
|       else{ | ||||
|         s = (H[N-2][N-2] + H[N-1][N-1]); | ||||
|         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); | ||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||
|         z = H[m+1][m]*H[m+2][m+1]; | ||||
|       } | ||||
|       ck[0] = x; ck[1] = y; ck[2] = z; | ||||
|  | ||||
|       if(m == l) break; | ||||
|  | ||||
|       /** Some stupid thing from numerical recipies, seems to work**/ | ||||
|       // PAB.. for heaven's sake quote page, purpose, evidence it works. | ||||
|       //       what sort of comment is that!?!?!? | ||||
|       u=abs(H[m][m-1])*(abs(y)+abs(z)); | ||||
|       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); | ||||
|       if ((T)abs(u+d) == (T)abs(d) ){ | ||||
| 	l = m; break; | ||||
|       } | ||||
|  | ||||
|       //if (u < small){l = m; break;} | ||||
|     } | ||||
|     if(it > 100000){ | ||||
|      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; | ||||
|      std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|     normalize(ck);    ///Normalization cancels in PHP anyway | ||||
|     T beta; | ||||
|     Householder_vector<T >(ck, 0, 2, v, beta); | ||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,0); | ||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,1); | ||||
|     ///Accumulate eigenvector | ||||
|     Householder_mult<T >(P,v,beta,0,l,l+2,1); | ||||
|     int sw = 0;      ///Are we on the last row? | ||||
|     for(int k=l;k<N-2;k++){ | ||||
|       x = H[k+1][k]; | ||||
|       y = H[k+2][k]; | ||||
|       z = (T)0.0; | ||||
|       if(k+3 <= N-1){ | ||||
| 	z = H[k+3][k]; | ||||
|       } else{ | ||||
| 	sw = 1;  | ||||
| 	v[2] = (T)0.0; | ||||
|       } | ||||
|       ck[0] = x; ck[1] = y; ck[2] = z; | ||||
|       normalize(ck); | ||||
|       Householder_vector<T >(ck, 0, 2-sw, v, beta); | ||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); | ||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); | ||||
|       ///Accumulate eigenvector | ||||
|       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); | ||||
|     } | ||||
|     it++; | ||||
|     tot_it++; | ||||
|   }while(N > 1); | ||||
|   N = evals.size(); | ||||
|   ///Annoying - UT solves in reverse order; | ||||
|   DenseVector<T> tmp; Resize(tmp,N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     tmp[i] = evals[N-i-1]; | ||||
|   }  | ||||
|   evals = tmp; | ||||
|   UTeigenvectors(H, trows, evals, evecs); | ||||
|   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} | ||||
|   return tot_it; | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||
| { | ||||
|   /** | ||||
|   Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. | ||||
|   H = | ||||
|   x  x  0  0  0  0 | ||||
|   x  x  x  0  0  0 | ||||
|   0  x  x  x  0  0 | ||||
|   0  0  x  x  x  0 | ||||
|   0  0  0  x  x  x | ||||
|   0  0  0  0  x  x | ||||
|   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.  **/ | ||||
|   return my_Wilkinson(Hin, evals, evecs, small, small); | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) | ||||
| { | ||||
|   int N; SizeSquare(Hin,N); | ||||
|   int M = N; | ||||
|  | ||||
|   ///I don't want to modify the input but matricies must be passed by reference | ||||
|   //Scale a matrix by its "norm" | ||||
|   //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm); | ||||
|   DenseMatrix<T> H;  H = Hin; | ||||
|    | ||||
|   RealD Hnorm = abs(Norm(Hin)); | ||||
|   H = H * (1.0 / Hnorm); | ||||
|  | ||||
|   // TODO use openmp and memset | ||||
|   Fill(evals,0); | ||||
|   Fill(evecs,0); | ||||
|  | ||||
|   T s, t, x = 0, y = 0, z = 0; | ||||
|   T u, d; | ||||
|   T apd, amd, bc; | ||||
|   DenseVector<T> p; Resize(p,N); Fill(p,0); | ||||
|  | ||||
|   T nrm = Norm(H);    ///DenseMatrix Norm | ||||
|   int n, m; | ||||
|   int e = 0; | ||||
|   int it = 0; | ||||
|   int tot_it = 0; | ||||
|   int l = 0; | ||||
|   int r = 0; | ||||
|   DenseMatrix<T> P; Resize(P,N,N); | ||||
|   Unity(P); | ||||
|   DenseVector<int> trows(N, 0); | ||||
|   /// Check if the matrix is really symm tridiag | ||||
|   RealD sth = 0; | ||||
|   for(int j = 0; j < N; ++j) | ||||
|   { | ||||
|     for(int i = j + 2; i < N; ++i) | ||||
|     { | ||||
|       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) | ||||
|       { | ||||
| 	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; | ||||
| 	std::cout << "Warning tridiagonalize and call again" << std::endl; | ||||
|         // exit(1); // see what is going on | ||||
|         //return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   do{ | ||||
|     do{ | ||||
|       //Jasper | ||||
|       //Check if the subdiagonal term is small enough (<small) | ||||
|       //if true then it is converged. | ||||
|       //check start from H.dim - e - 1 | ||||
|       //How to deal with more than 2 are converged? | ||||
|       //What if Chop_symm_subdiag return something int the middle? | ||||
|       //-------------- | ||||
|       l = Chop_symm_subdiag(H,nrm, e, small); | ||||
|       r = 0;    ///May have converged on more than one eval | ||||
|       //Jasper | ||||
|       //In this case | ||||
|       // x  x  0  0  0  0 | ||||
|       // x  x  x  0  0  0 | ||||
|       // 0  x  x  x  0  0 | ||||
|       // 0  0  x  x  x  0 | ||||
|       // 0  0  0  x  x  0 | ||||
|       // 0  0  0  0  0  x  <- l | ||||
|       //-------------- | ||||
|       ///Single eval | ||||
|       if(l == N - 1) | ||||
|       { | ||||
|         evals[e] = H[l][l]; | ||||
|         N--; | ||||
|         e++; | ||||
|         r++; | ||||
|         it = 0; | ||||
|       } | ||||
|       //Jasper | ||||
|       // x  x  0  0  0  0 | ||||
|       // x  x  x  0  0  0 | ||||
|       // 0  x  x  x  0  0 | ||||
|       // 0  0  x  x  0  0 | ||||
|       // 0  0  0  0  x  x  <- l | ||||
|       // 0  0  0  0  x  x | ||||
|       //-------------- | ||||
|       ///RealD eval | ||||
|       if(l == N - 2) | ||||
|       { | ||||
|         trows[l + 1] = 1;    ///Needed for UTSolve | ||||
|         apd = H[l][l] + H[l + 1][ l + 1]; | ||||
|         amd = H[l][l] - H[l + 1][l + 1]; | ||||
|         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; | ||||
|         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); | ||||
|         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); | ||||
|         N -= 2; | ||||
|         e += 2; | ||||
|         r++; | ||||
|         it = 0; | ||||
|       } | ||||
|     }while(r > 0); | ||||
|     //Jasper | ||||
|     //Already converged | ||||
|     //-------------- | ||||
|     if(N == 0) break; | ||||
|  | ||||
|     DenseVector<T> ck,v; Resize(ck,2); Resize(v,2); | ||||
|  | ||||
|     for(int m = N - 3; m >= l; m--) | ||||
|     { | ||||
|       ///Starting vector essentially random shift. | ||||
|       if(it%10 == 0 && N >= 3 && it > 0) | ||||
|       { | ||||
|         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); | ||||
|         x = H[m][m] - t; | ||||
|         z = H[m + 1][m]; | ||||
|       } else { | ||||
|       ///Starting vector implicit Q theorem | ||||
|         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; | ||||
|         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]  | ||||
| 	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); | ||||
|         x = H[m][m] - t; | ||||
|         z = H[m + 1][m]; | ||||
|       } | ||||
|       //Jasper | ||||
|       //why it is here???? | ||||
|       //----------------------- | ||||
|       if(m == l) | ||||
|         break; | ||||
|  | ||||
|       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); | ||||
|       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); | ||||
|       if ((T)abs(u + d) == (T)abs(d)) | ||||
|       { | ||||
|         l = m; | ||||
|         break; | ||||
|       } | ||||
|     } | ||||
|     //Jasper | ||||
|     if(it > 1000000) | ||||
|     { | ||||
|       std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; | ||||
|       std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|     // | ||||
|     T s, c; | ||||
|     Givens_calc<T>(x, z, c, s); | ||||
|     Givens_mult<T>(H, l, l + 1, c, -s, 0); | ||||
|     Givens_mult<T>(H, l, l + 1, c,  s, 1); | ||||
|     Givens_mult<T>(P, l, l + 1, c,  s, 1); | ||||
|     // | ||||
|     for(int k = l; k < N - 2; ++k) | ||||
|     { | ||||
|       x = H.A[k + 1][k]; | ||||
|       z = H.A[k + 2][k]; | ||||
|       Givens_calc<T>(x, z, c, s); | ||||
|       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); | ||||
|       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); | ||||
|       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); | ||||
|     } | ||||
|     it++; | ||||
|     tot_it++; | ||||
|   }while(N > 1); | ||||
|  | ||||
|   N = evals.size(); | ||||
|   ///Annoying - UT solves in reverse order; | ||||
|   DenseVector<T> tmp(N); | ||||
|   for(int i = 0; i < N; ++i) | ||||
|     tmp[i] = evals[N-i-1]; | ||||
|   evals = tmp; | ||||
|   // | ||||
|   UTeigenvectors(H, trows, evals, evecs); | ||||
|   //UTSymmEigenvectors(H, trows, evals, evecs); | ||||
|   for(int i = 0; i < evals.size(); ++i) | ||||
|   { | ||||
|     evecs[i] = P * evecs[i]; | ||||
|     normalize(evecs[i]); | ||||
|     evals[i] = evals[i] * Hnorm; | ||||
|   } | ||||
|   // // FIXME this is to test | ||||
|   // Hin.write("evecs3", evecs); | ||||
|   // Hin.write("evals3", evals); | ||||
|   // // check rsd | ||||
|   // for(int i = 0; i < M; i++) { | ||||
|   //   vector<T> Aevec = Hin * evecs[i]; | ||||
|   //   RealD norm2(0.); | ||||
|   //   for(int j = 0; j < M; j++) { | ||||
|   //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); | ||||
|   //   } | ||||
|   // } | ||||
|   return tot_it; | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||
|  | ||||
|   /** | ||||
|   turn a matrix A = | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   into | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   0  x  x  x  x | ||||
|   0  0  x  x  x | ||||
|   0  0  0  x  x | ||||
|   with householder rotations | ||||
|   Slow. | ||||
|   */ | ||||
|   int N ; SizeSquare(A,N); | ||||
|   DenseVector<T > p; Resize(p,N); Fill(p,0); | ||||
|  | ||||
|   for(int k=start;k<N-2;k++){ | ||||
|     //cerr << "hess" << k << std::endl; | ||||
|     DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); | ||||
|     for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column | ||||
|     normalize(ck);    ///Normalization cancels in PHP anyway | ||||
|     T beta; | ||||
|     Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector | ||||
|     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA | ||||
|     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H | ||||
|     ///Accumulate eigenvector | ||||
|     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H | ||||
|   } | ||||
|   /*for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|     A(0,k,l); | ||||
|     } | ||||
|     }*/ | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||
| ///Tridiagonalize a matrix | ||||
|   int N; SizeSquare(A,N); | ||||
|   Hess(A,Q,start); | ||||
|   /*for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|     A(0,l,k); | ||||
|     } | ||||
|     }*/ | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| void ForceTridiagonal(DenseMatrix<T> &A){ | ||||
| ///Tridiagonalize a matrix | ||||
|   int N ; SizeSquare(A,N); | ||||
|   for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|       A[l][k]=0; | ||||
|       A[k][l]=0; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   ///Solve a symmetric eigensystem, not necessarily in tridiagonal form | ||||
|   int N; SizeSquare(Ain,N); | ||||
|   DenseMatrix<T > A; A = Ain; | ||||
|   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q); | ||||
|   Tri(A,Q,0); | ||||
|   int it = my_Wilkinson<T>(A, evals, evecs, small); | ||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||
|   return it; | ||||
| } | ||||
|  | ||||
|  | ||||
| template <class T> | ||||
| int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   return my_Wilkinson(Ain, evals, evecs, small); | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   return my_SymmEigensystem(Ain, evals, evecs, small); | ||||
| } | ||||
|  | ||||
| template <class T> | ||||
| int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
| ///Solve a general eigensystem, not necessarily in tridiagonal form | ||||
|   int N = Ain.dim; | ||||
|   DenseMatrix<T > A(N); A = Ain; | ||||
|   DenseMatrix<T > Q(N);Q.Unity(); | ||||
|   Hess(A,Q,0); | ||||
|   int it = QReigensystem<T>(A, evals, evecs, small); | ||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||
|   return it; | ||||
| } | ||||
|  | ||||
| } | ||||
| #endif | ||||
| @@ -1,242 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Householder.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef HOUSEHOLDER_H | ||||
| #define HOUSEHOLDER_H | ||||
|  | ||||
| #define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
| #define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
| #define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <stdexcept> | ||||
| #include <fstream> | ||||
| #include <complex> | ||||
| #include <algorithm> | ||||
|  | ||||
| namespace Grid { | ||||
| /** Comparison function for finding the max element in a vector **/ | ||||
| template <class T> bool cf(T i, T j) {  | ||||
|   return abs(i) < abs(j);  | ||||
| } | ||||
|  | ||||
| /**  | ||||
| 	Calculate a real Givens angle  | ||||
|  **/ | ||||
| template <class T> inline void Givens_calc(T y, T z, T &c, T &s){ | ||||
|  | ||||
|   RealD mz = (RealD)abs(z); | ||||
|    | ||||
|   if(mz==0.0){ | ||||
|     c = 1; s = 0; | ||||
|   } | ||||
|   if(mz >= (RealD)abs(y)){ | ||||
|     T t = -y/z; | ||||
|     s = (T)1.0 / sqrt ((T)1.0 + t * t); | ||||
|     c = s * t; | ||||
|   } else { | ||||
|     T t = -z/y; | ||||
|     c = (T)1.0 / sqrt ((T)1.0 + t * t); | ||||
|     s = c * t; | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) | ||||
| { | ||||
|   int q ; SizeSquare(A,q); | ||||
|  | ||||
|   if(dir == 0){ | ||||
|     for(int j=0;j<q;j++){ | ||||
|       T nu = A[i][j]; | ||||
|       T w  = A[k][j]; | ||||
|       A[i][j] = (c*nu + s*w); | ||||
|       A[k][j] = (-s*nu + c*w); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if(dir == 1){ | ||||
|     for(int j=0;j<q;j++){ | ||||
|       T nu = A[j][i]; | ||||
|       T w  = A[j][k]; | ||||
|       A[j][i] = (c*nu - s*w); | ||||
|       A[j][k] = (s*nu + c*w); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	from input = x; | ||||
| 	Compute the complex Householder vector, v, such that | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
|  | ||||
| 	P | x |    | x | k = 0 | ||||
| 	| x |    | 0 |  | ||||
| 	| x | =  | 0 | | ||||
| 	| x |    | 0 | j = 3 | ||||
| 	| x |	   | x | | ||||
|  | ||||
| 	These are the "Unreduced" Householder vectors. | ||||
|  | ||||
|  **/ | ||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) | ||||
| { | ||||
|   int N ; Size(input,N); | ||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); | ||||
|  | ||||
|   if(abs(m) > 0.0){ | ||||
|     T alpha = 0; | ||||
|  | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = input[i]/m; | ||||
|       alpha = alpha + v[i]*conj(v[i]); | ||||
|     } | ||||
|     alpha = sqrt(alpha); | ||||
|     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); | ||||
|  | ||||
|     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; | ||||
|     else                 v[k] = -alpha; | ||||
|   } else{ | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = 0.0; | ||||
|     }  | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	from input = x; | ||||
| 	Compute the complex Householder vector, v, such that | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
|  | ||||
| 	Px = alpha*e_dir | ||||
|  | ||||
| 	These are the "Unreduced" Householder vectors. | ||||
|  | ||||
|  **/ | ||||
|  | ||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) | ||||
| { | ||||
|   int N = input.size(); | ||||
|   T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); | ||||
|    | ||||
|   if(abs(m) > 0.0){ | ||||
|     T alpha = 0; | ||||
|  | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = input[i]/m; | ||||
|       alpha = alpha + v[i]*conj(v[i]); | ||||
|     } | ||||
|      | ||||
|     alpha = sqrt(alpha); | ||||
|     beta = 1.0/(alpha*(alpha + abs(v[dir]) )); | ||||
| 	 | ||||
|     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; | ||||
|     else                  v[dir] = -alpha; | ||||
|   }else{ | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = 0.0; | ||||
|     }  | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Compute the product PA if trans = 0 | ||||
| 	AP if trans = 1 | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
| 	start at element l of matrix A | ||||
| 	v is of length j - k + 1 of v are nonzero | ||||
|  **/ | ||||
|  | ||||
| template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) | ||||
| { | ||||
|   int N ; SizeSquare(A,N); | ||||
|  | ||||
|   if(abs(beta) > 0.0){ | ||||
|     for(int p=l; p<N; p++){ | ||||
|       T s = 0; | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p]; | ||||
| 	s *= beta; | ||||
| 	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);} | ||||
|       } else { | ||||
| 	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];} | ||||
| 	s *= beta; | ||||
| 	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);} | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Compute the product PA if trans = 0 | ||||
| 	AP if trans = 1 | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
| 	start at element l of matrix A | ||||
| 	v is of length j - k + 1 of v are nonzero | ||||
| 	A is tridiagonal | ||||
|  **/ | ||||
| template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) | ||||
| { | ||||
|   if(abs(beta) > 0.0){ | ||||
|  | ||||
|     int N ; SizeSquare(A,N); | ||||
|  | ||||
|     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);  | ||||
|  | ||||
|     T s; | ||||
|     for(int p=l; p<M; p++){ | ||||
|       s = 0; | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; | ||||
|       } | ||||
|       s = beta*s; | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); | ||||
|       } | ||||
|     } | ||||
|     for(int p=l; p<M; p++){ | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| } | ||||
| #endif | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										352
									
								
								lib/algorithms/iterative/LocalCoherenceLanczos.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										352
									
								
								lib/algorithms/iterative/LocalCoherenceLanczos.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,352 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Christoph Lehner <clehner@bnl.gov> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_LOCAL_COHERENCE_IRL_H | ||||
| #define GRID_LOCAL_COHERENCE_IRL_H | ||||
| namespace Grid {  | ||||
| struct LanczosParams : Serializable { | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, | ||||
| 				  ChebyParams, Cheby,/*Chebyshev*/ | ||||
| 				  int, Nstop,    /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ | ||||
| 				  int, Nk,       /*Vecs in Lanczos seek converge*/ | ||||
| 				  int, Nm,       /*Total vecs in Lanczos include restart*/ | ||||
| 				  RealD, resid,  /*residual*/ | ||||
|  				  int, MaxIt,  | ||||
| 				  RealD, betastp,  /* ? */ | ||||
| 				  int, MinRes);    // Must restart | ||||
| }; | ||||
|  | ||||
| struct LocalCoherenceLanczosParams : Serializable { | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, | ||||
| 				  bool, doFine, | ||||
| 				  bool, doFineRead, | ||||
| 				  bool, doCoarse, | ||||
| 	       			  bool, doCoarseRead, | ||||
| 				  LanczosParams, FineParams, | ||||
| 				  LanczosParams, CoarseParams, | ||||
| 				  ChebyParams,   Smoother, | ||||
| 				  RealD        , coarse_relax_tol, | ||||
| 				  std::vector<int>, blockSize, | ||||
| 				  std::string, config, | ||||
| 				  std::vector < std::complex<double>  >, omega, | ||||
| 				  RealD, mass, | ||||
| 				  RealD, M5); | ||||
| }; | ||||
|  | ||||
| // Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|   LinearOperatorBase<FineField> &_Linop; | ||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; | ||||
|  | ||||
|   ProjectedHermOp(LinearOperatorBase<FineField>& linop,  Aggregation<Fobj,CComplex,nbasis> &aggregate) :  | ||||
|     _Linop(linop), | ||||
|     _Aggregate(aggregate)  {  }; | ||||
|  | ||||
|   void operator()(const CoarseField& in, CoarseField& out) { | ||||
|  | ||||
|     GridBase *FineGrid = _Aggregate.FineGrid; | ||||
|     FineField fin(FineGrid); | ||||
|     FineField fout(FineGrid); | ||||
|  | ||||
|     _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; | ||||
|     _Linop.HermOp(fin,fout);                   std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; | ||||
|     _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|  | ||||
|   OperatorFunction<FineField>   & _poly; | ||||
|   LinearOperatorBase<FineField> &_Linop; | ||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; | ||||
|  | ||||
|   ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,  | ||||
| 			  Aggregation<Fobj,CComplex,nbasis> &aggregate) :  | ||||
|     _poly(poly), | ||||
|     _Linop(linop), | ||||
|     _Aggregate(aggregate)  {  }; | ||||
|  | ||||
|   void operator()(const CoarseField& in, CoarseField& out) { | ||||
|  | ||||
|     GridBase *FineGrid = _Aggregate.FineGrid; | ||||
|  | ||||
|     FineField fin(FineGrid) ;fin.checkerboard  =_Aggregate.checkerboard; | ||||
|     FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; | ||||
|      | ||||
|     _Aggregate.PromoteFromSubspace(in,fin);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; | ||||
|     _poly(_Linop,fin,fout);                    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; | ||||
|     _Aggregate.ProjectToSubspace(out,fout);    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class ImplicitlyRestartedLanczosSmoothedTester  : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > > | ||||
| { | ||||
|  public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<Fobj>          FineField; | ||||
|  | ||||
|   LinearFunction<CoarseField> & _Poly; | ||||
|   OperatorFunction<FineField>   & _smoother; | ||||
|   LinearOperatorBase<FineField> &_Linop; | ||||
|   Aggregation<Fobj,CComplex,nbasis> &_Aggregate; | ||||
|   RealD                             _coarse_relax_tol; | ||||
|   ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly, | ||||
| 					   OperatorFunction<FineField>   &smoother, | ||||
| 					   LinearOperatorBase<FineField> &Linop, | ||||
| 					   Aggregation<Fobj,CComplex,nbasis> &Aggregate, | ||||
| 					   RealD coarse_relax_tol=5.0e3)  | ||||
|     : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol)  {    }; | ||||
|  | ||||
|   int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) | ||||
|   { | ||||
|     CoarseField v(B); | ||||
|     RealD eval_poly = eval; | ||||
|     // Apply operator | ||||
|     _Poly(B,v); | ||||
|  | ||||
|     RealD vnum = real(innerProduct(B,v)); // HermOp. | ||||
|     RealD vden = norm2(B); | ||||
|     RealD vv0  = norm2(v); | ||||
|     eval   = vnum/vden; | ||||
|     v -= eval*B; | ||||
|  | ||||
|     RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); | ||||
|  | ||||
|     std::cout.precision(13); | ||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " | ||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" | ||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv | ||||
| 	     <<std::endl; | ||||
|  | ||||
|     int conv=0; | ||||
|     if( (vv<eresid*eresid) ) conv = 1; | ||||
|     return conv; | ||||
|   } | ||||
|   int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) | ||||
|   { | ||||
|     GridBase *FineGrid = _Aggregate.FineGrid; | ||||
|  | ||||
|     int checkerboard   = _Aggregate.checkerboard; | ||||
|  | ||||
|     FineField fB(FineGrid);fB.checkerboard =checkerboard; | ||||
|     FineField fv(FineGrid);fv.checkerboard =checkerboard; | ||||
|  | ||||
|     _Aggregate.PromoteFromSubspace(B,fv); | ||||
|     _smoother(_Linop,fv,fB);  | ||||
|  | ||||
|     RealD eval_poly = eval; | ||||
|     _Linop.HermOp(fB,fv); | ||||
|  | ||||
|     RealD vnum = real(innerProduct(fB,fv)); // HermOp. | ||||
|     RealD vden = norm2(fB); | ||||
|     RealD vv0  = norm2(fv); | ||||
|     eval   = vnum/vden; | ||||
|     fv -= eval*fB; | ||||
|     RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0); | ||||
|  | ||||
|     std::cout.precision(13); | ||||
|     std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] " | ||||
| 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" | ||||
| 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv | ||||
| 	     <<std::endl; | ||||
|     if ( j > nbasis ) eresid = eresid*_coarse_relax_tol; | ||||
|     if( (vv<eresid*eresid) ) return 1; | ||||
|     return 0; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Make serializable Lanczos params | ||||
| //////////////////////////////////////////// | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| class LocalCoherenceLanczos  | ||||
| { | ||||
| public: | ||||
|   typedef iVector<CComplex,nbasis >           CoarseSiteVector; | ||||
|   typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field | ||||
|   typedef Lattice<CoarseSiteVector>           CoarseField; | ||||
|   typedef Lattice<Fobj>                       FineField; | ||||
|  | ||||
| protected: | ||||
|   GridBase *_CoarseGrid; | ||||
|   GridBase *_FineGrid; | ||||
|   int _checkerboard; | ||||
|   LinearOperatorBase<FineField>                 & _FineOp; | ||||
|    | ||||
|   // FIXME replace Aggregation with vector of fine; the code reuse is too small for | ||||
|   // the hassle and complexity of cross coupling. | ||||
|   Aggregation<Fobj,CComplex,nbasis>               _Aggregate;   | ||||
|   std::vector<RealD>                              evals_fine; | ||||
|   std::vector<RealD>                              evals_coarse;  | ||||
|   std::vector<CoarseField>                        evec_coarse; | ||||
| public: | ||||
|   LocalCoherenceLanczos(GridBase *FineGrid, | ||||
| 		GridBase *CoarseGrid, | ||||
| 		LinearOperatorBase<FineField> &FineOp, | ||||
| 		int checkerboard) : | ||||
|     _CoarseGrid(CoarseGrid), | ||||
|     _FineGrid(FineGrid), | ||||
|     _Aggregate(CoarseGrid,FineGrid,checkerboard), | ||||
|     _FineOp(FineOp), | ||||
|     _checkerboard(checkerboard) | ||||
|   { | ||||
|     evals_fine.resize(0); | ||||
|     evals_coarse.resize(0); | ||||
|   }; | ||||
|   void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } | ||||
|  | ||||
|   template<typename T>  static RealD normalise(T& v)  | ||||
|   { | ||||
|     RealD nn = norm2(v); | ||||
|     nn = ::sqrt(nn); | ||||
|     v = v * (1.0/nn); | ||||
|     return nn; | ||||
|   } | ||||
|  | ||||
|   void fakeFine(void) | ||||
|   { | ||||
|     int Nk = nbasis; | ||||
|     _Aggregate.subspace.resize(Nk,_FineGrid); | ||||
|     _Aggregate.subspace[0]=1.0; | ||||
|     _Aggregate.subspace[0].checkerboard=_checkerboard; | ||||
|     normalise(_Aggregate.subspace[0]); | ||||
|     PlainHermOp<FineField>    Op(_FineOp); | ||||
|     for(int k=1;k<Nk;k++){ | ||||
|       _Aggregate.subspace[k].checkerboard=_checkerboard; | ||||
|       Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); | ||||
|       normalise(_Aggregate.subspace[k]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void testFine(RealD resid)  | ||||
|   { | ||||
|     assert(evals_fine.size() == nbasis); | ||||
|     assert(_Aggregate.subspace.size() == nbasis); | ||||
|     PlainHermOp<FineField>    Op(_FineOp); | ||||
|     ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); | ||||
|     for(int k=0;k<nbasis;k++){ | ||||
|       assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)  | ||||
|   { | ||||
|     assert(evals_fine.size() == nbasis); | ||||
|     assert(_Aggregate.subspace.size() == nbasis); | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     Chebyshev<FineField>                          ChebySmooth(cheby_smooth); | ||||
|     ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); | ||||
|     ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); | ||||
|  | ||||
|     for(int k=0;k<evec_coarse.size();k++){ | ||||
|       if ( k < nbasis ) {  | ||||
| 	assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); | ||||
|       } else {  | ||||
| 	assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,  | ||||
| 		RealD MaxIt, RealD betastp, int MinRes) | ||||
|   { | ||||
|     assert(nbasis<=Nm); | ||||
|     Chebyshev<FineField>      Cheby(cheby_parms); | ||||
|     FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp); | ||||
|     PlainHermOp<FineField>    Op(_FineOp); | ||||
|  | ||||
|     evals_fine.resize(Nm); | ||||
|     _Aggregate.subspace.resize(Nm,_FineGrid); | ||||
|  | ||||
|     ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); | ||||
|  | ||||
|     FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; | ||||
|  | ||||
|     int Nconv; | ||||
|     IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); | ||||
|      | ||||
|     // Shrink down to number saved | ||||
|     assert(Nstop>=nbasis); | ||||
|     assert(Nconv>=nbasis); | ||||
|     evals_fine.resize(nbasis); | ||||
|     _Aggregate.subspace.resize(nbasis,_FineGrid); | ||||
|   } | ||||
|   void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, | ||||
| 		  int Nstop, int Nk, int Nm,RealD resid,  | ||||
| 		  RealD MaxIt, RealD betastp, int MinRes) | ||||
|   { | ||||
|     Chebyshev<FineField>                          Cheby(cheby_op); | ||||
|     ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,_Aggregate); | ||||
|     ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL | ||||
|     ////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     Chebyshev<FineField>                                           ChebySmooth(cheby_smooth); | ||||
|     ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); | ||||
|  | ||||
|     evals_coarse.resize(Nm); | ||||
|     evec_coarse.resize(Nm,_CoarseGrid); | ||||
|  | ||||
|     CoarseField src(_CoarseGrid);     src=1.0;  | ||||
|  | ||||
|     ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); | ||||
|     int Nconv=0; | ||||
|     IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); | ||||
|     assert(Nconv>=Nstop); | ||||
|     evals_coarse.resize(Nstop); | ||||
|     evec_coarse.resize (Nstop,_CoarseGrid); | ||||
|     for (int i=0;i<Nstop;i++){ | ||||
|       std::cout << i << " Coarse eval = " << evals_coarse[i]  << std::endl; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
| @@ -1,453 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Matrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef MATRIX_H | ||||
| #define MATRIX_H | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <vector> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <complex> | ||||
| #include <typeinfo> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
|  | ||||
| /** Sign function **/ | ||||
| template <class T> T sign(T p){return ( p/abs(p) );} | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| ///////////////////// Hijack STL containers for our wicked means ///////////////////////////////////////// | ||||
| ///////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| template<class T> using Vector = Vector<T>; | ||||
| template<class T> using Matrix = Vector<Vector<T> >; | ||||
|  | ||||
| template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); } | ||||
|  | ||||
| template<class T> void Resize(Matrix<T > & mat, int N, int M) {  | ||||
|   mat.resize(N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     mat[i].resize(M); | ||||
|   } | ||||
| } | ||||
| template<class T> void Size(Vector<T> & vec, int &N)  | ||||
| {  | ||||
|   N= vec.size(); | ||||
| } | ||||
| template<class T> void Size(Matrix<T> & mat, int &N,int &M)  | ||||
| {  | ||||
|   N= mat.size(); | ||||
|   M= mat[0].size(); | ||||
| } | ||||
| template<class T> void SizeSquare(Matrix<T> & mat, int &N)  | ||||
| {  | ||||
|   int M; Size(mat,N,M); | ||||
|   assert(N==M); | ||||
| } | ||||
| template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)  | ||||
| {  | ||||
|   int N2,M2; | ||||
|   Size(mat1,N1,M1); | ||||
|   Size(mat2,N2,M2); | ||||
|   assert(N1==N2); | ||||
|   assert(M1==M2); | ||||
| } | ||||
|  | ||||
| //***************************************** | ||||
| //*	(Complex) Vector operations	* | ||||
| //***************************************** | ||||
|  | ||||
| /**Conj of a Vector **/ | ||||
| template <class T> Vector<T> conj(Vector<T> p){ | ||||
| 	Vector<T> q(p.size()); | ||||
| 	for(int i=0;i<p.size();i++){q[i] = conj(p[i]);} | ||||
| 	return q; | ||||
| } | ||||
|  | ||||
| /** Norm of a Vector**/ | ||||
| template <class T> T norm(Vector<T> p){ | ||||
| 	T sum = 0; | ||||
| 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);} | ||||
| 	return abs(sqrt(sum)); | ||||
| } | ||||
|  | ||||
| /** Norm squared of a Vector **/ | ||||
| template <class T> T norm2(Vector<T> p){ | ||||
| 	T sum = 0; | ||||
| 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);} | ||||
| 	return abs((sum)); | ||||
| } | ||||
|  | ||||
| /** Sum elements of a Vector **/ | ||||
| template <class T> T trace(Vector<T> p){ | ||||
| 	T sum = 0; | ||||
| 	for(int i=0;i<p.size();i++){sum = sum + p[i];} | ||||
| 	return sum; | ||||
| } | ||||
|  | ||||
| /** Fill a Vector with constant c **/ | ||||
| template <class T> void Fill(Vector<T> &p, T c){ | ||||
| 	for(int i=0;i<p.size();i++){p[i] = c;} | ||||
| } | ||||
| /** Normalize a Vector **/ | ||||
| template <class T> void normalize(Vector<T> &p){ | ||||
| 	T m = norm(p); | ||||
| 	if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;} | ||||
| } | ||||
| /** Vector by scalar **/ | ||||
| template <class T, class U> Vector<T> times(Vector<T> p, U s){ | ||||
| 	for(int i=0;i<p.size();i++){p[i] *= s;} | ||||
| 	return p; | ||||
| } | ||||
| template <class T, class U> Vector<T> times(U s, Vector<T> p){ | ||||
| 	for(int i=0;i<p.size();i++){p[i] *= s;} | ||||
| 	return p; | ||||
| } | ||||
| /** inner product of a and b = conj(a) . b **/ | ||||
| template <class T> T inner(Vector<T> a, Vector<T> b){ | ||||
| 	T m = 0.; | ||||
| 	for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];} | ||||
| 	return m; | ||||
| } | ||||
| /** sum of a and b = a + b **/ | ||||
| template <class T> Vector<T> add(Vector<T> a, Vector<T> b){ | ||||
| 	Vector<T> m(a.size()); | ||||
| 	for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];} | ||||
| 	return m; | ||||
| } | ||||
| /** sum of a and b = a - b **/ | ||||
| template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){ | ||||
| 	Vector<T> m(a.size()); | ||||
| 	for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];} | ||||
| 	return m; | ||||
| } | ||||
|  | ||||
| /**  | ||||
|  ********************************* | ||||
|  *	Matrices	         * | ||||
|  ********************************* | ||||
|  **/ | ||||
|  | ||||
| template<class T> void Fill(Matrix<T> & mat, T&val) {  | ||||
|   int N,M; | ||||
|   Size(mat,N,M); | ||||
|   for(int i=0;i<N;i++){ | ||||
|   for(int j=0;j<M;j++){ | ||||
|     mat[i][j] = val; | ||||
|   }} | ||||
| } | ||||
|  | ||||
| /** Transpose of a matrix **/ | ||||
| Matrix<T> Transpose(Matrix<T> & mat){ | ||||
|   int N,M; | ||||
|   Size(mat,N,M); | ||||
|   Matrix C; Resize(C,M,N); | ||||
|   for(int i=0;i<M;i++){ | ||||
|   for(int j=0;j<N;j++){ | ||||
|     C[i][j] = mat[j][i]; | ||||
|   }}  | ||||
|   return C; | ||||
| } | ||||
| /** Set Matrix to unit matrix **/ | ||||
| template<class T> void Unity(Matrix<T> &mat){ | ||||
|   int N;  SizeSquare(mat,N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     for(int j=0;j<N;j++){ | ||||
|       if ( i==j ) A[i][j] = 1; | ||||
|       else        A[i][j] = 0; | ||||
|     }  | ||||
|   }  | ||||
| } | ||||
| /** Add C * I to matrix **/ | ||||
| template<class T> | ||||
| void PlusUnit(Matrix<T> & A,T c){ | ||||
|   int dim;  SizeSquare(A,dim); | ||||
|   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  | ||||
| } | ||||
|  | ||||
| /** return the Hermitian conjugate of matrix **/ | ||||
| Matrix<T> HermitianConj(Matrix<T> &mat){ | ||||
|  | ||||
|   int dim; SizeSquare(mat,dim); | ||||
|  | ||||
|   Matrix<T> C; Resize(C,dim,dim); | ||||
|  | ||||
|   for(int i=0;i<dim;i++){ | ||||
|     for(int j=0;j<dim;j++){ | ||||
|       C[i][j] = conj(mat[j][i]); | ||||
|     }  | ||||
|   }  | ||||
|   return C; | ||||
| } | ||||
|  | ||||
| /** return diagonal entries as a Vector **/ | ||||
| Vector<T> diag(Matrix<T> &A) | ||||
| { | ||||
|   int dim; SizeSquare(A,dim); | ||||
|   Vector<T> d; Resize(d,dim); | ||||
|  | ||||
|   for(int i=0;i<dim;i++){ | ||||
|     d[i] = A[i][i]; | ||||
|   } | ||||
|   return d; | ||||
| } | ||||
|  | ||||
| /** Left multiply by a Vector **/ | ||||
| Vector<T> operator *(Vector<T> &B,Matrix<T> &A) | ||||
| { | ||||
|   int K,M,N;  | ||||
|   Size(B,K); | ||||
|   Size(A,M,N); | ||||
|   assert(K==M); | ||||
|    | ||||
|   Vector<T> C; Resize(C,N); | ||||
|  | ||||
|   for(int j=0;j<N;j++){ | ||||
|     T sum = 0.0; | ||||
|     for(int i=0;i<M;i++){ | ||||
|       sum += B[i] * A[i][j]; | ||||
|     } | ||||
|     C[j] =  sum; | ||||
|   } | ||||
|   return C;  | ||||
| } | ||||
|  | ||||
| /** return 1/diagonal entries as a Vector **/ | ||||
| Vector<T> inv_diag(Matrix<T> & A){ | ||||
|   int dim; SizeSquare(A,dim); | ||||
|   Vector<T> d; Resize(d,dim); | ||||
|   for(int i=0;i<dim;i++){ | ||||
|     d[i] = 1.0/A[i][i]; | ||||
|   } | ||||
|   return d; | ||||
| } | ||||
| /** Matrix Addition **/ | ||||
| inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B) | ||||
| { | ||||
|   int N,M  ; SizeSame(A,B,N,M); | ||||
|   Matrix C; Resize(C,N,M); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     for(int j=0;j<M;j++){ | ||||
|       C[i][j] = A[i][j] +  B[i][j]; | ||||
|     }  | ||||
|   }  | ||||
|   return C; | ||||
| }  | ||||
| /** Matrix Subtraction **/ | ||||
| inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){ | ||||
|   int N,M  ; SizeSame(A,B,N,M); | ||||
|   Matrix C; Resize(C,N,M); | ||||
|   for(int i=0;i<N;i++){ | ||||
|   for(int j=0;j<M;j++){ | ||||
|     C[i][j] = A[i][j] -  B[i][j]; | ||||
|   }} | ||||
|   return C; | ||||
| }  | ||||
|  | ||||
| /** Matrix scalar multiplication **/ | ||||
| inline Matrix<T> operator* (Matrix<T> & A,T c){ | ||||
|   int N,M; Size(A,N,M); | ||||
|   Matrix C; Resize(C,N,M); | ||||
|   for(int i=0;i<N;i++){ | ||||
|   for(int j=0;j<M;j++){ | ||||
|     C[i][j] = A[i][j]*c; | ||||
|   }}  | ||||
|   return C; | ||||
| }  | ||||
| /** Matrix Matrix multiplication **/ | ||||
| inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){ | ||||
|   int K,L,N,M; | ||||
|   Size(A,K,L); | ||||
|   Size(B,N,M); assert(L==N); | ||||
|   Matrix C; Resize(C,K,M); | ||||
|  | ||||
|   for(int i=0;i<K;i++){ | ||||
|     for(int j=0;j<M;j++){ | ||||
|       T sum = 0.0; | ||||
|       for(int k=0;k<N;k++) sum += A[i][k]*B[k][j]; | ||||
|       C[i][j] =sum; | ||||
|     } | ||||
|   } | ||||
|   return C;  | ||||
| }  | ||||
| /** Matrix Vector multiplication **/ | ||||
| inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){ | ||||
|   int M,N,K; | ||||
|   Size(A,N,M); | ||||
|   Size(B,K); assert(K==M); | ||||
|   Vector<T> C; Resize(C,N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     T sum = 0.0; | ||||
|     for(int j=0;j<M;j++) sum += A[i][j]*B[j]; | ||||
|     C[i] =  sum; | ||||
|   } | ||||
|   return C;  | ||||
| }  | ||||
|  | ||||
| /** Some version of Matrix norm **/ | ||||
| /* | ||||
| inline T Norm(){ // this is not a usual L2 norm | ||||
|     T norm = 0; | ||||
|     for(int i=0;i<dim;i++){ | ||||
|       for(int j=0;j<dim;j++){ | ||||
| 	norm += abs(A[i][j]); | ||||
|     }} | ||||
|     return norm; | ||||
|   } | ||||
| */ | ||||
|  | ||||
| /** Some version of Matrix norm **/ | ||||
| template<class T> T LargestDiag(Matrix<T> &A) | ||||
| { | ||||
|   int dim ; SizeSquare(A,dim);  | ||||
|  | ||||
|   T ld = abs(A[0][0]); | ||||
|   for(int i=1;i<dim;i++){ | ||||
|     T cf = abs(A[i][i]); | ||||
|     if(abs(cf) > abs(ld) ){ld = cf;} | ||||
|   } | ||||
|   return ld; | ||||
| } | ||||
|  | ||||
| /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ | ||||
| template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small) | ||||
| { | ||||
|   int dim; SizeSquare(A,dim); | ||||
|   for(int l = dim - 1 - offset; l >= 1; l--) {             		 | ||||
|     if((U)abs(A[l][l - 1]) < (U)small) { | ||||
|       A[l][l-1]=(U)0.0; | ||||
|       return l; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ | ||||
| template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)  | ||||
| { | ||||
|   int dim; SizeSquare(A,dim); | ||||
|   for(int l = dim - 1 - offset; l >= 1; l--) { | ||||
|     if((U)abs(A[l][l - 1]) < (U)small) { | ||||
|       A[l][l - 1] = (U)0.0; | ||||
|       A[l - 1][l] = (U)0.0; | ||||
|       return l; | ||||
|     } | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
| /**Assign a submatrix to a larger one**/ | ||||
| template<class T> | ||||
| void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) | ||||
| { | ||||
|   for(int i = row_st; i<row_end; i++){ | ||||
|     for(int j = col_st; j<col_end; j++){ | ||||
|       A[i][j] = S[i - row_st][j - col_st]; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /**Get a square submatrix**/ | ||||
| template <class T> | ||||
| Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end) | ||||
| { | ||||
|   Matrix<T> H; Resize(row_end - row_st,col_end-col_st); | ||||
|  | ||||
|   for(int i = row_st; i<row_end; i++){ | ||||
|   for(int j = col_st; j<col_end; j++){ | ||||
|     H[i-row_st][j-col_st]=A[i][j]; | ||||
|   }} | ||||
|   return H; | ||||
| } | ||||
|    | ||||
|  /**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/ | ||||
| template<class T> | ||||
| void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) | ||||
| { | ||||
|   for(int i = row_st; i<row_end; i++){ | ||||
|   for(int j = col_st; j<col_end; j++){ | ||||
|     A[i][j] = S[i - row_st][j - col_st]; | ||||
|   }} | ||||
| } | ||||
|    | ||||
| /** compute b_i A_ij b_j **/ // surprised no Conj | ||||
| template<class T> T proj(Matrix<T> A, Vector<T> B){ | ||||
|   int dim; SizeSquare(A,dim); | ||||
|   int dimB; Size(B,dimB); | ||||
|   assert(dimB==dim); | ||||
|   T C = 0; | ||||
|   for(int i=0;i<dim;i++){ | ||||
|     T sum = 0.0; | ||||
|     for(int j=0;j<dim;j++){ | ||||
|       sum += A[i][j]*B[j]; | ||||
|     } | ||||
|     C +=  B[i]*sum; // No conj? | ||||
|   } | ||||
|   return C;  | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  ************************************************************* | ||||
|  * | ||||
|  * Matrix Vector products | ||||
|  * | ||||
|  ************************************************************* | ||||
|  */ | ||||
| // Instead make a linop and call my CG; | ||||
|  | ||||
| /// q -> q Q | ||||
| template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q) | ||||
| { | ||||
|   int M; SizeSquare(Q,M); | ||||
|   int N; Size(q,N);  | ||||
|   assert(M==N); | ||||
|  | ||||
|   times(q,Q,N); | ||||
| } | ||||
|  | ||||
| /// q -> q Q | ||||
| template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N) | ||||
| { | ||||
|   GridBase *grid = q[0]._grid; | ||||
|   int M; SizeSquare(Q,M); | ||||
|   int K; Size(q,K);  | ||||
|   assert(N<M); | ||||
|   assert(N<K); | ||||
|   Vector<Fermion> S(N,grid ); | ||||
|   for(int j=0;j<N;j++){ | ||||
|     S[j] = zero; | ||||
|     for(int k=0;k<N;k++){ | ||||
|       S[j] = S[j] +  q[k]* Q[k][j];  | ||||
|     } | ||||
|   } | ||||
|   for(int j=0;j<q.size();j++){ | ||||
|     q[j] = S[j]; | ||||
|   } | ||||
| } | ||||
| #endif | ||||
| @@ -1,75 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/MatrixUtils.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_MATRIX_UTILS_H | ||||
| #define GRID_MATRIX_UTILS_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   namespace MatrixUtils {  | ||||
|  | ||||
|     template<class T> inline void Size(Matrix<T>& A,int &N,int &M){ | ||||
|       N=A.size(); assert(N>0); | ||||
|       M=A[0].size(); | ||||
|       for(int i=0;i<N;i++){ | ||||
| 	assert(A[i].size()==M); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     template<class T> inline void SizeSquare(Matrix<T>& A,int &N) | ||||
|     { | ||||
|       int M; | ||||
|       Size(A,N,M); | ||||
|       assert(N==M); | ||||
|     } | ||||
|  | ||||
|     template<class T> inline void Fill(Matrix<T>& A,T & val) | ||||
|     {  | ||||
|       int N,M; | ||||
|       Size(A,N,M); | ||||
|       for(int i=0;i<N;i++){ | ||||
|       for(int j=0;j<M;j++){ | ||||
| 	A[i][j]=val; | ||||
|       }} | ||||
|     } | ||||
|     template<class T> inline void Diagonal(Matrix<T>& A,T & val) | ||||
|     {  | ||||
|       int N; | ||||
|       SizeSquare(A,N); | ||||
|       for(int i=0;i<N;i++){ | ||||
| 	A[i][i]=val; | ||||
|       } | ||||
|     } | ||||
|     template<class T> inline void Identity(Matrix<T>& A) | ||||
|     { | ||||
|       Fill(A,0.0); | ||||
|       Diagonal(A,1.0); | ||||
|     } | ||||
|  | ||||
|   }; | ||||
| } | ||||
| #endif | ||||
| @@ -53,16 +53,119 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    *     M psi = eta | ||||
|    *********************** | ||||
|    *Odd | ||||
|    * i)   (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o | ||||
|    * i)                 D_oo psi_o =  L^{-1}  eta_o | ||||
|    *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||
|    * | ||||
|    * Wilson: | ||||
|    *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o | ||||
|    * Stag: | ||||
|    *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e) | ||||
|    * | ||||
|    * L^-1 eta_o= (1              0 ) (e | ||||
|    *             (-MoeMee^{-1}   1 )    | ||||
|    * | ||||
|    *Even | ||||
|    * ii)  Mee psi_e + Meo psi_o = src_e | ||||
|    * | ||||
|    *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|    * | ||||
|    *  | ||||
|    * TODO: Other options: | ||||
|    *  | ||||
|    * a) change checkerboards for Schur e<->o | ||||
|    * | ||||
|    * Left precon by Moo^-1 | ||||
|    * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 =  (D_oo)^dag M_oo^-dag Moo^-1 L^{-1}  eta_o | ||||
|    *                              eta_o'     = (D_oo)^dag  M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e) | ||||
|    * | ||||
|    * Right precon by Moo^-1 | ||||
|    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o | ||||
|    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||
|    *                              psi_o = M_oo^-1 phi_o | ||||
|    * TODO: Deflation  | ||||
|    */ | ||||
| namespace Grid { | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   template<class Field> class SchurRedBlackStaggeredSolve { | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       src_o = tmp;     assert(src_o.checkerboard ==Odd); | ||||
|       //  _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl; | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       _Matrix.M(out,resid);  | ||||
|       resid = resid-in; | ||||
|       RealD ns = norm2(in); | ||||
|       RealD nr = norm2(resid); | ||||
|  | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|     }      | ||||
|   }; | ||||
|   template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>; | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
| @@ -76,12 +179,10 @@ namespace Grid { | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|   SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0)  :  _HermitianRBSolver(HermitianRBSolver)  | ||||
|   {  | ||||
|     CBfactorise=cb; | ||||
|   }; | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
| @@ -141,5 +242,166 @@ namespace Grid { | ||||
|     }      | ||||
|   }; | ||||
|  | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagTwoSolve { | ||||
|   private: | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
|       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       _Matrix.M(out,resid);  | ||||
|       resid = resid-in; | ||||
|       RealD ns = norm2(in); | ||||
|       RealD nr = norm2(resid); | ||||
|  | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|     }      | ||||
|   }; | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Take a matrix and form a Red Black solver calling a Herm solver | ||||
|   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagTwoMixed { | ||||
|   private: | ||||
|     LinearFunction<Field> & _HermitianRBSolver; | ||||
|     int CBfactorise; | ||||
|   public: | ||||
|  | ||||
|     ///////////////////////////////////////////////////// | ||||
|     // Wrap the usual normal equations Schur trick | ||||
|     ///////////////////////////////////////////////////// | ||||
|   SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver)  : | ||||
|      _HermitianRBSolver(HermitianRBSolver)  | ||||
|     {  | ||||
|       CBfactorise=0; | ||||
|     }; | ||||
|  | ||||
|     template<class Matrix> | ||||
|       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||
|  | ||||
|       // FIXME CGdiagonalMee not implemented virtual function | ||||
|       // FIXME use CBfactorise to control schur decomp | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|   | ||||
|       Field src_e(grid); | ||||
|       Field src_o(grid); | ||||
|       Field sol_e(grid); | ||||
|       Field sol_o(grid); | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|       Field resid(fgrid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,in); | ||||
|       pickCheckerboard(Odd ,src_o,in); | ||||
|       pickCheckerboard(Even,sol_e,out); | ||||
|       pickCheckerboard(Odd ,sol_o,out); | ||||
|      | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||
|       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||
|  | ||||
|       // get the right MpcDag | ||||
|       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the red-black solver | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||
| //      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       _HermitianRBSolver(src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||
|       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); | ||||
|  | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||
|       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||
|       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||
|       | ||||
|       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||
|       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       _Matrix.M(out,resid);  | ||||
|       resid = resid-in; | ||||
|       RealD ns = norm2(in); | ||||
|       RealD nr = norm2(resid); | ||||
|  | ||||
|       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||
|     }      | ||||
|   }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -1,15 +0,0 @@ | ||||
| - ConjugateGradientMultiShift | ||||
| - MCR | ||||
|  | ||||
| - Potentially Useful Boost libraries | ||||
|  | ||||
| - MultiArray | ||||
| - Aligned allocator; memory pool | ||||
| - Remez -- Mike or Boost? | ||||
| - Multiprecision | ||||
| - quaternians | ||||
| - Tokenize | ||||
| - Serialization | ||||
| - Regex | ||||
| - Proto (ET) | ||||
| - uBlas | ||||
| @@ -1,122 +0,0 @@ | ||||
| #include <math.h> | ||||
| #include <stdlib.h> | ||||
| #include <vector> | ||||
|  | ||||
| struct Bisection { | ||||
|  | ||||
| static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig) | ||||
| { | ||||
|   int i,j; | ||||
|   std::vector<RealD> evec1(row_num+3); | ||||
|   std::vector<RealD> evec2(row_num+3); | ||||
|   RealD eps2; | ||||
|   ALPHA[1]=0.; | ||||
|   BETHA[1]=0.; | ||||
|   for(i=0;i<row_num-1;i++) { | ||||
|     ALPHA[i+1] = A[i*(row_num+1)].real(); | ||||
|     BETHA[i+2] = A[i*(row_num+1)+1].real(); | ||||
|   } | ||||
|   ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real(); | ||||
|   bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2); | ||||
|   bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2); | ||||
|  | ||||
|   // Do we really need to sort here? | ||||
|   int begin=1; | ||||
|   int end = row_num; | ||||
|   int swapped=1; | ||||
|   while(swapped) { | ||||
|     swapped=0; | ||||
|     for(i=begin;i<end;i++){ | ||||
|       if(mag(evec2[i])>mag(evec2[i+1]))	{ | ||||
| 	swap(evec2+i,evec2+i+1); | ||||
| 	swapped=1; | ||||
|       } | ||||
|     } | ||||
|     end--; | ||||
|     for(i=end-1;i>=begin;i--){ | ||||
|       if(mag(evec2[i])>mag(evec2[i+1]))	{ | ||||
| 	swap(evec2+i,evec2+i+1); | ||||
| 	swapped=1; | ||||
|       } | ||||
|     } | ||||
|     begin++; | ||||
|   } | ||||
|  | ||||
|   for(i=0;i<row_num;i++){ | ||||
|     for(j=0;j<row_num;j++) { | ||||
|       if(i==j) H[i*row_num+j]=evec2[i+1]; | ||||
|       else H[i*row_num+j]=0.; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void bisec(std::vector<RealD> &c,    | ||||
| 		  std::vector<RealD> &b, | ||||
| 		  int n, | ||||
| 		  int m1, | ||||
| 		  int m2, | ||||
| 		  RealD eps1, | ||||
| 		  RealD relfeh, | ||||
| 		  std::vector<RealD> &x, | ||||
| 		  RealD &eps2) | ||||
| { | ||||
|   std::vector<RealD> wu(n+2); | ||||
|  | ||||
|   RealD h,q,x1,xu,x0,xmin,xmax;  | ||||
|   int i,a,k; | ||||
|  | ||||
|   b[1]=0.0; | ||||
|   xmin=c[n]-fabs(b[n]); | ||||
|   xmax=c[n]+fabs(b[n]); | ||||
|   for(i=1;i<n;i++){ | ||||
|     h=fabs(b[i])+fabs(b[i+1]); | ||||
|     if(c[i]+h>xmax) xmax= c[i]+h; | ||||
|     if(c[i]-h<xmin) xmin= c[i]-h; | ||||
|   } | ||||
|   xmax *=2.; | ||||
|  | ||||
|   eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin); | ||||
|   if(eps1<=0.0) eps1=eps2; | ||||
|   eps2=0.5*eps1+7.0*(eps2); | ||||
|   x0=xmax; | ||||
|   for(i=m1;i<=m2;i++){ | ||||
|     x[i]=xmax; | ||||
|     wu[i]=xmin; | ||||
|   } | ||||
|  | ||||
|   for(k=m2;k>=m1;k--){ | ||||
|     xu=xmin; | ||||
|     i=k; | ||||
|     do{ | ||||
|       if(xu<wu[i]){ | ||||
| 	xu=wu[i]; | ||||
| 	i=m1-1; | ||||
|       } | ||||
|       i--; | ||||
|     }while(i>=m1); | ||||
|     if(x0>x[k]) x0=x[k]; | ||||
|     while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ | ||||
|       x1=(xu+x0)/2; | ||||
|  | ||||
|       a=0; | ||||
|       q=1.0; | ||||
|       for(i=1;i<=n;i++){ | ||||
| 	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); | ||||
| 	if(q<0) a++; | ||||
|       } | ||||
|       //			printf("x1=%e a=%d\n",x1,a); | ||||
|       if(a<k){ | ||||
| 	if(a<m1){ | ||||
| 	  xu=x1; | ||||
| 	  wu[m1]=x1; | ||||
| 	}else { | ||||
| 	  xu=x1; | ||||
| 	  wu[a+1]=x1; | ||||
| 	  if(x[a]>x1) x[a]=x1; | ||||
| 	} | ||||
|       }else x0=x1; | ||||
|     } | ||||
|     x[k]=(x0+xu)/2; | ||||
|   } | ||||
| } | ||||
| } | ||||
| @@ -1 +0,0 @@ | ||||
|  | ||||
| @@ -1,7 +1,5 @@ | ||||
|  | ||||
|  | ||||
|  | ||||
| #include <Grid/GridCore.h> | ||||
| #include <fcntl.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| @@ -11,7 +9,7 @@ int PointerCache::victim; | ||||
|  | ||||
| void *PointerCache::Insert(void *ptr,size_t bytes) { | ||||
|  | ||||
|   if (bytes < 4096 ) return NULL; | ||||
|   if (bytes < 4096 ) return ptr; | ||||
|  | ||||
| #ifdef GRID_OMP | ||||
|   assert(omp_in_parallel()==0); | ||||
| @@ -63,4 +61,37 @@ void *PointerCache::Lookup(size_t bytes) { | ||||
|   return NULL; | ||||
| } | ||||
|  | ||||
|  | ||||
| void check_huge_pages(void *Buf,uint64_t BYTES) | ||||
| { | ||||
| #ifdef __linux__ | ||||
|   int fd = open("/proc/self/pagemap", O_RDONLY); | ||||
|   assert(fd >= 0); | ||||
|   const int page_size = 4096; | ||||
|   uint64_t virt_pfn = (uint64_t)Buf / page_size; | ||||
|   off_t offset = sizeof(uint64_t) * virt_pfn; | ||||
|   uint64_t npages = (BYTES + page_size-1) / page_size; | ||||
|   uint64_t pagedata[npages]; | ||||
|   uint64_t ret = lseek(fd, offset, SEEK_SET); | ||||
|   assert(ret == offset); | ||||
|   ret = ::read(fd, pagedata, sizeof(uint64_t)*npages); | ||||
|   assert(ret == sizeof(uint64_t) * npages); | ||||
|   int nhugepages = npages / 512; | ||||
|   int n4ktotal, nnothuge; | ||||
|   n4ktotal = 0; | ||||
|   nnothuge = 0; | ||||
|   for (int i = 0; i < nhugepages; ++i) { | ||||
|     uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; | ||||
|     for (int j = 0; j < 512; ++j) { | ||||
|       uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; | ||||
|       ++n4ktotal; | ||||
|       if (pageaddr != baseaddr + j * page_size) | ||||
| 	++nnothuge; | ||||
|       } | ||||
|   } | ||||
|   int rank = CartesianCommunicator::RankWorld(); | ||||
|   printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -64,6 +64,8 @@ namespace Grid { | ||||
|  | ||||
|   }; | ||||
|  | ||||
|   void check_huge_pages(void *Buf,uint64_t BYTES); | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////// | ||||
| // A lattice of something, but assume the something is SIMDized. | ||||
| //////////////////////////////////////////////////////////////////// | ||||
| @@ -92,18 +94,34 @@ public: | ||||
|     size_type bytes = __n*sizeof(_Tp); | ||||
|  | ||||
|     _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); | ||||
|      | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128); | ||||
| #else | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); | ||||
| #endif | ||||
|     //    if ( ptr != NULL )  | ||||
|     //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl; | ||||
|  | ||||
|     ////////////////// | ||||
|     // Hack 2MB align; could make option probably doesn't need configurability | ||||
|     ////////////////// | ||||
| //define GRID_ALLOC_ALIGN (128) | ||||
| #define GRID_ALLOC_ALIGN (2*1024*1024) | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); | ||||
| #else | ||||
|     if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes); | ||||
| #endif | ||||
|     //    std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl; | ||||
|     // First touch optimise in threaded loop | ||||
|     uint8_t *cp = (uint8_t *)ptr; | ||||
| #ifdef GRID_OMP | ||||
| #pragma omp parallel for | ||||
| #endif | ||||
|     for(size_type n=0;n<bytes;n+=4096){ | ||||
|       cp[n]=0; | ||||
|     } | ||||
|     return ptr; | ||||
|   } | ||||
|  | ||||
|   void deallocate(pointer __p, size_type __n) {  | ||||
|     size_type bytes = __n * sizeof(_Tp); | ||||
|  | ||||
|     pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes); | ||||
|  | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
| @@ -182,10 +200,19 @@ public: | ||||
|   pointer allocate(size_type __n, const void* _p= 0)  | ||||
|   { | ||||
| #ifdef HAVE_MM_MALLOC_H | ||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); | ||||
|     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN); | ||||
| #else | ||||
|     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); | ||||
|     _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp)); | ||||
| #endif | ||||
|     size_type bytes = __n*sizeof(_Tp); | ||||
|     uint8_t *cp = (uint8_t *)ptr; | ||||
|     if ( ptr ) {  | ||||
|     // One touch per 4k page, static OMP loop to catch same loop order | ||||
| #pragma omp parallel for schedule(static) | ||||
|       for(size_type n=0;n<bytes;n+=4096){ | ||||
| 	cp[n]=0; | ||||
|       } | ||||
|     } | ||||
|     return ptr; | ||||
|   } | ||||
|   void deallocate(pointer __p, size_type) {  | ||||
|   | ||||
| @@ -6,8 +6,9 @@ | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
| @@ -43,12 +44,20 @@ namespace Grid{ | ||||
|   class GridBase : public CartesianCommunicator , public GridThread { | ||||
|  | ||||
| public: | ||||
|  | ||||
|     int dummy; | ||||
|     // Give Lattice access | ||||
|     template<class object> friend class Lattice; | ||||
|  | ||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||
|     GridBase(const std::vector<int> & processor_grid, | ||||
| 	     const CartesianCommunicator &parent, | ||||
| 	     int &split_rank)  | ||||
|       : CartesianCommunicator(processor_grid,parent,split_rank) {}; | ||||
|     GridBase(const std::vector<int> & processor_grid, | ||||
| 	     const CartesianCommunicator &parent)  | ||||
|       : CartesianCommunicator(processor_grid,parent,dummy) {}; | ||||
|  | ||||
|     virtual ~GridBase() = default; | ||||
|  | ||||
|     // Physics Grid information. | ||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||
| @@ -62,13 +71,12 @@ public: | ||||
|     int _isites; | ||||
|     int _fsites;                  // _isites*_osites = product(dimensions). | ||||
|     int _gsites; | ||||
|     std::vector<int> _slice_block;   // subslice information | ||||
|     std::vector<int> _slice_block;// subslice information | ||||
|     std::vector<int> _slice_stride; | ||||
|     std::vector<int> _slice_nblock; | ||||
|  | ||||
|     // Might need these at some point | ||||
|     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] | ||||
|     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||
|     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] | ||||
|     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||
|  | ||||
| public: | ||||
|  | ||||
| @@ -99,7 +107,7 @@ public: | ||||
|     virtual int oIndex(std::vector<int> &coor) | ||||
|     { | ||||
|         int idx=0; | ||||
| 	// Works with either global or local coordinates | ||||
|         // Works with either global or local coordinates | ||||
|         for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||
|         return idx; | ||||
|     } | ||||
| @@ -121,6 +129,12 @@ public: | ||||
|       Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); | ||||
|     } | ||||
|  | ||||
|     inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) { | ||||
|       lcoor.resize(_ndimension); | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|         lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////////////////////////// | ||||
|     // SIMD lane addressing | ||||
|     ////////////////////////////////////////////////////////// | ||||
| @@ -128,6 +142,7 @@ public: | ||||
|     { | ||||
|       Lexicographic::CoorFromIndex(coor,lane,_simd_layout); | ||||
|     } | ||||
|  | ||||
|     inline int PermuteDim(int dimension){ | ||||
|       return _simd_layout[dimension]>1; | ||||
|     } | ||||
| @@ -145,15 +160,15 @@ public: | ||||
|       // Distance should be either 0,1,2.. | ||||
|       // | ||||
|       if ( _simd_layout[dimension] > 2 ) {  | ||||
| 	for(int d=0;d<_ndimension;d++){ | ||||
| 	  if ( d != dimension ) assert ( (_simd_layout[d]==1)  ); | ||||
| 	} | ||||
| 	permute_type = RotateBit; // How to specify distance; this is not just direction. | ||||
| 	return permute_type; | ||||
|         for(int d=0;d<_ndimension;d++){ | ||||
|           if ( d != dimension ) assert ( (_simd_layout[d]==1)  ); | ||||
|         } | ||||
|         permute_type = RotateBit; // How to specify distance; this is not just direction. | ||||
|         return permute_type; | ||||
|       } | ||||
|  | ||||
|       for(int d=_ndimension-1;d>dimension;d--){ | ||||
| 	if (_simd_layout[d]>1 ) permute_type++; | ||||
|         if (_simd_layout[d]>1 ) permute_type++; | ||||
|       } | ||||
|       return permute_type; | ||||
|     } | ||||
| @@ -168,11 +183,31 @@ public: | ||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||
|     inline int Nd    (void) const { return _ndimension;}; | ||||
|  | ||||
|     inline const std::vector<int> LocalStarts(void)             { return _lstart;    }; | ||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||
|     inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////// | ||||
|     // Utility to print the full decomposition details  | ||||
|     //////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     void show_decomposition(){ | ||||
|       std::cout << GridLogMessage << "\tFull Dimensions    : " << _fdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tSIMD layout        : " << _simd_layout << std::endl; | ||||
|       std::cout << GridLogMessage << "\tGlobal Dimensions  : " << _gdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tLocal Dimensions   : " << _ldimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; | ||||
|       std::cout << GridLogMessage << "\tOuter strides      : " << _ostride << std::endl; | ||||
|       std::cout << GridLogMessage << "\tInner strides      : " << _istride << std::endl; | ||||
|       std::cout << GridLogMessage << "\tiSites             : " << _isites << std::endl; | ||||
|       std::cout << GridLogMessage << "\toSites             : " << _osites << std::endl; | ||||
|       std::cout << GridLogMessage << "\tlSites             : " << lSites() << std::endl;         | ||||
|       std::cout << GridLogMessage << "\tgSites             : " << gSites() << std::endl; | ||||
|       std::cout << GridLogMessage << "\tNd                 : " << _ndimension << std::endl;              | ||||
|     }  | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////// | ||||
|     // Global addressing | ||||
|     //////////////////////////////////////////////////////////////// | ||||
| @@ -188,8 +223,8 @@ public: | ||||
|       gidx=0; | ||||
|       int mult=1; | ||||
|       for(int mu=0;mu<_ndimension;mu++) { | ||||
| 	gidx+=mult*gcoor[mu]; | ||||
| 	mult*=_gdimensions[mu]; | ||||
|         gidx+=mult*gcoor[mu]; | ||||
|         mult*=_gdimensions[mu]; | ||||
|       } | ||||
|     } | ||||
|     void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor) | ||||
| @@ -197,9 +232,9 @@ public: | ||||
|       pcoor.resize(_ndimension); | ||||
|       lcoor.resize(_ndimension); | ||||
|       for(int mu=0;mu<_ndimension;mu++){ | ||||
| 	int _fld  = _fdimensions[mu]/_processors[mu]; | ||||
| 	pcoor[mu] = gcoor[mu]/_fld; | ||||
| 	lcoor[mu] = gcoor[mu]%_fld; | ||||
|         int _fld  = _fdimensions[mu]/_processors[mu]; | ||||
|         pcoor[mu] = gcoor[mu]/_fld; | ||||
|         lcoor[mu] = gcoor[mu]%_fld; | ||||
|       } | ||||
|     } | ||||
|     void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor) | ||||
| @@ -211,9 +246,9 @@ public: | ||||
|       /* | ||||
|       std::vector<int> cblcoor(lcoor); | ||||
|       for(int d=0;d<cblcoor.size();d++){ | ||||
| 	if( this->CheckerBoarded(d) ) { | ||||
| 	  cblcoor[d] = lcoor[d]/2; | ||||
| 	} | ||||
|         if( this->CheckerBoarded(d) ) { | ||||
|           cblcoor[d] = lcoor[d]/2; | ||||
|         } | ||||
|       } | ||||
|       */ | ||||
|       i_idx= iIndex(lcoor); | ||||
| @@ -239,7 +274,7 @@ public: | ||||
|     { | ||||
|       RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); | ||||
|       if(CheckerBoarded(0)){ | ||||
| 	fcoor[0] = fcoor[0]*2+cb; | ||||
|         fcoor[0] = fcoor[0]*2+cb; | ||||
|       } | ||||
|     } | ||||
|     void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor) | ||||
|   | ||||
| @@ -38,7 +38,7 @@ namespace Grid{ | ||||
| class GridCartesian: public GridBase { | ||||
|  | ||||
| public: | ||||
|  | ||||
|     int dummy; | ||||
|     virtual int  CheckerBoardFromOindexTable (int Oindex) { | ||||
|       return 0; | ||||
|     } | ||||
| @@ -61,74 +61,111 @@ public: | ||||
|     virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ | ||||
|       return shift; | ||||
|     } | ||||
|     ///////////////////////////////////////////////////////////////////////// | ||||
|     // Constructor takes a parent grid and possibly subdivides communicator. | ||||
|     ///////////////////////////////////////////////////////////////////////// | ||||
|     GridCartesian(const std::vector<int> &dimensions, | ||||
| 		  const std::vector<int> &simd_layout, | ||||
| 		  const std::vector<int> &processor_grid | ||||
| 		  ) : GridBase(processor_grid) | ||||
| 		  const std::vector<int> &processor_grid, | ||||
| 		  const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) | ||||
|     { | ||||
|         /////////////////////// | ||||
|         // Grid information | ||||
|         /////////////////////// | ||||
|         _ndimension = dimensions.size(); | ||||
|              | ||||
|         _fdimensions.resize(_ndimension); | ||||
|         _gdimensions.resize(_ndimension); | ||||
|         _ldimensions.resize(_ndimension); | ||||
|         _rdimensions.resize(_ndimension); | ||||
|         _simd_layout.resize(_ndimension); | ||||
|              | ||||
|         _ostride.resize(_ndimension); | ||||
|         _istride.resize(_ndimension); | ||||
|              | ||||
|         _fsites = _gsites = _osites = _isites = 1; | ||||
|       Init(dimensions,simd_layout,processor_grid); | ||||
|     } | ||||
|     GridCartesian(const std::vector<int> &dimensions, | ||||
| 		  const std::vector<int> &simd_layout, | ||||
| 		  const std::vector<int> &processor_grid, | ||||
| 		  const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) | ||||
|     { | ||||
|       Init(dimensions,simd_layout,processor_grid); | ||||
|     } | ||||
|     ///////////////////////////////////////////////////////////////////////// | ||||
|     // Construct from comm world | ||||
|     ///////////////////////////////////////////////////////////////////////// | ||||
|     GridCartesian(const std::vector<int> &dimensions, | ||||
| 		  const std::vector<int> &simd_layout, | ||||
| 		  const std::vector<int> &processor_grid) : GridBase(processor_grid) | ||||
|     { | ||||
|       Init(dimensions,simd_layout,processor_grid); | ||||
|     } | ||||
|  | ||||
|         for(int d=0;d<_ndimension;d++){ | ||||
| 	  _fdimensions[d] = dimensions[d]; // Global dimensions | ||||
| 	  _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||
| 	  _simd_layout[d] = simd_layout[d]; | ||||
| 	  _fsites = _fsites * _fdimensions[d]; | ||||
| 	  _gsites = _gsites * _gdimensions[d]; | ||||
|     virtual ~GridCartesian() = default; | ||||
|  | ||||
| 	  //FIXME check for exact division | ||||
|     void Init(const std::vector<int> &dimensions, | ||||
| 	      const std::vector<int> &simd_layout, | ||||
| 	      const std::vector<int> &processor_grid) | ||||
|     { | ||||
|       /////////////////////// | ||||
|       // Grid information | ||||
|       /////////////////////// | ||||
|       _ndimension = dimensions.size(); | ||||
|  | ||||
| 	  // Use a reduced simd grid | ||||
| 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | ||||
| 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | ||||
| 	  _osites *= _rdimensions[d]; | ||||
| 	  _isites *= _simd_layout[d]; | ||||
|                  | ||||
| 	  // Addressing support | ||||
| 	  if ( d==0 ) { | ||||
| 	    _ostride[d] = 1; | ||||
| 	    _istride[d] = 1; | ||||
| 	  } else { | ||||
| 	    _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||
| 	    _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||
| 	  } | ||||
|       _fdimensions.resize(_ndimension); | ||||
|       _gdimensions.resize(_ndimension); | ||||
|       _ldimensions.resize(_ndimension); | ||||
|       _rdimensions.resize(_ndimension); | ||||
|       _simd_layout.resize(_ndimension); | ||||
|       _lstart.resize(_ndimension); | ||||
|       _lend.resize(_ndimension); | ||||
|  | ||||
|       _ostride.resize(_ndimension); | ||||
|       _istride.resize(_ndimension); | ||||
|  | ||||
|       _fsites = _gsites = _osites = _isites = 1; | ||||
|  | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         _fdimensions[d] = dimensions[d];   // Global dimensions | ||||
|         _gdimensions[d] = _fdimensions[d]; // Global dimensions | ||||
|         _simd_layout[d] = simd_layout[d]; | ||||
|         _fsites = _fsites * _fdimensions[d]; | ||||
|         _gsites = _gsites * _gdimensions[d]; | ||||
|  | ||||
|         // Use a reduced simd grid | ||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions | ||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||
|  | ||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition | ||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | ||||
|  | ||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; | ||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; | ||||
|         _osites *= _rdimensions[d]; | ||||
|         _isites *= _simd_layout[d]; | ||||
|  | ||||
|         // Addressing support | ||||
|         if (d == 0) | ||||
|         { | ||||
|           _ostride[d] = 1; | ||||
|           _istride[d] = 1; | ||||
|         } | ||||
|          | ||||
|         /////////////////////// | ||||
|         // subplane information | ||||
|         /////////////////////// | ||||
|         _slice_block.resize(_ndimension); | ||||
|         _slice_stride.resize(_ndimension); | ||||
|         _slice_nblock.resize(_ndimension); | ||||
|              | ||||
|         int block =1; | ||||
|         int nblock=1; | ||||
|         for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||
|              | ||||
|         for(int d=0;d<_ndimension;d++){ | ||||
|             nblock/=_rdimensions[d]; | ||||
|             _slice_block[d] =block; | ||||
|             _slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||
|             _slice_nblock[d]=nblock; | ||||
|             block = block*_rdimensions[d]; | ||||
|         else | ||||
|         { | ||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; | ||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       /////////////////////// | ||||
|       // subplane information | ||||
|       /////////////////////// | ||||
|       _slice_block.resize(_ndimension); | ||||
|       _slice_stride.resize(_ndimension); | ||||
|       _slice_nblock.resize(_ndimension); | ||||
|  | ||||
|       int block = 1; | ||||
|       int nblock = 1; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|         nblock *= _rdimensions[d]; | ||||
|  | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         nblock /= _rdimensions[d]; | ||||
|         _slice_block[d] = block; | ||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | ||||
|         _slice_nblock[d] = nblock; | ||||
|         block = block * _rdimensions[d]; | ||||
|       } | ||||
|     }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -112,151 +112,210 @@ public: | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors)  {}; | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Create Redblack from original grid; require full grid pointer ? | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base) | ||||
|     { | ||||
|       int dims = base->_ndimension; | ||||
|       std::vector<int> checker_dim_mask(dims,1); | ||||
|       int checker_dim = 0; | ||||
|       Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim); | ||||
|     }; | ||||
|  | ||||
|     GridRedBlackCartesian(const std::vector<int> &dimensions, | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Create redblack from original grid, with non-trivial checker dim mask | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     GridRedBlackCartesian(const GridBase *base, | ||||
| 			  const std::vector<int> &checker_dim_mask, | ||||
| 			  int checker_dim | ||||
| 			  ) :  GridBase(base->_processors,*base)  | ||||
|     { | ||||
|       Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim)  ; | ||||
|     } | ||||
|  | ||||
|     virtual ~GridRedBlackCartesian() = default; | ||||
| #if 0 | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Create redblack grid ;; deprecate these. Should not | ||||
|     // need direct creation of redblack without a full grid to base on | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     GridRedBlackCartesian(const GridBase *base, | ||||
| 			  const std::vector<int> &dimensions, | ||||
| 			  const std::vector<int> &simd_layout, | ||||
| 			  const std::vector<int> &processor_grid, | ||||
| 			  const std::vector<int> &checker_dim_mask, | ||||
| 			  int checker_dim | ||||
| 			  ) :  GridBase(processor_grid)  | ||||
| 			  ) :  GridBase(processor_grid,*base)  | ||||
|     { | ||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); | ||||
|     } | ||||
|     GridRedBlackCartesian(const std::vector<int> &dimensions, | ||||
|  | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Create redblack grid | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     GridRedBlackCartesian(const GridBase *base, | ||||
| 			  const std::vector<int> &dimensions, | ||||
| 			  const std::vector<int> &simd_layout, | ||||
| 			  const std::vector<int> &processor_grid) : GridBase(processor_grid)  | ||||
| 			  const std::vector<int> &processor_grid) : GridBase(processor_grid,*base)  | ||||
|     { | ||||
|       std::vector<int> checker_dim_mask(dimensions.size(),1); | ||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); | ||||
|       int checker_dim = 0; | ||||
|       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     void Init(const std::vector<int> &dimensions, | ||||
| 	      const std::vector<int> &simd_layout, | ||||
| 	      const std::vector<int> &processor_grid, | ||||
| 	      const std::vector<int> &checker_dim_mask, | ||||
| 	      int checker_dim) | ||||
|               const std::vector<int> &simd_layout, | ||||
|               const std::vector<int> &processor_grid, | ||||
|               const std::vector<int> &checker_dim_mask, | ||||
|               int checker_dim) | ||||
|     { | ||||
|     /////////////////////// | ||||
|     // Grid information | ||||
|     /////////////////////// | ||||
|       /////////////////////// | ||||
|       // Grid information | ||||
|       /////////////////////// | ||||
|       _checker_dim = checker_dim; | ||||
|       assert(checker_dim_mask[checker_dim]==1); | ||||
|       assert(checker_dim_mask[checker_dim] == 1); | ||||
|       _ndimension = dimensions.size(); | ||||
|       assert(checker_dim_mask.size()==_ndimension); | ||||
|       assert(processor_grid.size()==_ndimension); | ||||
|       assert(simd_layout.size()==_ndimension); | ||||
|        | ||||
|       assert(checker_dim_mask.size() == _ndimension); | ||||
|       assert(processor_grid.size() == _ndimension); | ||||
|       assert(simd_layout.size() == _ndimension); | ||||
|  | ||||
|       _fdimensions.resize(_ndimension); | ||||
|       _gdimensions.resize(_ndimension); | ||||
|       _ldimensions.resize(_ndimension); | ||||
|       _rdimensions.resize(_ndimension); | ||||
|       _simd_layout.resize(_ndimension); | ||||
|        | ||||
|       _lstart.resize(_ndimension); | ||||
|       _lend.resize(_ndimension); | ||||
|  | ||||
|       _ostride.resize(_ndimension); | ||||
|       _istride.resize(_ndimension); | ||||
|        | ||||
|  | ||||
|       _fsites = _gsites = _osites = _isites = 1; | ||||
| 	 | ||||
|       _checker_dim_mask=checker_dim_mask; | ||||
|  | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	_fdimensions[d] = dimensions[d]; | ||||
| 	_gdimensions[d] = _fdimensions[d]; | ||||
| 	_fsites = _fsites * _fdimensions[d]; | ||||
| 	_gsites = _gsites * _gdimensions[d]; | ||||
|          | ||||
| 	if (d==_checker_dim) { | ||||
| 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | ||||
| 	} | ||||
| 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | ||||
|       _checker_dim_mask = checker_dim_mask; | ||||
|  | ||||
| 	// Use a reduced simd grid | ||||
| 	_simd_layout[d] = simd_layout[d]; | ||||
| 	_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; | ||||
| 	assert(_rdimensions[d]>0); | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         _fdimensions[d] = dimensions[d]; | ||||
|         _gdimensions[d] = _fdimensions[d]; | ||||
|         _fsites = _fsites * _fdimensions[d]; | ||||
|         _gsites = _gsites * _gdimensions[d]; | ||||
|  | ||||
| 	// all elements of a simd vector must have same checkerboard. | ||||
| 	// If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||
| 	if ( _simd_layout[d]>1 ) { | ||||
| 	  if ( checker_dim_mask[d] ) {  | ||||
| 	    assert( (_rdimensions[d]&0x1) == 0 ); | ||||
| 	  } | ||||
| 	} | ||||
|         if (d == _checker_dim) | ||||
|         { | ||||
|           assert((_gdimensions[d] & 0x1) == 0); | ||||
|           _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard | ||||
| 	  _gsites /= 2; | ||||
|         } | ||||
|         _ldimensions[d] = _gdimensions[d] / _processors[d]; | ||||
|         assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); | ||||
|         _lstart[d] = _processor_coor[d] * _ldimensions[d]; | ||||
|         _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1; | ||||
|  | ||||
| 	_osites *= _rdimensions[d]; | ||||
| 	_isites *= _simd_layout[d]; | ||||
|          | ||||
| 	// Addressing support | ||||
| 	if ( d==0 ) { | ||||
| 	  _ostride[d] = 1; | ||||
| 	  _istride[d] = 1; | ||||
| 	} else { | ||||
| 	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; | ||||
| 	  _istride[d] = _istride[d-1]*_simd_layout[d-1]; | ||||
| 	} | ||||
|         // Use a reduced simd grid | ||||
|         _simd_layout[d] = simd_layout[d]; | ||||
|         _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer | ||||
|         assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]); | ||||
|         assert(_rdimensions[d] > 0); | ||||
|  | ||||
|         // all elements of a simd vector must have same checkerboard. | ||||
|         // If Ls vectorised, this must still be the case; e.g. dwf rb5d | ||||
|         if (_simd_layout[d] > 1) | ||||
|         { | ||||
|           if (checker_dim_mask[d]) | ||||
|           { | ||||
|             assert((_rdimensions[d] & 0x1) == 0); | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         _osites *= _rdimensions[d]; | ||||
|         _isites *= _simd_layout[d]; | ||||
|  | ||||
|         // Addressing support | ||||
|         if (d == 0) | ||||
|         { | ||||
|           _ostride[d] = 1; | ||||
|           _istride[d] = 1; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; | ||||
|           _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; | ||||
|         } | ||||
|       } | ||||
|              | ||||
|  | ||||
|       //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|       // subplane information | ||||
|       //////////////////////////////////////////////////////////////////////////////////////////// | ||||
|       _slice_block.resize(_ndimension); | ||||
|       _slice_stride.resize(_ndimension); | ||||
|       _slice_nblock.resize(_ndimension); | ||||
|          | ||||
|       int block =1; | ||||
|       int nblock=1; | ||||
|       for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d]; | ||||
|        | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	nblock/=_rdimensions[d]; | ||||
| 	_slice_block[d] =block; | ||||
| 	_slice_stride[d]=_ostride[d]*_rdimensions[d]; | ||||
| 	_slice_nblock[d]=nblock; | ||||
| 	block = block*_rdimensions[d]; | ||||
|  | ||||
|       int block = 1; | ||||
|       int nblock = 1; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|         nblock *= _rdimensions[d]; | ||||
|  | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         nblock /= _rdimensions[d]; | ||||
|         _slice_block[d] = block; | ||||
|         _slice_stride[d] = _ostride[d] * _rdimensions[d]; | ||||
|         _slice_nblock[d] = nblock; | ||||
|         block = block * _rdimensions[d]; | ||||
|       } | ||||
|  | ||||
|       //////////////////////////////////////////////// | ||||
|       // Create a checkerboard lookup table | ||||
|       //////////////////////////////////////////////// | ||||
|       int rvol = 1; | ||||
|       for(int d=0;d<_ndimension;d++){ | ||||
| 	rvol=rvol * _rdimensions[d]; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         rvol = rvol * _rdimensions[d]; | ||||
|       } | ||||
|       _checker_board.resize(rvol); | ||||
|       for(int osite=0;osite<_osites;osite++){ | ||||
| 	_checker_board[osite] = CheckerBoardFromOindex (osite); | ||||
|       for (int osite = 0; osite < _osites; osite++) | ||||
|       { | ||||
|         _checker_board[osite] = CheckerBoardFromOindex(osite); | ||||
|       } | ||||
|        | ||||
|     }; | ||||
| protected: | ||||
|  | ||||
|   protected: | ||||
|     virtual int oIndex(std::vector<int> &coor) | ||||
|     { | ||||
|       int idx=0; | ||||
|       for(int d=0;d<_ndimension;d++) { | ||||
| 	if( d==_checker_dim ) { | ||||
| 	  idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]); | ||||
| 	} else { | ||||
| 	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]); | ||||
| 	} | ||||
|       int idx = 0; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         if (d == _checker_dim) | ||||
|         { | ||||
|           idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           idx += _ostride[d] * (coor[d] % _rdimensions[d]); | ||||
|         } | ||||
|       } | ||||
|       return idx; | ||||
|     }; | ||||
|          | ||||
|  | ||||
|     virtual int iIndex(std::vector<int> &lcoor) | ||||
|     { | ||||
|         int idx=0; | ||||
|         for(int d=0;d<_ndimension;d++) { | ||||
| 	  if( d==_checker_dim ) { | ||||
| 	    idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); | ||||
| 	  } else {  | ||||
| 	    idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); | ||||
| 	  } | ||||
| 	} | ||||
|         return idx; | ||||
|       int idx = 0; | ||||
|       for (int d = 0; d < _ndimension; d++) | ||||
|       { | ||||
|         if (d == _checker_dim) | ||||
|         { | ||||
|           idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|           idx += _istride[d] * (lcoor[d] / _rdimensions[d]); | ||||
|         } | ||||
|       } | ||||
|       return idx; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user