mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Compare commits
	
		
			484 Commits
		
	
	
		
			ckelly-dec
			...
			feature/mp
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					b820076b91 | ||
| 
						 | 
					09f66100d3 | ||
| 
						 | 
					d7d92af09d | ||
| 
						 | 
					460d0753a1 | ||
| 
						 | 
					8f8058f8a5 | ||
| 
						 | 
					d97a27f483 | ||
| 
						 | 
					7c3363b91e | ||
| 
						 | 
					b94478fa51 | ||
| 13bf0482e3 | |||
| a795b5705e | |||
| 392e064513 | |||
| 
						 | 
					b6a65059a2 | ||
| 
						 | 
					ea25a4d9ac | ||
| 
						 | 
					c190221fd3 | ||
| 
						 | 
					0fcd2e7188 | ||
| 
						 | 
					910b8dd6a1 | ||
| 
						 | 
					75ebd3a0d1 | ||
| 
						 | 
					09fd5c43a7 | ||
| 
						 | 
					f22317748f | ||
| 
						 | 
					6a9eae6b6b | ||
| 
						 | 
					fad96cf250 | ||
| 
						 | 
					f331809c27 | ||
| 
						 | 
					2c54a53d0a | ||
| 
						 | 
					306160ad9a | ||
| 
						 | 
					20a091c3ed | ||
| 
						 | 
					202078eb1b | ||
| 
						 | 
					a762b1fb71 | ||
| 
						 | 
					5b5925b8e5 | ||
| 
						 | 
					b58adc6a4b | ||
| 
						 | 
					f9d5e95d72 | ||
| 
						 | 
					4f8e636a43 | ||
| 
						 | 
					9b39f35ae6 | ||
| 
						 | 
					5fe2b85cbd | ||
| 
						 | 
					c7cccaaa69 | ||
| 
						 | 
					cbcfea466f | ||
| 
						 | 
					4955672fc3 | ||
| 
						 | 
					39f1c880b8 | ||
| 
						 | 
					8c043da5b7 | ||
| 
						 | 
					3cbe974eb4 | ||
| 
						 | 
					7af9b87318 | ||
| 
						 | 
					811ca45473 | ||
| 
						 | 
					bc1a4d40ba | ||
| 
						 | 
					c8079e6621 | ||
| 
						 | 
					8b0d171c9a | ||
| 
						 | 
					1f293b76b4 | ||
| 
						 | 
					8bbd9ebc27 | ||
| 
						 | 
					6472b431f0 | ||
| 
						 | 
					bd205a3293 | ||
| 
						 | 
					496beffa88 | ||
| 
						 | 
					9b63e97108 | ||
| 
						 | 
					81f2aeaece | ||
| 
						 | 
					2d4a45c758 | ||
| 
						 | 
					0f182f033b | ||
| 
						 | 
					7240d73184 | ||
| 
						 | 
					42cd148f5e | ||
| 
						 | 
					611b5d74ba | ||
| 
						 | 
					b56c9ffa52 | ||
| 70c32fa49b | |||
| 77c8a94dae | |||
| 
						 | 
					2e453dfbf5 | ||
| 
						 | 
					4089984431 | ||
| 98439847cf | |||
| 
						 | 
					c78bbd0f8c | ||
| 7ea4b959a4 | |||
| 536e2ff073 | |||
| 798ff34d7e | |||
| 
						 | 
					04a437c92c | ||
| 
						 | 
					5c190a1b8c | ||
| 
						 | 
					15d8f5c88c | ||
| 
						 | 
					c4ac6e7e8f | ||
| 
						 | 
					510e340e16 | ||
| 
						 | 
					6ffadca153 | ||
| 
						 | 
					b6597b74e7 | ||
| d2573189d8 | |||
| 65ca174dbb | |||
| 
						 | 
					0724f7af75 | ||
| 2e74520821 | |||
| 
						 | 
					6dd75ad9e5 | ||
| 
						 | 
					fda408ee6f | ||
| 
						 | 
					b9c80318a2 | ||
| 
						 | 
					5df5d52d41 | ||
| 
						 | 
					f76f281e58 | ||
| 
						 | 
					aa20cc8b52 | ||
| 
						 | 
					0fd179fb33 | ||
| 
						 | 
					f45ef8d114 | ||
| 
						 | 
					fd5614738d | ||
| 
						 | 
					005dcc51aa | ||
| 
						 | 
					655c893f86 | ||
| 
						 | 
					843f5783b4 | ||
| 
						 | 
					8986c9fedd | ||
| 
						 | 
					c80a1d427c | ||
| 
						 | 
					ae57032500 | ||
| 
						 | 
					f75468728f | ||
| 
						 | 
					5acd856663 | ||
| 
						 | 
					b0d3e4bb2c | ||
| 
						 | 
					b512ccbee6 | ||
| 
						 | 
					8c89391c02 | ||
| 
						 | 
					bfac5195b8 | ||
| 
						 | 
					a782ca3238 | ||
| 
						 | 
					744691097f | ||
| 
						 | 
					ff6da364e8 | ||
| 4d11a6f5f2 | |||
| 
						 | 
					88be3b39bb | ||
| 
						 | 
					8a02824e08 | ||
| 
						 | 
					356e7940fd | ||
| 
						 | 
					73ce476890 | ||
| 
						 | 
					29c4ef41de | ||
| 
						 | 
					e423a09974 | ||
| 
						 | 
					17097a93ec | ||
| 
						 | 
					94a6373a7f | ||
| 
						 | 
					4ab7dbfd57 | ||
| 
						 | 
					90e70790f3 | ||
| 
						 | 
					9c2e8d5e28 | ||
| 
						 | 
					147e2025b9 | ||
| 573b8c6020 | |||
| 15218ec57f | |||
| ec68e08dd2 | |||
| 
						 | 
					fc25d2295c | ||
| 
						 | 
					8dc2cfcedb | ||
| 836f93780c | |||
| 
						 | 
					5a68715be3 | ||
| 
						 | 
					32bc7a6ab8 | ||
| b65e72e521 | |||
| d1aaff65e8 | |||
| 93d29bb699 | |||
| 3b376ed54e | |||
| d5c1f614ba | |||
| 2edc24225d | |||
| 629283726b | |||
| 6adb66dd08 | |||
| 5be92bb708 | |||
| f4c049ea6d | |||
| bc092ad30f | |||
| dad642ed1b | |||
| 63ae39abc7 | |||
| 9e5b934d21 | |||
| a7b483d67a | |||
| bb99ce0680 | |||
| 83307df1af | |||
| 
						 | 
					49b5c49851 | ||
| e9f30cab2c | |||
| 
						 | 
					089f0ab582 | ||
| 
						 | 
					df6c9f55d1 | ||
| 
						 | 
					b93e18ed50 | ||
| 
						 | 
					9c77bb69a5 | ||
| 
						 | 
					27f3ecc833 | ||
| 
						 | 
					f9e90eeb1f | ||
| 
						 | 
					fad5c675eb | ||
| 
						 | 
					4908b77d46 | ||
| 
						 | 
					f4dd5062d7 | ||
| 
						 | 
					da34d75841 | ||
| 
						 | 
					980ff18956 | ||
| 
						 | 
					7edf4c6c04 | ||
| 
						 | 
					1a6c7204ac | ||
| 
						 | 
					49310fbab3 | ||
| 
						 | 
					6049d5ac47 | ||
| 
						 | 
					35d0d35238 | ||
| 
						 | 
					c0e878705e | ||
| 
						 | 
					5c0c8efb9e | ||
| 
						 | 
					dfd714e1ef | ||
| 
						 | 
					79a8ca1a62 | ||
| 
						 | 
					fb45eb2eb2 | ||
| 
						 | 
					a307274c96 | ||
| 
						 | 
					3f2c44a5fe | ||
| 
						 | 
					48fb1cdc11 | ||
| 
						 | 
					8a79e93cc2 | ||
| 
						 | 
					3493b51879 | ||
| 
						 | 
					de3e79d300 | ||
| 
						 | 
					dd62a61c5c | ||
| 
						 | 
					8f47d0b5ab | ||
| 
						 | 
					42af132dab | ||
| 
						 | 
					9db2c6525d | ||
| 
						 | 
					adbc7c1188 | ||
| 
						 | 
					9dc345e8e8 | ||
| 
						 | 
					8b9301a74c | ||
| 
						 | 
					6f47fbb1e2 | ||
| 
						 | 
					a9ae30f868 | ||
| 
						 | 
					a3c0fb79b6 | ||
| 
						 | 
					62601bb649 | ||
| 
						 | 
					ef97e32152 | ||
| 
						 | 
					daea5297ee | ||
| 
						 | 
					5028969d4b | ||
| 
						 | 
					c667d9fdcc | ||
| 
						 | 
					7dbb94bab2 | ||
| 
						 | 
					236dcc820b | ||
| 
						 | 
					a42a441a6a | ||
| 
						 | 
					a0676beeb1 | ||
| 
						 | 
					c5106d0c03 | ||
| 
						 | 
					fbf96b1bbb | ||
| 
						 | 
					3c49ddfaa4 | ||
| 
						 | 
					ffb8b3116c | ||
| 
						 | 
					290493e162 | ||
| 
						 | 
					dd8cfff111 | ||
| 
						 | 
					184642adb0 | ||
| 
						 | 
					4774a3bcd2 | ||
| 
						 | 
					25fafa9a89 | ||
| 
						 | 
					713520d3d2 | ||
| 
						 | 
					85ed8175cb | ||
| 
						 | 
					df5c788ef2 | ||
| 
						 | 
					15f22425c8 | ||
| 
						 | 
					e87182cf98 | ||
| 
						 | 
					e3d5319470 | ||
| 
						 | 
					ffedeb1c58 | ||
| 
						 | 
					3e3b367aa9 | ||
| 
						 | 
					3e80947c2b | ||
| 
						 | 
					fdfbf11c6d | ||
| 
						 | 
					9cb90f714e | ||
| 
						 | 
					6ce174cd60 | ||
| 
						 | 
					17ca5240f7 | ||
| 
						 | 
					2daffdf95d | ||
| 
						 | 
					149f826601 | ||
| 
						 | 
					cd8ee27080 | ||
| 
						 | 
					0fa66e8f3c | ||
| 
						 | 
					8dd099267d | ||
| 
						 | 
					1a6d65c6a4 | ||
| 
						 | 
					fc4a043663 | ||
| 
						 | 
					61ba50665e | ||
| 
						 | 
					bfe14000a9 | ||
| 
						 | 
					092fa0d8da | ||
| 
						 | 
					1ceff48133 | ||
| 
						 | 
					680645f849 | ||
| 
						 | 
					3fc6e03ad1 | ||
| 
						 | 
					2d6614f3a1 | ||
| 
						 | 
					4e041b5103 | ||
| 
						 | 
					712b9a3489 | ||
| 
						 | 
					bdaa5b1767 | ||
| 
						 | 
					8fcefc021a | ||
| 
						 | 
					1445189361 | ||
| 
						 | 
					05c884a62a | ||
| 
						 | 
					a25bec87d9 | ||
| 
						 | 
					2d8bb4c594 | ||
| 
						 | 
					51cb2d4328 | ||
| 
						 | 
					6d58cb2a68 | ||
| 
						 | 
					c8b35d960c | ||
| 
						 | 
					532f41dd61 | ||
| 
						 | 
					661b0ab45d | ||
| 
						 | 
					565e9329ba | ||
| 
						 | 
					4bc08ed995 | ||
| 
						 | 
					b2933a0557 | ||
| 
						 | 
					db057cc276 | ||
| 
						 | 
					22e88eaf54 | ||
| 
						 | 
					09fe3caebd | ||
| 
						 | 
					5e02392f9c | ||
| 
						 | 
					17a8f51a9b | ||
| 
						 | 
					1b7f88dd00 | ||
| d6737e4bd8 | |||
| d539888e57 | |||
| 
						 | 
					86187d7cca | ||
| 
						 | 
					87418e7df1 | ||
| 
						 | 
					55f65b81b5 | ||
| 
						 | 
					d9408893b3 | ||
| 
						 | 
					05acc22920 | ||
| 
						 | 
					8ac021de73 | ||
| 
						 | 
					e503ef5590 | ||
| 
						 | 
					a7682b0060 | ||
| 
						 | 
					d4c9d71fc8 | ||
| 
						 | 
					786ca52c43 | ||
| 
						 | 
					048ac04abc | ||
| 
						 | 
					f78d89bcbe | ||
| 
						 | 
					53d06046b0 | ||
| 
						 | 
					5d3a1a025d | ||
| 
						 | 
					139cc5f1ae | ||
| 1c0e922585 | |||
| 9d5f693cbe | |||
| 
						 | 
					5c90c3b457 | ||
| 91e04056f9 | |||
| 3789e3f31c | |||
| 0c66719210 | |||
| 
						 | 
					3a5b5c8bec | ||
| 
						 | 
					fdbe071213 | ||
| 4bc21ec7cb | |||
| e3083b6dfc | |||
| 
						 | 
					ab89418658 | ||
| 
						 | 
					28cd99882c | ||
| 
						 | 
					aceaee774c | ||
| 
						 | 
					f8f9fd6f22 | ||
| 101aa769eb | |||
| 0bf99bfde5 | |||
| 64bf6fe54e | |||
| 1161d566b9 | |||
| c698b16d75 | |||
| c4c89336fe | |||
| fa59789580 | |||
| 92c2c7d3b5 | |||
| e99ce0875f | |||
| cc1d9eb05b | |||
| 57c027fea2 | |||
| 207dc439a7 | |||
| 77ef0bba48 | |||
| 
						 | 
					999b3a2e26 | ||
| 7ee577eee6 | |||
| d27ceb75dd | |||
| 65c2b794b5 | |||
| de82b08f70 | |||
| 1d03f515b9 | |||
| 1c4c287925 | |||
| 10bbfdc3b2 | |||
| e15f0b47c1 | |||
| 0fd0661be3 | |||
| 6628806142 | |||
| 17198a4abd | |||
| 
						 | 
					465e6f01b7 | ||
| 
						 | 
					0eec752216 | ||
| 
						 | 
					122195384e | ||
| 
						 | 
					2ae1c14c03 | ||
| 
						 | 
					0ddb7e707b | ||
| 
						 | 
					e2d8f67f63 | ||
| 
						 | 
					0d99f62027 | ||
| 
						 | 
					c23375cd65 | ||
| 
						 | 
					a762a0d9ff | ||
| 
						 | 
					f7ca6ca889 | ||
| 
						 | 
					ec4a9b7f6c | ||
| 
						 | 
					5341977948 | ||
| 
						 | 
					f0aed4672e | ||
| 344d251fc4 | |||
| f6c53e5039 | |||
| ba09cbae3e | |||
| 6aa000176f | |||
| 23b6172c31 | |||
| ca5eebe10c | |||
| 3f128443ab | |||
| 
						 | 
					1e554350ac | ||
| 
						 | 
					c79ea0dcef | ||
| 
						 | 
					e3f141f82f | ||
| 
						 | 
					a6dfa2386b | ||
| 
						 | 
					d9b5e66877 | ||
| 
						 | 
					8fd8bc25e9 | ||
| 
						 | 
					ba427abde9 | ||
| 
						 | 
					9b6ab6db16 | ||
| 
						 | 
					806a83d38b | ||
| 
						 | 
					7223753355 | ||
| 
						 | 
					b27bac4669 | ||
| 
						 | 
					c8a93d6a93 | ||
| 
						 | 
					04072a5e1f | ||
| 
						 | 
					574ea4f843 | ||
| 
						 | 
					f2ae9682ff | ||
| 
						 | 
					587f80cd93 | ||
| 
						 | 
					528eb773ad | ||
| 
						 | 
					e5657510b0 | ||
| 
						 | 
					f473919526 | ||
| 
						 | 
					8f1b0afc2a | ||
| 
						 | 
					1494b0f397 | ||
| 
						 | 
					ab56ccdd25 | ||
| cf2f69812b | |||
| 
						 | 
					339be37dba | ||
| 
						 | 
					c323425496 | ||
| 
						 | 
					a87b744621 | ||
| 
						 | 
					a646260e82 | ||
| 
						 | 
					af9c8d1372 | ||
| 
						 | 
					650e02b344 | ||
| 
						 | 
					a524ca2a4b | ||
| 
						 | 
					23a7176b71 | ||
| 
						 | 
					b1192a8908 | ||
| 
						 | 
					e8dddb1596 | ||
| 97d0d56bcb | |||
| 
						 | 
					c7ba47bdc7 | ||
| 7c7ea35ffb | |||
| 4b1cf580e0 | |||
| 
						 | 
					e67fc2be18 | ||
| 
						 | 
					f473ef7591 | ||
| 
						 | 
					f7b1060aed | ||
| 
						 | 
					8052556275 | ||
| 
						 | 
					60d965f79e | ||
| 
						 | 
					83b15bfcdd | ||
| 
						 | 
					1ecbf9794d | ||
| 
						 | 
					2ded354403 | ||
| 
						 | 
					340428a1fe | ||
| 
						 | 
					c77b7ee897 | ||
| 
						 | 
					b6c3bc574b | ||
| 
						 | 
					1e355a51e1 | ||
| 
						 | 
					ad80f61fba | ||
| 
						 | 
					61469252fe | ||
| 
						 | 
					02198ac5b5 | ||
| 
						 | 
					21abaf7e91 | ||
| 
						 | 
					165bffc2e7 | ||
| 
						 | 
					644fd6d32e | ||
| 
						 | 
					f54e0ec9bd | ||
| 
						 | 
					a155a362da | ||
| 
						 | 
					60d4564151 | ||
| 
						 | 
					d4e57f4bc6 | ||
| 
						 | 
					3920b2c0ab | ||
| 
						 | 
					2733c4b93c | ||
| 
						 | 
					e17c773a0b | ||
| 
						 | 
					36a800f26c | ||
| 
						 | 
					b75da563d9 | ||
| 
						 | 
					f9faec38be | ||
| 
						 | 
					d6b64f47d9 | ||
| 
						 | 
					a359f7a9f5 | ||
| 
						 | 
					b606deb3f0 | ||
| 
						 | 
					090e7aa930 | ||
| 
						 | 
					2dce9c3cff | ||
| 
						 | 
					1e72bd8b8c | ||
| 
						 | 
					dc72293398 | ||
| 
						 | 
					e55c35734b | ||
| 
						 | 
					325e745daa | ||
| 
						 | 
					61413565d0 | ||
| 
						 | 
					ff129d9ad9 | ||
| 
						 | 
					03fcd3b33a | ||
| 
						 | 
					68b02da483 | ||
| 
						 | 
					e051119769 | ||
| 2d8bb356e3 | |||
| 
						 | 
					f3661aac4f | ||
| a7251f28c7 | |||
| 1eb169ac0b | |||
| 5674c3e241 | |||
| 62c4ba0d1e | |||
| 
						 | 
					497e7e4c53 | ||
| 19526d09c2 | |||
| 
						 | 
					6aeaf6f568 | ||
| 
						 | 
					40f2db9bc0 | ||
| 
						 | 
					2cfa20cc4e | ||
| 
						 | 
					a5f683d124 | ||
| 
						 | 
					02a57ffa6f | ||
| 
						 | 
					9f0d9ade68 | ||
| 
						 | 
					c1b1b89d17 | ||
| 
						 | 
					771235017d | ||
| 
						 | 
					3425751cb8 | ||
| 
						 | 
					db5e8050a8 | ||
| 
						 | 
					a3fbabf404 | ||
| 
						 | 
					22422a84d9 | ||
| 
						 | 
					b6f6da923e | ||
| 
						 | 
					c9fadf97a5 | ||
| 
						 | 
					c650bb3f3d | ||
| 
						 | 
					81395e85d1 | ||
| 
						 | 
					340a29b735 | ||
| 
						 | 
					f7be108e35 | ||
| 
						 | 
					a0fc47c6f9 | ||
| 
						 | 
					42a9ac71d2 | ||
| 
						 | 
					41c2b09184 | ||
| 
						 | 
					294dbf1bf0 | ||
| 
						 | 
					9548c8b91f | ||
| 
						 | 
					7f927a541c | ||
| 
						 | 
					e2f73e3ead | ||
| 
						 | 
					6371676a75 | ||
| 
						 | 
					bd84c23298 | ||
| 
						 | 
					7aa8d5e8af | ||
| 
						 | 
					6012b0ec23 | ||
| 
						 | 
					411ac49dd7 | ||
| 
						 | 
					b8fb05a422 | ||
| 
						 | 
					5c57d4f403 | ||
| 
						 | 
					fc6ad65751 | ||
| 
						 | 
					dafc74020c | ||
| 
						 | 
					d19321dfde | ||
| 
						 | 
					5924e5a562 | ||
| 
						 | 
					c99d748da6 | ||
| 
						 | 
					02452afd36 | ||
| 
						 | 
					331768dcff | ||
| 
						 | 
					4aac345bea | ||
| 
						 | 
					15c0022042 | ||
| 
						 | 
					aae8bf31a7 | ||
| 
						 | 
					1e68b1c1bd | ||
| 
						 | 
					491a708225 | ||
| 
						 | 
					5a80930dd2 | ||
| 
						 | 
					145a295231 | ||
| 
						 | 
					841a37f941 | ||
| 
						 | 
					e6cad3821c | ||
| 
						 | 
					98de1cbb6a | ||
| 
						 | 
					f7d61b8b81 | ||
| 
						 | 
					78c4e862ef | ||
| 1e0be161e5 | |||
| 
						 | 
					0afcf1cf13 | ||
| 
						 | 
					08edbb5cbe | ||
| 
						 | 
					0abfbcc8eb | ||
| 
						 | 
					1b94253ba4 | ||
| 
						 | 
					36e6f9ac7b | ||
| 
						 | 
					2f41691c11 | ||
| 
						 | 
					09bfe52840 | ||
| 
						 | 
					8c9010d0f4 | ||
| 
						 | 
					42c583265c | ||
| 
						 | 
					539d698492 | ||
| 
						 | 
					31ca609d12 | ||
| 
						 | 
					5710966324 | ||
| 
						 | 
					e108e708a3 | ||
| 
						 | 
					6f0198d4d9 | ||
| 
						 | 
					67ccb043f1 | ||
| 
						 | 
					24a5a81c53 | ||
| 
						 | 
					eb1759d7ea | ||
| 
						 | 
					34a0fde2ad | ||
| 
						 | 
					bc34b7e808 | ||
| 
						 | 
					284453c5e9 | ||
| 
						 | 
					77054bd61c | ||
| 
						 | 
					f2b4edc090 | ||
| 
						 | 
					fb81acca3c | ||
| 
						 | 
					ee9ecb6115 | 
							
								
								
									
										32
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										32
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -5,7 +5,6 @@
 | 
			
		||||
*.o
 | 
			
		||||
*.obj
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Editor files #
 | 
			
		||||
################
 | 
			
		||||
*~
 | 
			
		||||
@@ -48,6 +47,7 @@ Config.h.in
 | 
			
		||||
config.log
 | 
			
		||||
config.status
 | 
			
		||||
.deps
 | 
			
		||||
*.inc
 | 
			
		||||
 | 
			
		||||
# http://www.gnu.org/software/autoconf #
 | 
			
		||||
########################################
 | 
			
		||||
@@ -62,19 +62,8 @@ stamp-h1
 | 
			
		||||
config.sub
 | 
			
		||||
config.guess
 | 
			
		||||
INSTALL
 | 
			
		||||
 | 
			
		||||
# Packages #
 | 
			
		||||
############
 | 
			
		||||
# it's better to unpack these files and commit the raw source
 | 
			
		||||
# git has its own built in compression methods
 | 
			
		||||
*.7z
 | 
			
		||||
*.dmg
 | 
			
		||||
*.gz
 | 
			
		||||
*.iso
 | 
			
		||||
*.jar
 | 
			
		||||
*.rar
 | 
			
		||||
*.tar
 | 
			
		||||
*.zip
 | 
			
		||||
.dirstamp
 | 
			
		||||
ltmain.sh
 | 
			
		||||
 
 | 
			
		||||
# Logs and databases #
 | 
			
		||||
######################
 | 
			
		||||
@@ -94,9 +83,22 @@ Thumbs.db
 | 
			
		||||
 | 
			
		||||
# build directory #
 | 
			
		||||
###################
 | 
			
		||||
build/*
 | 
			
		||||
build*/*
 | 
			
		||||
 | 
			
		||||
# IDE related files #
 | 
			
		||||
#####################
 | 
			
		||||
*.xcodeproj/*
 | 
			
		||||
build.sh
 | 
			
		||||
 | 
			
		||||
# Eigen source #
 | 
			
		||||
################
 | 
			
		||||
lib/Eigen/*
 | 
			
		||||
 | 
			
		||||
# FFTW source #
 | 
			
		||||
################
 | 
			
		||||
lib/fftw/*
 | 
			
		||||
 | 
			
		||||
# libtool macros #
 | 
			
		||||
##################
 | 
			
		||||
m4/lt*
 | 
			
		||||
m4/libtool.m4
 | 
			
		||||
							
								
								
									
										106
									
								
								.travis.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								.travis.yml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,106 @@
 | 
			
		||||
language: cpp
 | 
			
		||||
 | 
			
		||||
cache:
 | 
			
		||||
  directories:
 | 
			
		||||
    - clang
 | 
			
		||||
 | 
			
		||||
matrix:
 | 
			
		||||
  include:
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode7.2
 | 
			
		||||
      compiler: clang
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.9
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-4.9
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-5
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-5
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.8
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.8
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 | 
			
		||||
      
 | 
			
		||||
before_install:
 | 
			
		||||
    - export GRIDDIR=`pwd`
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
 | 
			
		||||
    
 | 
			
		||||
install:
 | 
			
		||||
    - export CC=$CC$VERSION
 | 
			
		||||
    - export CXX=$CXX$VERSION
 | 
			
		||||
    - echo $PATH
 | 
			
		||||
    - which $CC
 | 
			
		||||
    - $CC  --version
 | 
			
		||||
    - which $CXX
 | 
			
		||||
    - $CXX --version
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
 | 
			
		||||
    
 | 
			
		||||
script:
 | 
			
		||||
    - ./bootstrap.sh
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4 
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
 | 
			
		||||
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										9
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								AUTHORS
									
									
									
									
									
								
							@@ -1,5 +1,4 @@
 | 
			
		||||
Peter Boyle
 | 
			
		||||
Azusa Yamaguchi
 | 
			
		||||
Intel Parallel Computing Centre @ Higgs Centre for Theoretical Physics
 | 
			
		||||
University of Edinburgh
 | 
			
		||||
Scotland, UK
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										876
									
								
								COPYING
									
									
									
									
									
								
							
							
						
						
									
										876
									
								
								COPYING
									
									
									
									
									
								
							@@ -1,622 +1,281 @@
 | 
			
		||||
                    GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 3, 29 June 2007
 | 
			
		||||
                   GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 2, June 1991
 | 
			
		||||
 | 
			
		||||
 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 Everyone is permitted to copy and distribute verbatim copies
 | 
			
		||||
 of this license document, but changing it is not allowed.
 | 
			
		||||
 | 
			
		||||
                            Preamble
 | 
			
		||||
 | 
			
		||||
  The GNU General Public License is a free, copyleft license for
 | 
			
		||||
software and other kinds of works.
 | 
			
		||||
 | 
			
		||||
  The licenses for most software and other practical works are designed
 | 
			
		||||
to take away your freedom to share and change the works.  By contrast,
 | 
			
		||||
the GNU General Public License is intended to guarantee your freedom to
 | 
			
		||||
share and change all versions of a program--to make sure it remains free
 | 
			
		||||
software for all its users.  We, the Free Software Foundation, use the
 | 
			
		||||
GNU General Public License for most of our software; it applies also to
 | 
			
		||||
any other work released this way by its authors.  You can apply it to
 | 
			
		||||
  The licenses for most software are designed to take away your
 | 
			
		||||
freedom to share and change it.  By contrast, the GNU General Public
 | 
			
		||||
License is intended to guarantee your freedom to share and change free
 | 
			
		||||
software--to make sure the software is free for all its users.  This
 | 
			
		||||
General Public License applies to most of the Free Software
 | 
			
		||||
Foundation's software and to any other program whose authors commit to
 | 
			
		||||
using it.  (Some other Free Software Foundation software is covered by
 | 
			
		||||
the GNU Lesser General Public License instead.)  You can apply it to
 | 
			
		||||
your programs, too.
 | 
			
		||||
 | 
			
		||||
  When we speak of free software, we are referring to freedom, not
 | 
			
		||||
price.  Our General Public Licenses are designed to make sure that you
 | 
			
		||||
have the freedom to distribute copies of free software (and charge for
 | 
			
		||||
them if you wish), that you receive source code or can get it if you
 | 
			
		||||
want it, that you can change the software or use pieces of it in new
 | 
			
		||||
free programs, and that you know you can do these things.
 | 
			
		||||
this service if you wish), that you receive source code or can get it
 | 
			
		||||
if you want it, that you can change the software or use pieces of it
 | 
			
		||||
in new free programs; and that you know you can do these things.
 | 
			
		||||
 | 
			
		||||
  To protect your rights, we need to prevent others from denying you
 | 
			
		||||
these rights or asking you to surrender the rights.  Therefore, you have
 | 
			
		||||
certain responsibilities if you distribute copies of the software, or if
 | 
			
		||||
you modify it: responsibilities to respect the freedom of others.
 | 
			
		||||
  To protect your rights, we need to make restrictions that forbid
 | 
			
		||||
anyone to deny you these rights or to ask you to surrender the rights.
 | 
			
		||||
These restrictions translate to certain responsibilities for you if you
 | 
			
		||||
distribute copies of the software, or if you modify it.
 | 
			
		||||
 | 
			
		||||
  For example, if you distribute copies of such a program, whether
 | 
			
		||||
gratis or for a fee, you must pass on to the recipients the same
 | 
			
		||||
freedoms that you received.  You must make sure that they, too, receive
 | 
			
		||||
or can get the source code.  And you must show them these terms so they
 | 
			
		||||
know their rights.
 | 
			
		||||
gratis or for a fee, you must give the recipients all the rights that
 | 
			
		||||
you have.  You must make sure that they, too, receive or can get the
 | 
			
		||||
source code.  And you must show them these terms so they know their
 | 
			
		||||
rights.
 | 
			
		||||
 | 
			
		||||
  Developers that use the GNU GPL protect your rights with two steps:
 | 
			
		||||
(1) assert copyright on the software, and (2) offer you this License
 | 
			
		||||
giving you legal permission to copy, distribute and/or modify it.
 | 
			
		||||
  We protect your rights with two steps: (1) copyright the software, and
 | 
			
		||||
(2) offer you this license which gives you legal permission to copy,
 | 
			
		||||
distribute and/or modify the software.
 | 
			
		||||
 | 
			
		||||
  For the developers' and authors' protection, the GPL clearly explains
 | 
			
		||||
that there is no warranty for this free software.  For both users' and
 | 
			
		||||
authors' sake, the GPL requires that modified versions be marked as
 | 
			
		||||
changed, so that their problems will not be attributed erroneously to
 | 
			
		||||
authors of previous versions.
 | 
			
		||||
  Also, for each author's protection and ours, we want to make certain
 | 
			
		||||
that everyone understands that there is no warranty for this free
 | 
			
		||||
software.  If the software is modified by someone else and passed on, we
 | 
			
		||||
want its recipients to know that what they have is not the original, so
 | 
			
		||||
that any problems introduced by others will not reflect on the original
 | 
			
		||||
authors' reputations.
 | 
			
		||||
 | 
			
		||||
  Some devices are designed to deny users access to install or run
 | 
			
		||||
modified versions of the software inside them, although the manufacturer
 | 
			
		||||
can do so.  This is fundamentally incompatible with the aim of
 | 
			
		||||
protecting users' freedom to change the software.  The systematic
 | 
			
		||||
pattern of such abuse occurs in the area of products for individuals to
 | 
			
		||||
use, which is precisely where it is most unacceptable.  Therefore, we
 | 
			
		||||
have designed this version of the GPL to prohibit the practice for those
 | 
			
		||||
products.  If such problems arise substantially in other domains, we
 | 
			
		||||
stand ready to extend this provision to those domains in future versions
 | 
			
		||||
of the GPL, as needed to protect the freedom of users.
 | 
			
		||||
 | 
			
		||||
  Finally, every program is threatened constantly by software patents.
 | 
			
		||||
States should not allow patents to restrict development and use of
 | 
			
		||||
software on general-purpose computers, but in those that do, we wish to
 | 
			
		||||
avoid the special danger that patents applied to a free program could
 | 
			
		||||
make it effectively proprietary.  To prevent this, the GPL assures that
 | 
			
		||||
patents cannot be used to render the program non-free.
 | 
			
		||||
  Finally, any free program is threatened constantly by software
 | 
			
		||||
patents.  We wish to avoid the danger that redistributors of a free
 | 
			
		||||
program will individually obtain patent licenses, in effect making the
 | 
			
		||||
program proprietary.  To prevent this, we have made it clear that any
 | 
			
		||||
patent must be licensed for everyone's free use or not licensed at all.
 | 
			
		||||
 | 
			
		||||
  The precise terms and conditions for copying, distribution and
 | 
			
		||||
modification follow.
 | 
			
		||||
 | 
			
		||||
                       TERMS AND CONDITIONS
 | 
			
		||||
 | 
			
		||||
  0. Definitions.
 | 
			
		||||
 | 
			
		||||
  "This License" refers to version 3 of the GNU General Public License.
 | 
			
		||||
 | 
			
		||||
  "Copyright" also means copyright-like laws that apply to other kinds of
 | 
			
		||||
works, such as semiconductor masks.
 | 
			
		||||
 | 
			
		||||
  "The Program" refers to any copyrightable work licensed under this
 | 
			
		||||
License.  Each licensee is addressed as "you".  "Licensees" and
 | 
			
		||||
"recipients" may be individuals or organizations.
 | 
			
		||||
 | 
			
		||||
  To "modify" a work means to copy from or adapt all or part of the work
 | 
			
		||||
in a fashion requiring copyright permission, other than the making of an
 | 
			
		||||
exact copy.  The resulting work is called a "modified version" of the
 | 
			
		||||
earlier work or a work "based on" the earlier work.
 | 
			
		||||
 | 
			
		||||
  A "covered work" means either the unmodified Program or a work based
 | 
			
		||||
on the Program.
 | 
			
		||||
 | 
			
		||||
  To "propagate" a work means to do anything with it that, without
 | 
			
		||||
permission, would make you directly or secondarily liable for
 | 
			
		||||
infringement under applicable copyright law, except executing it on a
 | 
			
		||||
computer or modifying a private copy.  Propagation includes copying,
 | 
			
		||||
distribution (with or without modification), making available to the
 | 
			
		||||
public, and in some countries other activities as well.
 | 
			
		||||
 | 
			
		||||
  To "convey" a work means any kind of propagation that enables other
 | 
			
		||||
parties to make or receive copies.  Mere interaction with a user through
 | 
			
		||||
a computer network, with no transfer of a copy, is not conveying.
 | 
			
		||||
 | 
			
		||||
  An interactive user interface displays "Appropriate Legal Notices"
 | 
			
		||||
to the extent that it includes a convenient and prominently visible
 | 
			
		||||
feature that (1) displays an appropriate copyright notice, and (2)
 | 
			
		||||
tells the user that there is no warranty for the work (except to the
 | 
			
		||||
extent that warranties are provided), that licensees may convey the
 | 
			
		||||
work under this License, and how to view a copy of this License.  If
 | 
			
		||||
the interface presents a list of user commands or options, such as a
 | 
			
		||||
menu, a prominent item in the list meets this criterion.
 | 
			
		||||
 | 
			
		||||
  1. Source Code.
 | 
			
		||||
 | 
			
		||||
  The "source code" for a work means the preferred form of the work
 | 
			
		||||
for making modifications to it.  "Object code" means any non-source
 | 
			
		||||
form of a work.
 | 
			
		||||
 | 
			
		||||
  A "Standard Interface" means an interface that either is an official
 | 
			
		||||
standard defined by a recognized standards body, or, in the case of
 | 
			
		||||
interfaces specified for a particular programming language, one that
 | 
			
		||||
is widely used among developers working in that language.
 | 
			
		||||
 | 
			
		||||
  The "System Libraries" of an executable work include anything, other
 | 
			
		||||
than the work as a whole, that (a) is included in the normal form of
 | 
			
		||||
packaging a Major Component, but which is not part of that Major
 | 
			
		||||
Component, and (b) serves only to enable use of the work with that
 | 
			
		||||
Major Component, or to implement a Standard Interface for which an
 | 
			
		||||
implementation is available to the public in source code form.  A
 | 
			
		||||
"Major Component", in this context, means a major essential component
 | 
			
		||||
(kernel, window system, and so on) of the specific operating system
 | 
			
		||||
(if any) on which the executable work runs, or a compiler used to
 | 
			
		||||
produce the work, or an object code interpreter used to run it.
 | 
			
		||||
 | 
			
		||||
  The "Corresponding Source" for a work in object code form means all
 | 
			
		||||
the source code needed to generate, install, and (for an executable
 | 
			
		||||
work) run the object code and to modify the work, including scripts to
 | 
			
		||||
control those activities.  However, it does not include the work's
 | 
			
		||||
System Libraries, or general-purpose tools or generally available free
 | 
			
		||||
programs which are used unmodified in performing those activities but
 | 
			
		||||
which are not part of the work.  For example, Corresponding Source
 | 
			
		||||
includes interface definition files associated with source files for
 | 
			
		||||
the work, and the source code for shared libraries and dynamically
 | 
			
		||||
linked subprograms that the work is specifically designed to require,
 | 
			
		||||
such as by intimate data communication or control flow between those
 | 
			
		||||
subprograms and other parts of the work.
 | 
			
		||||
 | 
			
		||||
  The Corresponding Source need not include anything that users
 | 
			
		||||
can regenerate automatically from other parts of the Corresponding
 | 
			
		||||
Source.
 | 
			
		||||
 | 
			
		||||
  The Corresponding Source for a work in source code form is that
 | 
			
		||||
same work.
 | 
			
		||||
 | 
			
		||||
  2. Basic Permissions.
 | 
			
		||||
 | 
			
		||||
  All rights granted under this License are granted for the term of
 | 
			
		||||
copyright on the Program, and are irrevocable provided the stated
 | 
			
		||||
conditions are met.  This License explicitly affirms your unlimited
 | 
			
		||||
permission to run the unmodified Program.  The output from running a
 | 
			
		||||
covered work is covered by this License only if the output, given its
 | 
			
		||||
content, constitutes a covered work.  This License acknowledges your
 | 
			
		||||
rights of fair use or other equivalent, as provided by copyright law.
 | 
			
		||||
 | 
			
		||||
  You may make, run and propagate covered works that you do not
 | 
			
		||||
convey, without conditions so long as your license otherwise remains
 | 
			
		||||
in force.  You may convey covered works to others for the sole purpose
 | 
			
		||||
of having them make modifications exclusively for you, or provide you
 | 
			
		||||
with facilities for running those works, provided that you comply with
 | 
			
		||||
the terms of this License in conveying all material for which you do
 | 
			
		||||
not control copyright.  Those thus making or running the covered works
 | 
			
		||||
for you must do so exclusively on your behalf, under your direction
 | 
			
		||||
and control, on terms that prohibit them from making any copies of
 | 
			
		||||
your copyrighted material outside their relationship with you.
 | 
			
		||||
 | 
			
		||||
  Conveying under any other circumstances is permitted solely under
 | 
			
		||||
the conditions stated below.  Sublicensing is not allowed; section 10
 | 
			
		||||
makes it unnecessary.
 | 
			
		||||
 | 
			
		||||
  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
 | 
			
		||||
 | 
			
		||||
  No covered work shall be deemed part of an effective technological
 | 
			
		||||
measure under any applicable law fulfilling obligations under article
 | 
			
		||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
 | 
			
		||||
similar laws prohibiting or restricting circumvention of such
 | 
			
		||||
measures.
 | 
			
		||||
 | 
			
		||||
  When you convey a covered work, you waive any legal power to forbid
 | 
			
		||||
circumvention of technological measures to the extent such circumvention
 | 
			
		||||
is effected by exercising rights under this License with respect to
 | 
			
		||||
the covered work, and you disclaim any intention to limit operation or
 | 
			
		||||
modification of the work as a means of enforcing, against the work's
 | 
			
		||||
users, your or third parties' legal rights to forbid circumvention of
 | 
			
		||||
technological measures.
 | 
			
		||||
 | 
			
		||||
  4. Conveying Verbatim Copies.
 | 
			
		||||
 | 
			
		||||
  You may convey verbatim copies of the Program's source code as you
 | 
			
		||||
receive it, in any medium, provided that you conspicuously and
 | 
			
		||||
appropriately publish on each copy an appropriate copyright notice;
 | 
			
		||||
keep intact all notices stating that this License and any
 | 
			
		||||
non-permissive terms added in accord with section 7 apply to the code;
 | 
			
		||||
keep intact all notices of the absence of any warranty; and give all
 | 
			
		||||
recipients a copy of this License along with the Program.
 | 
			
		||||
 | 
			
		||||
  You may charge any price or no price for each copy that you convey,
 | 
			
		||||
and you may offer support or warranty protection for a fee.
 | 
			
		||||
 | 
			
		||||
  5. Conveying Modified Source Versions.
 | 
			
		||||
 | 
			
		||||
  You may convey a work based on the Program, or the modifications to
 | 
			
		||||
produce it from the Program, in the form of source code under the
 | 
			
		||||
terms of section 4, provided that you also meet all of these conditions:
 | 
			
		||||
 | 
			
		||||
    a) The work must carry prominent notices stating that you modified
 | 
			
		||||
    it, and giving a relevant date.
 | 
			
		||||
 | 
			
		||||
    b) The work must carry prominent notices stating that it is
 | 
			
		||||
    released under this License and any conditions added under section
 | 
			
		||||
    7.  This requirement modifies the requirement in section 4 to
 | 
			
		||||
    "keep intact all notices".
 | 
			
		||||
 | 
			
		||||
    c) You must license the entire work, as a whole, under this
 | 
			
		||||
    License to anyone who comes into possession of a copy.  This
 | 
			
		||||
    License will therefore apply, along with any applicable section 7
 | 
			
		||||
    additional terms, to the whole of the work, and all its parts,
 | 
			
		||||
    regardless of how they are packaged.  This License gives no
 | 
			
		||||
    permission to license the work in any other way, but it does not
 | 
			
		||||
    invalidate such permission if you have separately received it.
 | 
			
		||||
 | 
			
		||||
    d) If the work has interactive user interfaces, each must display
 | 
			
		||||
    Appropriate Legal Notices; however, if the Program has interactive
 | 
			
		||||
    interfaces that do not display Appropriate Legal Notices, your
 | 
			
		||||
    work need not make them do so.
 | 
			
		||||
 | 
			
		||||
  A compilation of a covered work with other separate and independent
 | 
			
		||||
works, which are not by their nature extensions of the covered work,
 | 
			
		||||
and which are not combined with it such as to form a larger program,
 | 
			
		||||
in or on a volume of a storage or distribution medium, is called an
 | 
			
		||||
"aggregate" if the compilation and its resulting copyright are not
 | 
			
		||||
used to limit the access or legal rights of the compilation's users
 | 
			
		||||
beyond what the individual works permit.  Inclusion of a covered work
 | 
			
		||||
in an aggregate does not cause this License to apply to the other
 | 
			
		||||
parts of the aggregate.
 | 
			
		||||
 | 
			
		||||
  6. Conveying Non-Source Forms.
 | 
			
		||||
 | 
			
		||||
  You may convey a covered work in object code form under the terms
 | 
			
		||||
of sections 4 and 5, provided that you also convey the
 | 
			
		||||
machine-readable Corresponding Source under the terms of this License,
 | 
			
		||||
in one of these ways:
 | 
			
		||||
 | 
			
		||||
    a) Convey the object code in, or embodied in, a physical product
 | 
			
		||||
    (including a physical distribution medium), accompanied by the
 | 
			
		||||
    Corresponding Source fixed on a durable physical medium
 | 
			
		||||
    customarily used for software interchange.
 | 
			
		||||
 | 
			
		||||
    b) Convey the object code in, or embodied in, a physical product
 | 
			
		||||
    (including a physical distribution medium), accompanied by a
 | 
			
		||||
    written offer, valid for at least three years and valid for as
 | 
			
		||||
    long as you offer spare parts or customer support for that product
 | 
			
		||||
    model, to give anyone who possesses the object code either (1) a
 | 
			
		||||
    copy of the Corresponding Source for all the software in the
 | 
			
		||||
    product that is covered by this License, on a durable physical
 | 
			
		||||
    medium customarily used for software interchange, for a price no
 | 
			
		||||
    more than your reasonable cost of physically performing this
 | 
			
		||||
    conveying of source, or (2) access to copy the
 | 
			
		||||
    Corresponding Source from a network server at no charge.
 | 
			
		||||
 | 
			
		||||
    c) Convey individual copies of the object code with a copy of the
 | 
			
		||||
    written offer to provide the Corresponding Source.  This
 | 
			
		||||
    alternative is allowed only occasionally and noncommercially, and
 | 
			
		||||
    only if you received the object code with such an offer, in accord
 | 
			
		||||
    with subsection 6b.
 | 
			
		||||
 | 
			
		||||
    d) Convey the object code by offering access from a designated
 | 
			
		||||
    place (gratis or for a charge), and offer equivalent access to the
 | 
			
		||||
    Corresponding Source in the same way through the same place at no
 | 
			
		||||
    further charge.  You need not require recipients to copy the
 | 
			
		||||
    Corresponding Source along with the object code.  If the place to
 | 
			
		||||
    copy the object code is a network server, the Corresponding Source
 | 
			
		||||
    may be on a different server (operated by you or a third party)
 | 
			
		||||
    that supports equivalent copying facilities, provided you maintain
 | 
			
		||||
    clear directions next to the object code saying where to find the
 | 
			
		||||
    Corresponding Source.  Regardless of what server hosts the
 | 
			
		||||
    Corresponding Source, you remain obligated to ensure that it is
 | 
			
		||||
    available for as long as needed to satisfy these requirements.
 | 
			
		||||
 | 
			
		||||
    e) Convey the object code using peer-to-peer transmission, provided
 | 
			
		||||
    you inform other peers where the object code and Corresponding
 | 
			
		||||
    Source of the work are being offered to the general public at no
 | 
			
		||||
    charge under subsection 6d.
 | 
			
		||||
 | 
			
		||||
  A separable portion of the object code, whose source code is excluded
 | 
			
		||||
from the Corresponding Source as a System Library, need not be
 | 
			
		||||
included in conveying the object code work.
 | 
			
		||||
 | 
			
		||||
  A "User Product" is either (1) a "consumer product", which means any
 | 
			
		||||
tangible personal property which is normally used for personal, family,
 | 
			
		||||
or household purposes, or (2) anything designed or sold for incorporation
 | 
			
		||||
into a dwelling.  In determining whether a product is a consumer product,
 | 
			
		||||
doubtful cases shall be resolved in favor of coverage.  For a particular
 | 
			
		||||
product received by a particular user, "normally used" refers to a
 | 
			
		||||
typical or common use of that class of product, regardless of the status
 | 
			
		||||
of the particular user or of the way in which the particular user
 | 
			
		||||
actually uses, or expects or is expected to use, the product.  A product
 | 
			
		||||
is a consumer product regardless of whether the product has substantial
 | 
			
		||||
commercial, industrial or non-consumer uses, unless such uses represent
 | 
			
		||||
the only significant mode of use of the product.
 | 
			
		||||
 | 
			
		||||
  "Installation Information" for a User Product means any methods,
 | 
			
		||||
procedures, authorization keys, or other information required to install
 | 
			
		||||
and execute modified versions of a covered work in that User Product from
 | 
			
		||||
a modified version of its Corresponding Source.  The information must
 | 
			
		||||
suffice to ensure that the continued functioning of the modified object
 | 
			
		||||
code is in no case prevented or interfered with solely because
 | 
			
		||||
modification has been made.
 | 
			
		||||
 | 
			
		||||
  If you convey an object code work under this section in, or with, or
 | 
			
		||||
specifically for use in, a User Product, and the conveying occurs as
 | 
			
		||||
part of a transaction in which the right of possession and use of the
 | 
			
		||||
User Product is transferred to the recipient in perpetuity or for a
 | 
			
		||||
fixed term (regardless of how the transaction is characterized), the
 | 
			
		||||
Corresponding Source conveyed under this section must be accompanied
 | 
			
		||||
by the Installation Information.  But this requirement does not apply
 | 
			
		||||
if neither you nor any third party retains the ability to install
 | 
			
		||||
modified object code on the User Product (for example, the work has
 | 
			
		||||
been installed in ROM).
 | 
			
		||||
 | 
			
		||||
  The requirement to provide Installation Information does not include a
 | 
			
		||||
requirement to continue to provide support service, warranty, or updates
 | 
			
		||||
for a work that has been modified or installed by the recipient, or for
 | 
			
		||||
the User Product in which it has been modified or installed.  Access to a
 | 
			
		||||
network may be denied when the modification itself materially and
 | 
			
		||||
adversely affects the operation of the network or violates the rules and
 | 
			
		||||
protocols for communication across the network.
 | 
			
		||||
 | 
			
		||||
  Corresponding Source conveyed, and Installation Information provided,
 | 
			
		||||
in accord with this section must be in a format that is publicly
 | 
			
		||||
documented (and with an implementation available to the public in
 | 
			
		||||
source code form), and must require no special password or key for
 | 
			
		||||
unpacking, reading or copying.
 | 
			
		||||
 | 
			
		||||
  7. Additional Terms.
 | 
			
		||||
 | 
			
		||||
  "Additional permissions" are terms that supplement the terms of this
 | 
			
		||||
License by making exceptions from one or more of its conditions.
 | 
			
		||||
Additional permissions that are applicable to the entire Program shall
 | 
			
		||||
be treated as though they were included in this License, to the extent
 | 
			
		||||
that they are valid under applicable law.  If additional permissions
 | 
			
		||||
apply only to part of the Program, that part may be used separately
 | 
			
		||||
under those permissions, but the entire Program remains governed by
 | 
			
		||||
this License without regard to the additional permissions.
 | 
			
		||||
 | 
			
		||||
  When you convey a copy of a covered work, you may at your option
 | 
			
		||||
remove any additional permissions from that copy, or from any part of
 | 
			
		||||
it.  (Additional permissions may be written to require their own
 | 
			
		||||
removal in certain cases when you modify the work.)  You may place
 | 
			
		||||
additional permissions on material, added by you to a covered work,
 | 
			
		||||
for which you have or can give appropriate copyright permission.
 | 
			
		||||
 | 
			
		||||
  Notwithstanding any other provision of this License, for material you
 | 
			
		||||
add to a covered work, you may (if authorized by the copyright holders of
 | 
			
		||||
that material) supplement the terms of this License with terms:
 | 
			
		||||
 | 
			
		||||
    a) Disclaiming warranty or limiting liability differently from the
 | 
			
		||||
    terms of sections 15 and 16 of this License; or
 | 
			
		||||
 | 
			
		||||
    b) Requiring preservation of specified reasonable legal notices or
 | 
			
		||||
    author attributions in that material or in the Appropriate Legal
 | 
			
		||||
    Notices displayed by works containing it; or
 | 
			
		||||
 | 
			
		||||
    c) Prohibiting misrepresentation of the origin of that material, or
 | 
			
		||||
    requiring that modified versions of such material be marked in
 | 
			
		||||
    reasonable ways as different from the original version; or
 | 
			
		||||
 | 
			
		||||
    d) Limiting the use for publicity purposes of names of licensors or
 | 
			
		||||
    authors of the material; or
 | 
			
		||||
 | 
			
		||||
    e) Declining to grant rights under trademark law for use of some
 | 
			
		||||
    trade names, trademarks, or service marks; or
 | 
			
		||||
 | 
			
		||||
    f) Requiring indemnification of licensors and authors of that
 | 
			
		||||
    material by anyone who conveys the material (or modified versions of
 | 
			
		||||
    it) with contractual assumptions of liability to the recipient, for
 | 
			
		||||
    any liability that these contractual assumptions directly impose on
 | 
			
		||||
    those licensors and authors.
 | 
			
		||||
 | 
			
		||||
  All other non-permissive additional terms are considered "further
 | 
			
		||||
restrictions" within the meaning of section 10.  If the Program as you
 | 
			
		||||
received it, or any part of it, contains a notice stating that it is
 | 
			
		||||
governed by this License along with a term that is a further
 | 
			
		||||
restriction, you may remove that term.  If a license document contains
 | 
			
		||||
a further restriction but permits relicensing or conveying under this
 | 
			
		||||
License, you may add to a covered work material governed by the terms
 | 
			
		||||
of that license document, provided that the further restriction does
 | 
			
		||||
not survive such relicensing or conveying.
 | 
			
		||||
 | 
			
		||||
  If you add terms to a covered work in accord with this section, you
 | 
			
		||||
must place, in the relevant source files, a statement of the
 | 
			
		||||
additional terms that apply to those files, or a notice indicating
 | 
			
		||||
where to find the applicable terms.
 | 
			
		||||
 | 
			
		||||
  Additional terms, permissive or non-permissive, may be stated in the
 | 
			
		||||
form of a separately written license, or stated as exceptions;
 | 
			
		||||
the above requirements apply either way.
 | 
			
		||||
 | 
			
		||||
  8. Termination.
 | 
			
		||||
 | 
			
		||||
  You may not propagate or modify a covered work except as expressly
 | 
			
		||||
provided under this License.  Any attempt otherwise to propagate or
 | 
			
		||||
modify it is void, and will automatically terminate your rights under
 | 
			
		||||
this License (including any patent licenses granted under the third
 | 
			
		||||
paragraph of section 11).
 | 
			
		||||
 | 
			
		||||
  However, if you cease all violation of this License, then your
 | 
			
		||||
license from a particular copyright holder is reinstated (a)
 | 
			
		||||
provisionally, unless and until the copyright holder explicitly and
 | 
			
		||||
finally terminates your license, and (b) permanently, if the copyright
 | 
			
		||||
holder fails to notify you of the violation by some reasonable means
 | 
			
		||||
prior to 60 days after the cessation.
 | 
			
		||||
 | 
			
		||||
  Moreover, your license from a particular copyright holder is
 | 
			
		||||
reinstated permanently if the copyright holder notifies you of the
 | 
			
		||||
violation by some reasonable means, this is the first time you have
 | 
			
		||||
received notice of violation of this License (for any work) from that
 | 
			
		||||
copyright holder, and you cure the violation prior to 30 days after
 | 
			
		||||
your receipt of the notice.
 | 
			
		||||
 | 
			
		||||
  Termination of your rights under this section does not terminate the
 | 
			
		||||
licenses of parties who have received copies or rights from you under
 | 
			
		||||
this License.  If your rights have been terminated and not permanently
 | 
			
		||||
reinstated, you do not qualify to receive new licenses for the same
 | 
			
		||||
material under section 10.
 | 
			
		||||
 | 
			
		||||
  9. Acceptance Not Required for Having Copies.
 | 
			
		||||
 | 
			
		||||
  You are not required to accept this License in order to receive or
 | 
			
		||||
run a copy of the Program.  Ancillary propagation of a covered work
 | 
			
		||||
occurring solely as a consequence of using peer-to-peer transmission
 | 
			
		||||
to receive a copy likewise does not require acceptance.  However,
 | 
			
		||||
nothing other than this License grants you permission to propagate or
 | 
			
		||||
modify any covered work.  These actions infringe copyright if you do
 | 
			
		||||
not accept this License.  Therefore, by modifying or propagating a
 | 
			
		||||
covered work, you indicate your acceptance of this License to do so.
 | 
			
		||||
 | 
			
		||||
  10. Automatic Licensing of Downstream Recipients.
 | 
			
		||||
 | 
			
		||||
  Each time you convey a covered work, the recipient automatically
 | 
			
		||||
receives a license from the original licensors, to run, modify and
 | 
			
		||||
propagate that work, subject to this License.  You are not responsible
 | 
			
		||||
for enforcing compliance by third parties with this License.
 | 
			
		||||
 | 
			
		||||
  An "entity transaction" is a transaction transferring control of an
 | 
			
		||||
organization, or substantially all assets of one, or subdividing an
 | 
			
		||||
organization, or merging organizations.  If propagation of a covered
 | 
			
		||||
work results from an entity transaction, each party to that
 | 
			
		||||
transaction who receives a copy of the work also receives whatever
 | 
			
		||||
licenses to the work the party's predecessor in interest had or could
 | 
			
		||||
give under the previous paragraph, plus a right to possession of the
 | 
			
		||||
Corresponding Source of the work from the predecessor in interest, if
 | 
			
		||||
the predecessor has it or can get it with reasonable efforts.
 | 
			
		||||
 | 
			
		||||
  You may not impose any further restrictions on the exercise of the
 | 
			
		||||
rights granted or affirmed under this License.  For example, you may
 | 
			
		||||
not impose a license fee, royalty, or other charge for exercise of
 | 
			
		||||
rights granted under this License, and you may not initiate litigation
 | 
			
		||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
 | 
			
		||||
any patent claim is infringed by making, using, selling, offering for
 | 
			
		||||
sale, or importing the Program or any portion of it.
 | 
			
		||||
 | 
			
		||||
  11. Patents.
 | 
			
		||||
 | 
			
		||||
  A "contributor" is a copyright holder who authorizes use under this
 | 
			
		||||
License of the Program or a work on which the Program is based.  The
 | 
			
		||||
work thus licensed is called the contributor's "contributor version".
 | 
			
		||||
 | 
			
		||||
  A contributor's "essential patent claims" are all patent claims
 | 
			
		||||
owned or controlled by the contributor, whether already acquired or
 | 
			
		||||
hereafter acquired, that would be infringed by some manner, permitted
 | 
			
		||||
by this License, of making, using, or selling its contributor version,
 | 
			
		||||
but do not include claims that would be infringed only as a
 | 
			
		||||
consequence of further modification of the contributor version.  For
 | 
			
		||||
purposes of this definition, "control" includes the right to grant
 | 
			
		||||
patent sublicenses in a manner consistent with the requirements of
 | 
			
		||||
                    GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 | 
			
		||||
 | 
			
		||||
  0. This License applies to any program or other work which contains
 | 
			
		||||
a notice placed by the copyright holder saying it may be distributed
 | 
			
		||||
under the terms of this General Public License.  The "Program", below,
 | 
			
		||||
refers to any such program or work, and a "work based on the Program"
 | 
			
		||||
means either the Program or any derivative work under copyright law:
 | 
			
		||||
that is to say, a work containing the Program or a portion of it,
 | 
			
		||||
either verbatim or with modifications and/or translated into another
 | 
			
		||||
language.  (Hereinafter, translation is included without limitation in
 | 
			
		||||
the term "modification".)  Each licensee is addressed as "you".
 | 
			
		||||
 | 
			
		||||
Activities other than copying, distribution and modification are not
 | 
			
		||||
covered by this License; they are outside its scope.  The act of
 | 
			
		||||
running the Program is not restricted, and the output from the Program
 | 
			
		||||
is covered only if its contents constitute a work based on the
 | 
			
		||||
Program (independent of having been made by running the Program).
 | 
			
		||||
Whether that is true depends on what the Program does.
 | 
			
		||||
 | 
			
		||||
  1. You may copy and distribute verbatim copies of the Program's
 | 
			
		||||
source code as you receive it, in any medium, provided that you
 | 
			
		||||
conspicuously and appropriately publish on each copy an appropriate
 | 
			
		||||
copyright notice and disclaimer of warranty; keep intact all the
 | 
			
		||||
notices that refer to this License and to the absence of any warranty;
 | 
			
		||||
and give any other recipients of the Program a copy of this License
 | 
			
		||||
along with the Program.
 | 
			
		||||
 | 
			
		||||
You may charge a fee for the physical act of transferring a copy, and
 | 
			
		||||
you may at your option offer warranty protection in exchange for a fee.
 | 
			
		||||
 | 
			
		||||
  2. You may modify your copy or copies of the Program or any portion
 | 
			
		||||
of it, thus forming a work based on the Program, and copy and
 | 
			
		||||
distribute such modifications or work under the terms of Section 1
 | 
			
		||||
above, provided that you also meet all of these conditions:
 | 
			
		||||
 | 
			
		||||
    a) You must cause the modified files to carry prominent notices
 | 
			
		||||
    stating that you changed the files and the date of any change.
 | 
			
		||||
 | 
			
		||||
    b) You must cause any work that you distribute or publish, that in
 | 
			
		||||
    whole or in part contains or is derived from the Program or any
 | 
			
		||||
    part thereof, to be licensed as a whole at no charge to all third
 | 
			
		||||
    parties under the terms of this License.
 | 
			
		||||
 | 
			
		||||
    c) If the modified program normally reads commands interactively
 | 
			
		||||
    when run, you must cause it, when started running for such
 | 
			
		||||
    interactive use in the most ordinary way, to print or display an
 | 
			
		||||
    announcement including an appropriate copyright notice and a
 | 
			
		||||
    notice that there is no warranty (or else, saying that you provide
 | 
			
		||||
    a warranty) and that users may redistribute the program under
 | 
			
		||||
    these conditions, and telling the user how to view a copy of this
 | 
			
		||||
    License.  (Exception: if the Program itself is interactive but
 | 
			
		||||
    does not normally print such an announcement, your work based on
 | 
			
		||||
    the Program is not required to print an announcement.)
 | 
			
		||||
 | 
			
		||||
These requirements apply to the modified work as a whole.  If
 | 
			
		||||
identifiable sections of that work are not derived from the Program,
 | 
			
		||||
and can be reasonably considered independent and separate works in
 | 
			
		||||
themselves, then this License, and its terms, do not apply to those
 | 
			
		||||
sections when you distribute them as separate works.  But when you
 | 
			
		||||
distribute the same sections as part of a whole which is a work based
 | 
			
		||||
on the Program, the distribution of the whole must be on the terms of
 | 
			
		||||
this License, whose permissions for other licensees extend to the
 | 
			
		||||
entire whole, and thus to each and every part regardless of who wrote it.
 | 
			
		||||
 | 
			
		||||
Thus, it is not the intent of this section to claim rights or contest
 | 
			
		||||
your rights to work written entirely by you; rather, the intent is to
 | 
			
		||||
exercise the right to control the distribution of derivative or
 | 
			
		||||
collective works based on the Program.
 | 
			
		||||
 | 
			
		||||
In addition, mere aggregation of another work not based on the Program
 | 
			
		||||
with the Program (or with a work based on the Program) on a volume of
 | 
			
		||||
a storage or distribution medium does not bring the other work under
 | 
			
		||||
the scope of this License.
 | 
			
		||||
 | 
			
		||||
  3. You may copy and distribute the Program (or a work based on it,
 | 
			
		||||
under Section 2) in object code or executable form under the terms of
 | 
			
		||||
Sections 1 and 2 above provided that you also do one of the following:
 | 
			
		||||
 | 
			
		||||
    a) Accompany it with the complete corresponding machine-readable
 | 
			
		||||
    source code, which must be distributed under the terms of Sections
 | 
			
		||||
    1 and 2 above on a medium customarily used for software interchange; or,
 | 
			
		||||
 | 
			
		||||
    b) Accompany it with a written offer, valid for at least three
 | 
			
		||||
    years, to give any third party, for a charge no more than your
 | 
			
		||||
    cost of physically performing source distribution, a complete
 | 
			
		||||
    machine-readable copy of the corresponding source code, to be
 | 
			
		||||
    distributed under the terms of Sections 1 and 2 above on a medium
 | 
			
		||||
    customarily used for software interchange; or,
 | 
			
		||||
 | 
			
		||||
    c) Accompany it with the information you received as to the offer
 | 
			
		||||
    to distribute corresponding source code.  (This alternative is
 | 
			
		||||
    allowed only for noncommercial distribution and only if you
 | 
			
		||||
    received the program in object code or executable form with such
 | 
			
		||||
    an offer, in accord with Subsection b above.)
 | 
			
		||||
 | 
			
		||||
The source code for a work means the preferred form of the work for
 | 
			
		||||
making modifications to it.  For an executable work, complete source
 | 
			
		||||
code means all the source code for all modules it contains, plus any
 | 
			
		||||
associated interface definition files, plus the scripts used to
 | 
			
		||||
control compilation and installation of the executable.  However, as a
 | 
			
		||||
special exception, the source code distributed need not include
 | 
			
		||||
anything that is normally distributed (in either source or binary
 | 
			
		||||
form) with the major components (compiler, kernel, and so on) of the
 | 
			
		||||
operating system on which the executable runs, unless that component
 | 
			
		||||
itself accompanies the executable.
 | 
			
		||||
 | 
			
		||||
If distribution of executable or object code is made by offering
 | 
			
		||||
access to copy from a designated place, then offering equivalent
 | 
			
		||||
access to copy the source code from the same place counts as
 | 
			
		||||
distribution of the source code, even though third parties are not
 | 
			
		||||
compelled to copy the source along with the object code.
 | 
			
		||||
 | 
			
		||||
  4. You may not copy, modify, sublicense, or distribute the Program
 | 
			
		||||
except as expressly provided under this License.  Any attempt
 | 
			
		||||
otherwise to copy, modify, sublicense or distribute the Program is
 | 
			
		||||
void, and will automatically terminate your rights under this License.
 | 
			
		||||
However, parties who have received copies, or rights, from you under
 | 
			
		||||
this License will not have their licenses terminated so long as such
 | 
			
		||||
parties remain in full compliance.
 | 
			
		||||
 | 
			
		||||
  5. You are not required to accept this License, since you have not
 | 
			
		||||
signed it.  However, nothing else grants you permission to modify or
 | 
			
		||||
distribute the Program or its derivative works.  These actions are
 | 
			
		||||
prohibited by law if you do not accept this License.  Therefore, by
 | 
			
		||||
modifying or distributing the Program (or any work based on the
 | 
			
		||||
Program), you indicate your acceptance of this License to do so, and
 | 
			
		||||
all its terms and conditions for copying, distributing or modifying
 | 
			
		||||
the Program or works based on it.
 | 
			
		||||
 | 
			
		||||
  6. Each time you redistribute the Program (or any work based on the
 | 
			
		||||
Program), the recipient automatically receives a license from the
 | 
			
		||||
original licensor to copy, distribute or modify the Program subject to
 | 
			
		||||
these terms and conditions.  You may not impose any further
 | 
			
		||||
restrictions on the recipients' exercise of the rights granted herein.
 | 
			
		||||
You are not responsible for enforcing compliance by third parties to
 | 
			
		||||
this License.
 | 
			
		||||
 | 
			
		||||
  Each contributor grants you a non-exclusive, worldwide, royalty-free
 | 
			
		||||
patent license under the contributor's essential patent claims, to
 | 
			
		||||
make, use, sell, offer for sale, import and otherwise run, modify and
 | 
			
		||||
propagate the contents of its contributor version.
 | 
			
		||||
 | 
			
		||||
  In the following three paragraphs, a "patent license" is any express
 | 
			
		||||
agreement or commitment, however denominated, not to enforce a patent
 | 
			
		||||
(such as an express permission to practice a patent or covenant not to
 | 
			
		||||
sue for patent infringement).  To "grant" such a patent license to a
 | 
			
		||||
party means to make such an agreement or commitment not to enforce a
 | 
			
		||||
patent against the party.
 | 
			
		||||
 | 
			
		||||
  If you convey a covered work, knowingly relying on a patent license,
 | 
			
		||||
and the Corresponding Source of the work is not available for anyone
 | 
			
		||||
to copy, free of charge and under the terms of this License, through a
 | 
			
		||||
publicly available network server or other readily accessible means,
 | 
			
		||||
then you must either (1) cause the Corresponding Source to be so
 | 
			
		||||
available, or (2) arrange to deprive yourself of the benefit of the
 | 
			
		||||
patent license for this particular work, or (3) arrange, in a manner
 | 
			
		||||
consistent with the requirements of this License, to extend the patent
 | 
			
		||||
license to downstream recipients.  "Knowingly relying" means you have
 | 
			
		||||
actual knowledge that, but for the patent license, your conveying the
 | 
			
		||||
covered work in a country, or your recipient's use of the covered work
 | 
			
		||||
in a country, would infringe one or more identifiable patents in that
 | 
			
		||||
country that you have reason to believe are valid.
 | 
			
		||||
 | 
			
		||||
  If, pursuant to or in connection with a single transaction or
 | 
			
		||||
arrangement, you convey, or propagate by procuring conveyance of, a
 | 
			
		||||
covered work, and grant a patent license to some of the parties
 | 
			
		||||
receiving the covered work authorizing them to use, propagate, modify
 | 
			
		||||
or convey a specific copy of the covered work, then the patent license
 | 
			
		||||
you grant is automatically extended to all recipients of the covered
 | 
			
		||||
work and works based on it.
 | 
			
		||||
 | 
			
		||||
  A patent license is "discriminatory" if it does not include within
 | 
			
		||||
the scope of its coverage, prohibits the exercise of, or is
 | 
			
		||||
conditioned on the non-exercise of one or more of the rights that are
 | 
			
		||||
specifically granted under this License.  You may not convey a covered
 | 
			
		||||
work if you are a party to an arrangement with a third party that is
 | 
			
		||||
in the business of distributing software, under which you make payment
 | 
			
		||||
to the third party based on the extent of your activity of conveying
 | 
			
		||||
the work, and under which the third party grants, to any of the
 | 
			
		||||
parties who would receive the covered work from you, a discriminatory
 | 
			
		||||
patent license (a) in connection with copies of the covered work
 | 
			
		||||
conveyed by you (or copies made from those copies), or (b) primarily
 | 
			
		||||
for and in connection with specific products or compilations that
 | 
			
		||||
contain the covered work, unless you entered into that arrangement,
 | 
			
		||||
or that patent license was granted, prior to 28 March 2007.
 | 
			
		||||
 | 
			
		||||
  Nothing in this License shall be construed as excluding or limiting
 | 
			
		||||
any implied license or other defenses to infringement that may
 | 
			
		||||
otherwise be available to you under applicable patent law.
 | 
			
		||||
 | 
			
		||||
  12. No Surrender of Others' Freedom.
 | 
			
		||||
 | 
			
		||||
  If conditions are imposed on you (whether by court order, agreement or
 | 
			
		||||
  7. If, as a consequence of a court judgment or allegation of patent
 | 
			
		||||
infringement or for any other reason (not limited to patent issues),
 | 
			
		||||
conditions are imposed on you (whether by court order, agreement or
 | 
			
		||||
otherwise) that contradict the conditions of this License, they do not
 | 
			
		||||
excuse you from the conditions of this License.  If you cannot convey a
 | 
			
		||||
covered work so as to satisfy simultaneously your obligations under this
 | 
			
		||||
License and any other pertinent obligations, then as a consequence you may
 | 
			
		||||
not convey it at all.  For example, if you agree to terms that obligate you
 | 
			
		||||
to collect a royalty for further conveying from those to whom you convey
 | 
			
		||||
the Program, the only way you could satisfy both those terms and this
 | 
			
		||||
License would be to refrain entirely from conveying the Program.
 | 
			
		||||
excuse you from the conditions of this License.  If you cannot
 | 
			
		||||
distribute so as to satisfy simultaneously your obligations under this
 | 
			
		||||
License and any other pertinent obligations, then as a consequence you
 | 
			
		||||
may not distribute the Program at all.  For example, if a patent
 | 
			
		||||
license would not permit royalty-free redistribution of the Program by
 | 
			
		||||
all those who receive copies directly or indirectly through you, then
 | 
			
		||||
the only way you could satisfy both it and this License would be to
 | 
			
		||||
refrain entirely from distribution of the Program.
 | 
			
		||||
 | 
			
		||||
  13. Use with the GNU Affero General Public License.
 | 
			
		||||
If any portion of this section is held invalid or unenforceable under
 | 
			
		||||
any particular circumstance, the balance of the section is intended to
 | 
			
		||||
apply and the section as a whole is intended to apply in other
 | 
			
		||||
circumstances.
 | 
			
		||||
 | 
			
		||||
  Notwithstanding any other provision of this License, you have
 | 
			
		||||
permission to link or combine any covered work with a work licensed
 | 
			
		||||
under version 3 of the GNU Affero General Public License into a single
 | 
			
		||||
combined work, and to convey the resulting work.  The terms of this
 | 
			
		||||
License will continue to apply to the part which is the covered work,
 | 
			
		||||
but the special requirements of the GNU Affero General Public License,
 | 
			
		||||
section 13, concerning interaction through a network will apply to the
 | 
			
		||||
combination as such.
 | 
			
		||||
It is not the purpose of this section to induce you to infringe any
 | 
			
		||||
patents or other property right claims or to contest validity of any
 | 
			
		||||
such claims; this section has the sole purpose of protecting the
 | 
			
		||||
integrity of the free software distribution system, which is
 | 
			
		||||
implemented by public license practices.  Many people have made
 | 
			
		||||
generous contributions to the wide range of software distributed
 | 
			
		||||
through that system in reliance on consistent application of that
 | 
			
		||||
system; it is up to the author/donor to decide if he or she is willing
 | 
			
		||||
to distribute software through any other system and a licensee cannot
 | 
			
		||||
impose that choice.
 | 
			
		||||
 | 
			
		||||
  14. Revised Versions of this License.
 | 
			
		||||
This section is intended to make thoroughly clear what is believed to
 | 
			
		||||
be a consequence of the rest of this License.
 | 
			
		||||
 | 
			
		||||
  The Free Software Foundation may publish revised and/or new versions of
 | 
			
		||||
the GNU General Public License from time to time.  Such new versions will
 | 
			
		||||
  8. If the distribution and/or use of the Program is restricted in
 | 
			
		||||
certain countries either by patents or by copyrighted interfaces, the
 | 
			
		||||
original copyright holder who places the Program under this License
 | 
			
		||||
may add an explicit geographical distribution limitation excluding
 | 
			
		||||
those countries, so that distribution is permitted only in or among
 | 
			
		||||
countries not thus excluded.  In such case, this License incorporates
 | 
			
		||||
the limitation as if written in the body of this License.
 | 
			
		||||
 | 
			
		||||
  9. The Free Software Foundation may publish revised and/or new versions
 | 
			
		||||
of the General Public License from time to time.  Such new versions will
 | 
			
		||||
be similar in spirit to the present version, but may differ in detail to
 | 
			
		||||
address new problems or concerns.
 | 
			
		||||
 | 
			
		||||
  Each version is given a distinguishing version number.  If the
 | 
			
		||||
Program specifies that a certain numbered version of the GNU General
 | 
			
		||||
Public License "or any later version" applies to it, you have the
 | 
			
		||||
option of following the terms and conditions either of that numbered
 | 
			
		||||
version or of any later version published by the Free Software
 | 
			
		||||
Foundation.  If the Program does not specify a version number of the
 | 
			
		||||
GNU General Public License, you may choose any version ever published
 | 
			
		||||
by the Free Software Foundation.
 | 
			
		||||
Each version is given a distinguishing version number.  If the Program
 | 
			
		||||
specifies a version number of this License which applies to it and "any
 | 
			
		||||
later version", you have the option of following the terms and conditions
 | 
			
		||||
either of that version or of any later version published by the Free
 | 
			
		||||
Software Foundation.  If the Program does not specify a version number of
 | 
			
		||||
this License, you may choose any version ever published by the Free Software
 | 
			
		||||
Foundation.
 | 
			
		||||
 | 
			
		||||
  If the Program specifies that a proxy can decide which future
 | 
			
		||||
versions of the GNU General Public License can be used, that proxy's
 | 
			
		||||
public statement of acceptance of a version permanently authorizes you
 | 
			
		||||
to choose that version for the Program.
 | 
			
		||||
  10. If you wish to incorporate parts of the Program into other free
 | 
			
		||||
programs whose distribution conditions are different, write to the author
 | 
			
		||||
to ask for permission.  For software which is copyrighted by the Free
 | 
			
		||||
Software Foundation, write to the Free Software Foundation; we sometimes
 | 
			
		||||
make exceptions for this.  Our decision will be guided by the two goals
 | 
			
		||||
of preserving the free status of all derivatives of our free software and
 | 
			
		||||
of promoting the sharing and reuse of software generally.
 | 
			
		||||
 | 
			
		||||
  Later license versions may give you additional or different
 | 
			
		||||
permissions.  However, no additional obligations are imposed on any
 | 
			
		||||
author or copyright holder as a result of your choosing to follow a
 | 
			
		||||
later version.
 | 
			
		||||
                            NO WARRANTY
 | 
			
		||||
 | 
			
		||||
  15. Disclaimer of Warranty.
 | 
			
		||||
  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
 | 
			
		||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
 | 
			
		||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
 | 
			
		||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
 | 
			
		||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 | 
			
		||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
 | 
			
		||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
 | 
			
		||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
 | 
			
		||||
REPAIR OR CORRECTION.
 | 
			
		||||
 | 
			
		||||
  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
 | 
			
		||||
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
 | 
			
		||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
 | 
			
		||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
 | 
			
		||||
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
 | 
			
		||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
 | 
			
		||||
 | 
			
		||||
  16. Limitation of Liability.
 | 
			
		||||
 | 
			
		||||
  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 | 
			
		||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
 | 
			
		||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
 | 
			
		||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
 | 
			
		||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
 | 
			
		||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
 | 
			
		||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
 | 
			
		||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
 | 
			
		||||
SUCH DAMAGES.
 | 
			
		||||
 | 
			
		||||
  17. Interpretation of Sections 15 and 16.
 | 
			
		||||
 | 
			
		||||
  If the disclaimer of warranty and limitation of liability provided
 | 
			
		||||
above cannot be given local legal effect according to their terms,
 | 
			
		||||
reviewing courts shall apply local law that most closely approximates
 | 
			
		||||
an absolute waiver of all civil liability in connection with the
 | 
			
		||||
Program, unless a warranty or assumption of liability accompanies a
 | 
			
		||||
copy of the Program in return for a fee.
 | 
			
		||||
  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 | 
			
		||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
 | 
			
		||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
 | 
			
		||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
 | 
			
		||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
 | 
			
		||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
 | 
			
		||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
 | 
			
		||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
 | 
			
		||||
POSSIBILITY OF SUCH DAMAGES.
 | 
			
		||||
 | 
			
		||||
                     END OF TERMS AND CONDITIONS
 | 
			
		||||
 | 
			
		||||
@@ -628,15 +287,15 @@ free software which everyone can redistribute and change under these terms.
 | 
			
		||||
 | 
			
		||||
  To do so, attach the following notices to the program.  It is safest
 | 
			
		||||
to attach them to the start of each source file to most effectively
 | 
			
		||||
state the exclusion of warranty; and each file should have at least
 | 
			
		||||
convey the exclusion of warranty; and each file should have at least
 | 
			
		||||
the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
 | 
			
		||||
    <one line to give the program's name and a brief idea of what it does.>
 | 
			
		||||
    Copyright (C) <year>  <name of author>
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
@@ -644,31 +303,38 @@ the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
Also add information on how to contact you by electronic and paper mail.
 | 
			
		||||
 | 
			
		||||
  If the program does terminal interaction, make it output a short
 | 
			
		||||
notice like this when it starts in an interactive mode:
 | 
			
		||||
If the program is interactive, make it output a short notice like this
 | 
			
		||||
when it starts in an interactive mode:
 | 
			
		||||
 | 
			
		||||
    <program>  Copyright (C) <year>  <name of author>
 | 
			
		||||
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
 | 
			
		||||
    Gnomovision version 69, Copyright (C) year name of author
 | 
			
		||||
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
 | 
			
		||||
    This is free software, and you are welcome to redistribute it
 | 
			
		||||
    under certain conditions; type `show c' for details.
 | 
			
		||||
 | 
			
		||||
The hypothetical commands `show w' and `show c' should show the appropriate
 | 
			
		||||
parts of the General Public License.  Of course, your program's commands
 | 
			
		||||
might be different; for a GUI interface, you would use an "about box".
 | 
			
		||||
parts of the General Public License.  Of course, the commands you use may
 | 
			
		||||
be called something other than `show w' and `show c'; they could even be
 | 
			
		||||
mouse-clicks or menu items--whatever suits your program.
 | 
			
		||||
 | 
			
		||||
  You should also get your employer (if you work as a programmer) or school,
 | 
			
		||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
 | 
			
		||||
For more information on this, and how to apply and follow the GNU GPL, see
 | 
			
		||||
<http://www.gnu.org/licenses/>.
 | 
			
		||||
You should also get your employer (if you work as a programmer) or your
 | 
			
		||||
school, if any, to sign a "copyright disclaimer" for the program, if
 | 
			
		||||
necessary.  Here is a sample; alter the names:
 | 
			
		||||
 | 
			
		||||
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
 | 
			
		||||
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
 | 
			
		||||
 | 
			
		||||
  <signature of Ty Coon>, 1 April 1989
 | 
			
		||||
  Ty Coon, President of Vice
 | 
			
		||||
 | 
			
		||||
This General Public License does not permit incorporating your program into
 | 
			
		||||
proprietary programs.  If your program is a subroutine library, you may
 | 
			
		||||
consider it more useful to permit linking proprietary applications with the
 | 
			
		||||
library.  If this is what you want to do, use the GNU Lesser General
 | 
			
		||||
Public License instead of this License.
 | 
			
		||||
 | 
			
		||||
  The GNU General Public License does not permit incorporating your program
 | 
			
		||||
into proprietary programs.  If your program is a subroutine library, you
 | 
			
		||||
may consider it more useful to permit linking proprietary applications with
 | 
			
		||||
the library.  If this is what you want to do, use the GNU Lesser General
 | 
			
		||||
Public License instead of this License.  But first, please read
 | 
			
		||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										10
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								LICENSE
									
									
									
									
									
								
							@@ -1,7 +1,7 @@
 | 
			
		||||
GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                   GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 2, June 1991
 | 
			
		||||
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 Everyone is permitted to copy and distribute verbatim copies
 | 
			
		||||
 of this license document, but changing it is not allowed.
 | 
			
		||||
@@ -290,8 +290,8 @@ to attach them to the start of each source file to most effectively
 | 
			
		||||
convey the exclusion of warranty; and each file should have at least
 | 
			
		||||
the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
 | 
			
		||||
    {description}
 | 
			
		||||
    Copyright (C) {year}  {fullname}
 | 
			
		||||
    <one line to give the program's name and a brief idea of what it does.>
 | 
			
		||||
    Copyright (C) <year>  <name of author>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
@@ -329,7 +329,7 @@ necessary.  Here is a sample; alter the names:
 | 
			
		||||
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
 | 
			
		||||
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
 | 
			
		||||
 | 
			
		||||
  {signature of Ty Coon}, 1 April 1989
 | 
			
		||||
  <signature of Ty Coon>, 1 April 1989
 | 
			
		||||
  Ty Coon, President of Vice
 | 
			
		||||
 | 
			
		||||
This General Public License does not permit incorporating your program into
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/
 | 
			
		||||
SUBDIRS = lib tests benchmarks
 | 
			
		||||
SUBDIRS = lib benchmarks tests
 | 
			
		||||
 | 
			
		||||
filelist: $(SUBDIRS)
 | 
			
		||||
AM_CXXFLAGS += -I$(top_builddir)/include
 | 
			
		||||
ACLOCAL_AMFLAGS = -I m4
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										108
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										108
									
								
								README.md
									
									
									
									
									
								
							@@ -1,6 +1,28 @@
 | 
			
		||||
# Grid
 | 
			
		||||
Data parallel C++ mathematical object library
 | 
			
		||||
<table>
 | 
			
		||||
<tr>
 | 
			
		||||
    <td>Last stable release</td>
 | 
			
		||||
    <td><a href="https://travis-ci.org/paboyle/Grid">
 | 
			
		||||
    <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
 | 
			
		||||
    </td>
 | 
			
		||||
</tr>
 | 
			
		||||
<tr>
 | 
			
		||||
    <td>Development branch</td>
 | 
			
		||||
    <td><a href="https://travis-ci.org/paboyle/Grid">
 | 
			
		||||
    <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
 | 
			
		||||
    </td>
 | 
			
		||||
</tr>
 | 
			
		||||
</table>
 | 
			
		||||
 | 
			
		||||
**Data parallel C++ mathematical object library.**
 | 
			
		||||
 | 
			
		||||
Please send all pull requests to the `develop` branch.
 | 
			
		||||
 | 
			
		||||
License: GPL v2.
 | 
			
		||||
 | 
			
		||||
Last update 2016/08/03.
 | 
			
		||||
 | 
			
		||||
### Description
 | 
			
		||||
This library provides data parallel C++ container classes with internal memory layout
 | 
			
		||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
 | 
			
		||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
 | 
			
		||||
@@ -20,31 +42,75 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
 | 
			
		||||
for most programmers.
 | 
			
		||||
 | 
			
		||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
 | 
			
		||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
 | 
			
		||||
Presently SSE4 (128 bit), AVX, AVX2 (256 bit), and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way).
 | 
			
		||||
 | 
			
		||||
These are presented as 
 | 
			
		||||
 | 
			
		||||
  vRealF, vRealD, vComplexF, vComplexD 
 | 
			
		||||
 | 
			
		||||
internal vector data types. These may be useful in themselves for other programmers.
 | 
			
		||||
The corresponding scalar types are named
 | 
			
		||||
 | 
			
		||||
  RealF, RealD, ComplexF, ComplexD
 | 
			
		||||
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
 | 
			
		||||
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
 | 
			
		||||
 | 
			
		||||
MPI, OpenMP, and SIMD parallelism are present in the library.
 | 
			
		||||
Please see https://arxiv.org/abs/1512.03487 for more detail.
 | 
			
		||||
 | 
			
		||||
   You can give `configure' initial values for configuration parameters
 | 
			
		||||
by setting variables in the command line or in the environment.  Here
 | 
			
		||||
are examples:
 | 
			
		||||
### Installation
 | 
			
		||||
First, start by cloning the repository:
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
 | 
			
		||||
``` bash
 | 
			
		||||
git clone https://github.com/paboyle/Grid.git
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
 | 
			
		||||
Then enter the cloned directory and set up the build system:
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
 | 
			
		||||
``` bash
 | 
			
		||||
cd Grid
 | 
			
		||||
./bootstrap.sh
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
 | 
			
		||||
     
 | 
			
		||||
     
 | 
			
		||||
For developers:
 | 
			
		||||
Use reconfigure_script in the scripts/ directory to create the autotools environment 
 | 
			
		||||
Now you can execute the `configure` script to generate makefiles (here from a build directory):
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
mkdir build; cd build
 | 
			
		||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
where `--enable-precision=` sets the default precision (`single` or `double`),
 | 
			
		||||
`--enable-simd=` sets the SIMD type (see possible values below), `--enable-
 | 
			
		||||
comms=` sets the protocol used for communications (`none`, `mpi`, `mpi-auto` or
 | 
			
		||||
`shmem`), and `<path>` should be replaced by the prefix path where you want to
 | 
			
		||||
install Grid. The `mpi-auto` communication option lets `configure` determine
 | 
			
		||||
automatically how to link to MPI. Other options are available, use `configure
 | 
			
		||||
--help` to display them. As with any other program using GNU autotools, the
 | 
			
		||||
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
 | 
			
		||||
customise the build.
 | 
			
		||||
 | 
			
		||||
Finally, you can build and install Grid:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
make; make install
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
make -C tests/<subdir> tests
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Possible SIMD types
 | 
			
		||||
 | 
			
		||||
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
 | 
			
		||||
 | 
			
		||||
| String      | Description                            |
 | 
			
		||||
| ----------- | -------------------------------------- |
 | 
			
		||||
| `GEN`       | generic portable vector code           |
 | 
			
		||||
| `SSE4`      | SSE 4.2 (128 bit)                      |
 | 
			
		||||
| `AVX`       | AVX (256 bit)                          |
 | 
			
		||||
| `AVXFMA4`   | AVX (256 bit) + FMA                    |
 | 
			
		||||
| `AVX2`      | AVX 2 (256 bit)                        |
 | 
			
		||||
| `AVX512`    | AVX 512 bit                            |
 | 
			
		||||
| `AVX512MIC` | AVX 512 bit for Intel MIC architecture |
 | 
			
		||||
| `IMCI`      | Intel IMCI instructions (512 bit)      |
 | 
			
		||||
 | 
			
		||||
Alternatively, some CPU codenames can be directly used:
 | 
			
		||||
 | 
			
		||||
| String      | Description                            |
 | 
			
		||||
| ----------- | -------------------------------------- |
 | 
			
		||||
| `KNC`       | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
 | 
			
		||||
| `KNL`       | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
 | 
			
		||||
							
								
								
									
										22
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								TODO
									
									
									
									
									
								
							@@ -1,5 +1,27 @@
 | 
			
		||||
TODO:
 | 
			
		||||
---------------
 | 
			
		||||
 | 
			
		||||
* Forces; the UdSdU  term in gauge force term is half of what I think it should
 | 
			
		||||
  be. This is a consequence of taking ONLY the first term in:
 | 
			
		||||
 | 
			
		||||
  dSg/dt = dU/dt dSdU + dUdag/dt dSdUdag
 | 
			
		||||
 | 
			
		||||
  in the fermion force.
 | 
			
		||||
 | 
			
		||||
  Now, S_mom = - tr Pmu Pmu      ; Pmu anti-herm
 | 
			
		||||
 | 
			
		||||
                                  .
 | 
			
		||||
       d Smom/dt = - 2.0 tr Pmu Pmu   = - dSg/dt = - tr Pmu [Umu dSdUmu + UmuDag dSdUmuDag]
 | 
			
		||||
 | 
			
		||||
           .
 | 
			
		||||
       => Pmu =  Umu dSdUmu
 | 
			
		||||
 | 
			
		||||
       Where the norm is half expected.
 | 
			
		||||
 | 
			
		||||
  This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
 | 
			
		||||
  This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Policies:
 | 
			
		||||
 | 
			
		||||
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								VERSION
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								VERSION
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,4 @@
 | 
			
		||||
Version : 0.5.0
 | 
			
		||||
 | 
			
		||||
- AVX512, AVX2, AVX, SSE good
 | 
			
		||||
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
 | 
			
		||||
@@ -1,4 +1,31 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_comms.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
@@ -167,7 +194,128 @@ int main (int argc, char ** argv)
 | 
			
		||||
    }
 | 
			
		||||
  }  
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  for(int lat=4;lat<=32;lat+=2){
 | 
			
		||||
    for(int Ls=1;Ls<=16;Ls*=2){
 | 
			
		||||
 | 
			
		||||
      std::vector<int> latt_size  ({lat,lat,lat,lat});
 | 
			
		||||
 | 
			
		||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
      std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      int ncomm;
 | 
			
		||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      std::vector<CartesianCommunicator::CommsRequest_t> empty;
 | 
			
		||||
      std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_fwd(Nd,empty);
 | 
			
		||||
      std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_bwd(Nd,empty);
 | 
			
		||||
 | 
			
		||||
      for(int mu=0;mu<4;mu++){
 | 
			
		||||
	ncomm=0;
 | 
			
		||||
	if (mpi_layout[mu]>1 ) {
 | 
			
		||||
	  ncomm++;
 | 
			
		||||
 | 
			
		||||
	  int comm_proc;
 | 
			
		||||
	  int xmit_to_rank;
 | 
			
		||||
	  int recv_from_rank;
 | 
			
		||||
 | 
			
		||||
	  comm_proc=1;
 | 
			
		||||
	  Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
			
		||||
	  Grid.SendToRecvFromInit(requests_fwd[mu],
 | 
			
		||||
				  (void *)&xbuf[mu][0],
 | 
			
		||||
				  xmit_to_rank,
 | 
			
		||||
				  (void *)&rbuf[mu][0],
 | 
			
		||||
				  recv_from_rank,
 | 
			
		||||
				  bytes);
 | 
			
		||||
 | 
			
		||||
	  comm_proc = mpi_layout[mu]-1;
 | 
			
		||||
	  Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
			
		||||
	  Grid.SendToRecvFromInit(requests_bwd[mu],
 | 
			
		||||
				  (void *)&xbuf[mu+4][0],
 | 
			
		||||
				  xmit_to_rank,
 | 
			
		||||
				  (void *)&rbuf[mu+4][0],
 | 
			
		||||
				  recv_from_rank,
 | 
			
		||||
				  bytes);
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	double start=usecond();
 | 
			
		||||
	for(int i=0;i<Nloop;i++){
 | 
			
		||||
	  
 | 
			
		||||
	  for(int mu=0;mu<4;mu++){
 | 
			
		||||
	    
 | 
			
		||||
	    if (mpi_layout[mu]>1 ) {
 | 
			
		||||
	      
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_bwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_bwd[mu]);
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  Grid.Barrier();
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	double stop=usecond();
 | 
			
		||||
	
 | 
			
		||||
	double dbytes    = bytes;
 | 
			
		||||
	double xbytes    = Nloop*dbytes*2.0*ncomm;
 | 
			
		||||
	double rbytes    = xbytes;
 | 
			
		||||
	double bidibytes = xbytes+rbytes;
 | 
			
		||||
	
 | 
			
		||||
	double time = stop-start;
 | 
			
		||||
	
 | 
			
		||||
	std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	double start=usecond();
 | 
			
		||||
	for(int i=0;i<Nloop;i++){
 | 
			
		||||
	  
 | 
			
		||||
	  for(int mu=0;mu<4;mu++){
 | 
			
		||||
	    
 | 
			
		||||
	    if (mpi_layout[mu]>1 ) {
 | 
			
		||||
	      
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromBegin(requests_bwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_fwd[mu]);
 | 
			
		||||
	      Grid.SendToRecvFromComplete(requests_bwd[mu]);
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  Grid.Barrier();
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	double stop=usecond();
 | 
			
		||||
	
 | 
			
		||||
	double dbytes    = bytes;
 | 
			
		||||
	double xbytes    = Nloop*dbytes*2.0*ncomm;
 | 
			
		||||
	double rbytes    = xbytes;
 | 
			
		||||
	double bidibytes = xbytes+rbytes;
 | 
			
		||||
	
 | 
			
		||||
	double time = stop-start;
 | 
			
		||||
	
 | 
			
		||||
	std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,32 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_dwf.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
@@ -16,10 +44,20 @@ struct scal {
 | 
			
		||||
    Gamma::GammaT
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
bool overlapComms = false;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
 | 
			
		||||
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
 | 
			
		||||
    overlapComms = true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
@@ -30,6 +68,12 @@ int main (int argc, char ** argv)
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
 | 
			
		||||
  GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
 | 
			
		||||
  GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
@@ -42,9 +86,9 @@ int main (int argc, char ** argv)
 | 
			
		||||
  LatticeFermion    tmp(FGrid);
 | 
			
		||||
  LatticeFermion    err(FGrid);
 | 
			
		||||
 | 
			
		||||
  ColourMatrix cm = Complex(1.0,0.0);
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); 
 | 
			
		||||
  random(RNG4,Umu);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
 | 
			
		||||
  LatticeGaugeField Umu5d(FGrid); 
 | 
			
		||||
 | 
			
		||||
  // replicate across fifth dimension
 | 
			
		||||
@@ -79,14 +123,26 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
 | 
			
		||||
  RealD NP = UGrid->_Nprocessors;
 | 
			
		||||
 | 
			
		||||
  for(int doasm=1;doasm<2;doasm++){
 | 
			
		||||
 | 
			
		||||
    QCD::WilsonKernelsStatic::AsmOpt=doasm;
 | 
			
		||||
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=10000;
 | 
			
		||||
  {
 | 
			
		||||
  std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl;
 | 
			
		||||
  std::cout << GridLogMessage<< "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall =100;
 | 
			
		||||
  if (1) {
 | 
			
		||||
 | 
			
		||||
    Dw.ZeroCounters();
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      __SSC_START;
 | 
			
		||||
      Dw.Dhop(src,result,0);
 | 
			
		||||
      __SSC_STOP;
 | 
			
		||||
    }
 | 
			
		||||
    double t1=usecond();
 | 
			
		||||
    
 | 
			
		||||
@@ -97,11 +153,140 @@ int main (int argc, char ** argv)
 | 
			
		||||
    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
    err = ref-result; 
 | 
			
		||||
    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
    assert (norm2(err)< 1.0e-5 );
 | 
			
		||||
    Dw.Report();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (1)
 | 
			
		||||
  {
 | 
			
		||||
    typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 | 
			
		||||
    LatticeFermion ssrc(sFGrid);
 | 
			
		||||
    LatticeFermion sref(sFGrid);
 | 
			
		||||
    LatticeFermion sresult(sFGrid);
 | 
			
		||||
 | 
			
		||||
    WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
 | 
			
		||||
  
 | 
			
		||||
    for(int x=0;x<latt4[0];x++){
 | 
			
		||||
    for(int y=0;y<latt4[1];y++){
 | 
			
		||||
    for(int z=0;z<latt4[2];z++){
 | 
			
		||||
    for(int t=0;t<latt4[3];t++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      std::vector<int> site({s,x,y,z,t});
 | 
			
		||||
      SpinColourVector tmp;
 | 
			
		||||
      peekSite(tmp,src,site);
 | 
			
		||||
      pokeSite(tmp,ssrc,site);
 | 
			
		||||
    }}}}}
 | 
			
		||||
    std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    sDw.ZeroCounters();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      __SSC_START;
 | 
			
		||||
      sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
      __SSC_STOP;
 | 
			
		||||
    }
 | 
			
		||||
    double t1=usecond();
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=1344*volume*ncall;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
    sDw.Report();
 | 
			
		||||
  
 | 
			
		||||
    if(0){
 | 
			
		||||
      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
 | 
			
		||||
  sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
  PerformanceCounter Counter(i);
 | 
			
		||||
  Counter.Start();
 | 
			
		||||
  sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
  Counter.Stop();
 | 
			
		||||
  Counter.Report();
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    RealD sum=0;
 | 
			
		||||
    for(int x=0;x<latt4[0];x++){
 | 
			
		||||
    for(int y=0;y<latt4[1];y++){
 | 
			
		||||
    for(int z=0;z<latt4[2];z++){
 | 
			
		||||
    for(int t=0;t<latt4[3];t++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      std::vector<int> site({s,x,y,z,t});
 | 
			
		||||
      SpinColourVector normal, simd;
 | 
			
		||||
      peekSite(normal,result,site);
 | 
			
		||||
      peekSite(simd,sresult,site);
 | 
			
		||||
      sum=sum+norm2(normal-simd);
 | 
			
		||||
      if (norm2(normal-simd) > 1.0e-6 ) {
 | 
			
		||||
	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
 | 
			
		||||
	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
 | 
			
		||||
	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd   "<<simd<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }}}}}
 | 
			
		||||
    std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
 | 
			
		||||
    assert (sum< 1.0e-5 );
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    if (1) {
 | 
			
		||||
 | 
			
		||||
      LatticeFermion sr_eo(sFGrid);
 | 
			
		||||
 | 
			
		||||
      LatticeFermion ssrc_e (sFrbGrid);
 | 
			
		||||
      LatticeFermion ssrc_o (sFrbGrid);
 | 
			
		||||
      LatticeFermion sr_e   (sFrbGrid);
 | 
			
		||||
      LatticeFermion sr_o   (sFrbGrid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,ssrc_e,ssrc);
 | 
			
		||||
      pickCheckerboard(Odd,ssrc_o,ssrc);
 | 
			
		||||
 | 
			
		||||
      setCheckerboard(sr_eo,ssrc_o);
 | 
			
		||||
      setCheckerboard(sr_eo,ssrc_e);
 | 
			
		||||
 | 
			
		||||
      sr_e = zero;
 | 
			
		||||
      sr_o = zero;
 | 
			
		||||
 | 
			
		||||
      sDw.ZeroCounters();
 | 
			
		||||
      sDw.stat.init("DhopEO");
 | 
			
		||||
      double t0=usecond();
 | 
			
		||||
      for (int i = 0; i < ncall; i++) {
 | 
			
		||||
        sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
 | 
			
		||||
      }
 | 
			
		||||
      double t1=usecond();
 | 
			
		||||
      sDw.stat.print();
 | 
			
		||||
 | 
			
		||||
      double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
      double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage << "sDeo mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage << "sDeo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
      sDw.Report();
 | 
			
		||||
 | 
			
		||||
      sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
 | 
			
		||||
      sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
 | 
			
		||||
      sDw.Dhop  (ssrc  ,sresult,DaggerNo);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,ssrc_e,sresult);
 | 
			
		||||
      pickCheckerboard(Odd ,ssrc_o,sresult);
 | 
			
		||||
      ssrc_e = ssrc_e - sr_e;
 | 
			
		||||
      RealD error = norm2(ssrc_e);
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl;
 | 
			
		||||
      ssrc_o = ssrc_o - sr_o;
 | 
			
		||||
 | 
			
		||||
      error+= norm2(ssrc_o);
 | 
			
		||||
      std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<< "  vec nrm"<<norm2(sr_o) <<std::endl;
 | 
			
		||||
      if(error>1.0e-5) { 
 | 
			
		||||
	setCheckerboard(ssrc,ssrc_o);
 | 
			
		||||
	setCheckerboard(ssrc,ssrc_e);
 | 
			
		||||
	std::cout<< ssrc << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (1)
 | 
			
		||||
  { // Naive wilson dag implementation
 | 
			
		||||
@@ -111,24 +296,25 @@ int main (int argc, char ** argv)
 | 
			
		||||
      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
 | 
			
		||||
      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
			
		||||
      for(int i=0;i<ref._odata.size();i++){
 | 
			
		||||
	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
			
		||||
  ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      tmp =adj(U[mu])*src;
 | 
			
		||||
      tmp =Cshift(tmp,mu+1,-1);
 | 
			
		||||
      for(int i=0;i<ref._odata.size();i++){
 | 
			
		||||
	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
			
		||||
  ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    ref = -0.5*ref;
 | 
			
		||||
  }
 | 
			
		||||
  Dw.Dhop(src,result,1);
 | 
			
		||||
  std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
 | 
			
		||||
  err = ref-result; 
 | 
			
		||||
  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
 | 
			
		||||
  assert(norm2(err)<1.0e-5);
 | 
			
		||||
  LatticeFermion src_e (FrbGrid);
 | 
			
		||||
  LatticeFermion src_o (FrbGrid);
 | 
			
		||||
  LatticeFermion r_e   (FrbGrid);
 | 
			
		||||
@@ -144,6 +330,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
 | 
			
		||||
 | 
			
		||||
  {
 | 
			
		||||
    Dw.ZeroCounters();
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
			
		||||
@@ -154,8 +341,9 @@ int main (int argc, char ** argv)
 | 
			
		||||
    double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "Deo mflop/s per rank   "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
    Dw.Report();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
			
		||||
  Dw.DhopOE(src_e,r_o,DaggerNo);
 | 
			
		||||
  Dw.Dhop  (src  ,result,DaggerNo);
 | 
			
		||||
@@ -169,11 +357,17 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
  err = r_eo-result; 
 | 
			
		||||
  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
  assert(norm2(err)<1.0e-5);
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even,src_e,err);
 | 
			
		||||
  pickCheckerboard(Odd,src_o,err);
 | 
			
		||||
  std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl;
 | 
			
		||||
  assert(norm2(src_e)<1.0e-5);
 | 
			
		||||
  assert(norm2(src_o)<1.0e-5);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										153
									
								
								benchmarks/Benchmark_dwf_ntpf.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								benchmarks/Benchmark_dwf_ntpf.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,153 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_dwf_ntpf.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
// Minimal wrapper template holding a single value of type d.
// Nothing else in this benchmark refers to it.
template<typename d>
struct scal
{
  d internal;   // the wrapped value
};
 | 
			
		||||
 | 
			
		||||
  // Gamma matrix selector for each direction mu = 0..3, in the order X, Y, Z, T.
  Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT };
 | 
			
		||||
 | 
			
		||||
// Set to true by main() when "--asynch" is on the command line; main() copies it
// into the fermion ImplParams field overlapCommsCompute.
bool overlapComms = false;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
 | 
			
		||||
    overlapComms = true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt4 = GridDefaultLatt();
 | 
			
		||||
  const int Ls=16;
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
 | 
			
		||||
  LatticeFermion src   (FGrid); random(RNG5,src);
 | 
			
		||||
  LatticeFermion result(FGrid); result=zero;
 | 
			
		||||
  LatticeFermion    ref(FGrid);    ref=zero;
 | 
			
		||||
  LatticeFermion    tmp(FGrid);
 | 
			
		||||
  LatticeFermion    err(FGrid);
 | 
			
		||||
 | 
			
		||||
  ColourMatrix cm = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); 
 | 
			
		||||
  random(RNG4,Umu);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu5d(FGrid); 
 | 
			
		||||
 | 
			
		||||
  // replicate across fifth dimension
 | 
			
		||||
  for(int ss=0;ss<Umu._grid->oSites();ss++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////
 | 
			
		||||
  // Naive wilson implementation
 | 
			
		||||
  ////////////////////////////////////
 | 
			
		||||
  std::vector<LatticeColourMatrix> U(4,FGrid);
 | 
			
		||||
  for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
    U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (1)
 | 
			
		||||
  {
 | 
			
		||||
    ref = zero;
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
 | 
			
		||||
      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
			
		||||
      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
 | 
			
		||||
 | 
			
		||||
      tmp =adj(U[mu])*src;
 | 
			
		||||
      tmp =Cshift(tmp,mu+1,-1);
 | 
			
		||||
      ref=ref + tmp + Gamma(Gmu[mu])*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    ref = -0.5*ref;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
 | 
			
		||||
  typename DomainWallFermionR::ImplParams params; 
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
  
 | 
			
		||||
  RealD NP = UGrid->_Nprocessors;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  QCD::WilsonKernelsStatic::AsmOpt=1;
 | 
			
		||||
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall =50;
 | 
			
		||||
  if (1) {
 | 
			
		||||
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      Dw.Dhop(src,result,0);
 | 
			
		||||
    }
 | 
			
		||||
    double t1=usecond();
 | 
			
		||||
    
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=1344*volume*ncall;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
    err = ref-result; 
 | 
			
		||||
    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
    //    Dw.Report();
 | 
			
		||||
  }
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										364
									
								
								benchmarks/Benchmark_dwf_sweep.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										364
									
								
								benchmarks/Benchmark_dwf_sweep.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,364 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_dwf_sweep.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
// Minimal wrapper template holding a single value of type d.
// Nothing else in this benchmark refers to it.
template<typename d>
struct scal
{
  d internal;   // the wrapped value
};
 | 
			
		||||
 | 
			
		||||
  // Gamma matrix selector for each direction mu = 0..3, in the order X, Y, Z, T.
  Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT };
 | 
			
		||||
 | 
			
		||||
// Benchmarks DomainWallFermionR Dhop / DhopEO on lattice L with fifth dimension Ls.
// report != 0 prints the performance-counter report instead of the rates.
void benchDw(std::vector<int> & L, int Ls, int threads, int report =0 );
 | 
			
		||||
// Benchmarks WilsonFermion5D<DomainWallVec5dImplR> Dhop / DhopEO on lattice L with fifth dimension Ls.
// report != 0 prints the performance-counter reports instead of the rates.
void benchsDw(std::vector<int> & L, int Ls, int threads, int report=0 );
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  const int Ls=8;
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  if ( getenv("ASMOPT") )  {
 | 
			
		||||
    QCD::WilsonKernelsStatic::AsmOpt=1;
 | 
			
		||||
  } else { 
 | 
			
		||||
    QCD::WilsonKernelsStatic::AsmOpt=0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s)  "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
 | 
			
		||||
  int Lmax=32;
 | 
			
		||||
  int dmin=0;
 | 
			
		||||
  if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
 | 
			
		||||
  if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
 | 
			
		||||
  for (int L=8;L<=Lmax;L*=2){
 | 
			
		||||
    std::vector<int> latt4(4,L);
 | 
			
		||||
    for(int d=4;d>dmin;d--){
 | 
			
		||||
      if ( d<=3 ) latt4[d]*=2;
 | 
			
		||||
      std::cout << GridLogMessage <<"\t";
 | 
			
		||||
      for(int d=0;d<Nd;d++){
 | 
			
		||||
	std::cout<<latt4[d]<<"x";
 | 
			
		||||
      }
 | 
			
		||||
      std::cout <<Ls<<"\t" ;
 | 
			
		||||
      benchDw (latt4,Ls,threads,0);
 | 
			
		||||
      benchsDw(latt4,Ls,threads,0);
 | 
			
		||||
      std::cout<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
  {
 | 
			
		||||
    std::vector<int> latt4(4,16);
 | 
			
		||||
    std::cout<<GridLogMessage << "16^4 Dw miss rate"<<std::endl;
 | 
			
		||||
    benchDw (latt4,Ls,threads,1);
 | 
			
		||||
    std::cout<<GridLogMessage << "16^4 sDw miss rate"<<std::endl;
 | 
			
		||||
    benchsDw(latt4,Ls,threads,1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// With CHECK undefined, the "#ifdef CHECK" sections of benchDw are compiled out
// and its "#else" branch (zero source, zero gauge field) is used.
#undef CHECK
 | 
			
		||||
 | 
			
		||||
void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
 | 
			
		||||
{
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
#ifdef CHECK 
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  LatticeFermion src   (FGrid); random(RNG5,src);
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); 
 | 
			
		||||
  random(RNG4,Umu);
 | 
			
		||||
#else 
 | 
			
		||||
  LatticeFermion src   (FGrid); src=zero;
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); Umu=zero;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  LatticeFermion result(FGrid); result=zero;
 | 
			
		||||
  LatticeFermion    ref(FGrid);    ref=zero;
 | 
			
		||||
  LatticeFermion    tmp(FGrid);
 | 
			
		||||
  LatticeFermion    err(FGrid);
 | 
			
		||||
 | 
			
		||||
  ColourMatrix cm = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu5d(FGrid); 
 | 
			
		||||
 | 
			
		||||
  // replicate across fifth dimension
 | 
			
		||||
  for(int ss=0;ss<Umu._grid->oSites();ss++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////
 | 
			
		||||
  // Naive wilson implementation
 | 
			
		||||
  ////////////////////////////////////
 | 
			
		||||
  std::vector<LatticeColourMatrix> U(4,FGrid);
 | 
			
		||||
  for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
    U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef CHECK
 | 
			
		||||
  if (1) {
 | 
			
		||||
 | 
			
		||||
    ref = zero;
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      tmp = U[mu]*Cshift(src,mu+1,1);
 | 
			
		||||
      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
 | 
			
		||||
 | 
			
		||||
      tmp =adj(U[mu])*src;
 | 
			
		||||
      tmp =Cshift(tmp,mu+1,-1);
 | 
			
		||||
      ref=ref + tmp + Gamma(Gmu[mu])*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    ref = -0.5*ref;
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
  RealD NP = UGrid->_Nprocessors;
 | 
			
		||||
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
  
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  Dw.Dhop(src,result,0);
 | 
			
		||||
  double t1=usecond();
 | 
			
		||||
 | 
			
		||||
#ifdef TIMERS_OFF
 | 
			
		||||
    int ncall =10;
 | 
			
		||||
#else
 | 
			
		||||
  int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  if (ncall < 5 ) exit(0);
 | 
			
		||||
 | 
			
		||||
  Dw.Dhop(src,result,0);
 | 
			
		||||
 | 
			
		||||
  PerformanceCounter Counter(8);
 | 
			
		||||
  Counter.Start();
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.Dhop(src,result,0);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  Counter.Stop();
 | 
			
		||||
  if ( report ) {
 | 
			
		||||
    Counter.Report();
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if ( ! report ) {
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=1344*volume*ncall;
 | 
			
		||||
    std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
#ifdef CHECK
 | 
			
		||||
  err = ref-result; 
 | 
			
		||||
  RealD errd = norm2(err);
 | 
			
		||||
  if ( errd> 1.0e-4 ) {
 | 
			
		||||
    std::cout<<GridLogMessage << "oops !!! norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
    exit(-1);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
    
 | 
			
		||||
  LatticeFermion src_e (FrbGrid);
 | 
			
		||||
  LatticeFermion src_o (FrbGrid);
 | 
			
		||||
  LatticeFermion r_e   (FrbGrid);
 | 
			
		||||
  LatticeFermion r_o   (FrbGrid);
 | 
			
		||||
  LatticeFermion r_eo  (FGrid);
 | 
			
		||||
  
 | 
			
		||||
  pickCheckerboard(Even,src_e,src);
 | 
			
		||||
  pickCheckerboard(Odd,src_o,src);
 | 
			
		||||
  
 | 
			
		||||
  {
 | 
			
		||||
    Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
      Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
			
		||||
    }
 | 
			
		||||
    double t1=usecond();
 | 
			
		||||
    
 | 
			
		||||
    if(!report){
 | 
			
		||||
      double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
      double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
      std::cout<< flops/(t1-t0);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// With CHECK_SDW defined, benchsDw fills its source and gauge field from seeded
// random number generators instead of setting them to zero.
#define CHECK_SDW
 | 
			
		||||
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * sUGrid   = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
 | 
			
		||||
  GridCartesian         * sFGrid   = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
#ifdef CHECK_SDW
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  LatticeFermion src   (FGrid); random(RNG5,src);
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); 
 | 
			
		||||
  random(RNG4,Umu);
 | 
			
		||||
#else 
 | 
			
		||||
  LatticeFermion src   (FGrid); src=zero;
 | 
			
		||||
  LatticeGaugeField Umu(UGrid); Umu=zero;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  LatticeFermion result(FGrid); result=zero;
 | 
			
		||||
  LatticeFermion    ref(FGrid);    ref=zero;
 | 
			
		||||
  LatticeFermion    tmp(FGrid);
 | 
			
		||||
  LatticeFermion    err(FGrid);
 | 
			
		||||
 | 
			
		||||
  ColourMatrix cm = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu5d(FGrid); 
 | 
			
		||||
 | 
			
		||||
  // replicate across fifth dimension
 | 
			
		||||
  for(int ss=0;ss<Umu._grid->oSites();ss++){
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
 | 
			
		||||
  typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 | 
			
		||||
  LatticeFermion ssrc(sFGrid);
 | 
			
		||||
  LatticeFermion sref(sFGrid);
 | 
			
		||||
  LatticeFermion sresult(sFGrid);
 | 
			
		||||
  WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
 | 
			
		||||
  
 | 
			
		||||
  for(int x=0;x<latt4[0];x++){
 | 
			
		||||
  for(int y=0;y<latt4[1];y++){
 | 
			
		||||
  for(int z=0;z<latt4[2];z++){
 | 
			
		||||
  for(int t=0;t<latt4[3];t++){
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    std::vector<int> site({s,x,y,z,t});
 | 
			
		||||
    SpinColourVector tmp;
 | 
			
		||||
    peekSite(tmp,src,site);
 | 
			
		||||
    pokeSite(tmp,ssrc,site);
 | 
			
		||||
  }}}}}
 | 
			
		||||
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
  double t1=usecond();
 | 
			
		||||
 | 
			
		||||
#ifdef TIMERS_OFF
 | 
			
		||||
  int ncall =10;
 | 
			
		||||
#else 
 | 
			
		||||
  int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  PerformanceCounter Counter(8);
 | 
			
		||||
  Counter.Start();
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    sDw.Dhop(ssrc,sresult,0);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  Counter.Stop();
 | 
			
		||||
  
 | 
			
		||||
  if ( report ) {
 | 
			
		||||
    Counter.Report();
 | 
			
		||||
  } else { 
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=1344*volume*ncall;
 | 
			
		||||
    std::cout<<"\t"<< flops/(t1-t0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  LatticeFermion sr_eo(sFGrid);
 | 
			
		||||
  LatticeFermion serr(sFGrid);
 | 
			
		||||
  
 | 
			
		||||
  LatticeFermion ssrc_e (sFrbGrid);
 | 
			
		||||
  LatticeFermion ssrc_o (sFrbGrid);
 | 
			
		||||
  LatticeFermion sr_e   (sFrbGrid);
 | 
			
		||||
  LatticeFermion sr_o   (sFrbGrid);
 | 
			
		||||
      
 | 
			
		||||
  pickCheckerboard(Even,ssrc_e,ssrc);
 | 
			
		||||
  pickCheckerboard(Odd,ssrc_o,ssrc);
 | 
			
		||||
  
 | 
			
		||||
  setCheckerboard(sr_eo,ssrc_o);
 | 
			
		||||
  setCheckerboard(sr_eo,ssrc_e);
 | 
			
		||||
    
 | 
			
		||||
  sr_e = zero;
 | 
			
		||||
  sr_o = zero;
 | 
			
		||||
  
 | 
			
		||||
  sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
 | 
			
		||||
  PerformanceCounter CounterSdw(8);
 | 
			
		||||
  CounterSdw.Start();
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    __SSC_START;
 | 
			
		||||
    sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
 | 
			
		||||
    __SSC_STOP;
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  CounterSdw.Stop();
 | 
			
		||||
 | 
			
		||||
  if ( report ) { 
 | 
			
		||||
    CounterSdw.Report();
 | 
			
		||||
  } else {
 | 
			
		||||
    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
    double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
    std::cout<<"\t"<< flops/(t1-t0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -1,4 +1,32 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_memory_asynch.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,32 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_memory_bandwidth.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,32 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_su3.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,32 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_wilson.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
@@ -16,10 +44,15 @@ struct scal {
 | 
			
		||||
    Gamma::GammaT
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
bool overlapComms = false;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
 | 
			
		||||
    overlapComms = true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt_size   = GridDefaultLatt();
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
			
		||||
@@ -57,11 +90,12 @@ int main (int argc, char ** argv)
 | 
			
		||||
  Complex cone(1.0,0.0);
 | 
			
		||||
  for(int nn=0;nn<Nd;nn++){
 | 
			
		||||
    random(pRNG,U[nn]);
 | 
			
		||||
    if(0) {
 | 
			
		||||
      if (nn==-1) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
      else       { U[nn] = cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
    if(1) {
 | 
			
		||||
      if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
      //      else       { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
      else       { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
    }
 | 
			
		||||
    pokeIndex<LorentzIndex>(Umu,U[nn],nn);
 | 
			
		||||
    PokeIndex<LorentzIndex>(Umu,U[nn],nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@@ -87,7 +121,11 @@ int main (int argc, char ** argv)
 | 
			
		||||
  }
 | 
			
		||||
  ref = -0.5*ref;
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 | 
			
		||||
 | 
			
		||||
  typename WilsonFermionR::ImplParams params; 
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
 | 
			
		||||
  WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=1000;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										117
									
								
								benchmarks/Benchmark_wilson_sweep.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								benchmarks/Benchmark_wilson_sweep.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,117 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_wilson_sweep.cc
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Richard Rollins <rprollins@users.noreply.github.com>
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
// Minimal wrapper struct holding a single member of the template type d.
template<class d>
 | 
			
		||||
struct scal {
 | 
			
		||||
  d internal;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Table of the four Gamma::GammaMatrix enumerators, in the order X, Y, Z, T.
Gamma::GammaMatrix Gmu [] = {
 | 
			
		||||
  Gamma::GammaX,
 | 
			
		||||
  Gamma::GammaY,
 | 
			
		||||
  Gamma::GammaZ,
 | 
			
		||||
  Gamma::GammaT
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Set to true by main() when "--asynch" is on the command line; main() copies
// it into the operator's ImplParams::overlapCommsCompute.
bool overlapComms = false;
 | 
			
		||||
 | 
			
		||||
// Forward declaration; the definition follows main().
// Times repeated Dw.Dhop(src,result,dag) calls and prints the resulting rate,
// followed by two tabs, to std::cout. `volume` is the number of lattice sites
// used in the flop count; `dag` is passed straight through to Dhop.
void bench_wilson (
 | 
			
		||||
		   LatticeFermion &    src,
 | 
			
		||||
		   LatticeFermion & result,
 | 
			
		||||
		   WilsonFermionR &     Dw,
 | 
			
		||||
		   double const     volume,
 | 
			
		||||
		   int const           dag );
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
 | 
			
		||||
  typename WilsonFermionR::ImplParams params;
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
  std::vector<int> seeds({1,2,3,4});
 | 
			
		||||
  RealD mass = 0.1;
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
 | 
			
		||||
  int Lmax = 32;
 | 
			
		||||
  int dmin = 0;
 | 
			
		||||
  if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
 | 
			
		||||
  if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
 | 
			
		||||
  for (int L=8; L<=Lmax; L*=2)
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<int> latt_size = std::vector<int>(4,L);
 | 
			
		||||
      for(int d=4; d>dmin; d--)
 | 
			
		||||
	{
 | 
			
		||||
	  if ( d<=3 ) { latt_size[d] *= 2; }
 | 
			
		||||
 | 
			
		||||
	  std::cout << GridLogMessage;
 | 
			
		||||
	  std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
 | 
			
		||||
	  std::cout << latt_size.back() << "\t\t";
 | 
			
		||||
 | 
			
		||||
	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
	  GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
	  GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 | 
			
		||||
	  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
 | 
			
		||||
	  LatticeFermion    src(&Grid); random(pRNG,src);
 | 
			
		||||
	  LatticeFermion result(&Grid); result=zero;
 | 
			
		||||
 | 
			
		||||
	  double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
 | 
			
		||||
 | 
			
		||||
	  WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
 | 
			
		||||
      
 | 
			
		||||
	  bench_wilson(src,result,Dw,volume,DaggerNo);
 | 
			
		||||
	  bench_wilson(src,result,Dw,volume,DaggerYes);
 | 
			
		||||
	  std::cout << std::endl;
 | 
			
		||||
	}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Applies Dw.Dhop(src,result,dag) a fixed number of times, measures the
// elapsed usecond() interval around the whole loop, and prints
// (1344 * volume * calls) / elapsed followed by two tabs to std::cout.
// No newline is written; the caller terminates the table row.
void bench_wilson (
		   LatticeFermion &    src,
		   LatticeFermion & result,
		   WilsonFermionR &     Dw,
		   double const     volume,
		   int const           dag )
{
  const int iterations = 1000;

  const double start = usecond();
  int remaining = iterations;
  while ( remaining-- > 0 )
    {
      Dw.Dhop(src,result,dag);
    }
  const double stop = usecond();

  // 1344 is the per-site flop count this benchmark assumes for one Dhop.
  const double total_flops = 1344 * volume * iterations;
  const double elapsed     = stop - start;
  std::cout << total_flops/elapsed << "\t\t";
}
 | 
			
		||||
							
								
								
									
										175
									
								
								benchmarks/Benchmark_zmm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								benchmarks/Benchmark_zmm.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,175 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_zmm.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Forward declaration; the definition follows main().
// Runs one timing comparison for the 4-d lattice `latt4` with fifth-dimension
// extent `Ls`, prints the results to std::cout and returns 0. `os` is only
// referenced from code that is currently commented out in the definition.
int bench(std::ofstream &os, std::vector<int> &latt4,int Ls);
 | 
			
		||||
 | 
			
		||||
int main(int argc,char **argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
  std::ofstream os("zmm.dat");
 | 
			
		||||
 | 
			
		||||
  os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "====================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "====================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "====================================================================="<<std::endl;
 | 
			
		||||
  for(int L=4;L<=32;L+=4){
 | 
			
		||||
    for(int m=1;m<=2;m++){
 | 
			
		||||
      for(int Ls=8;Ls<=16;Ls+=8){
 | 
			
		||||
	std::vector<int> grid({L,L,m*L,m*L});
 | 
			
		||||
  std::cout << GridLogMessage <<"\t";
 | 
			
		||||
	for(int i=0;i<4;i++) { 
 | 
			
		||||
	  std::cout << grid[i]<<"x";
 | 
			
		||||
	}
 | 
			
		||||
	std::cout << Ls<<"\t\t";
 | 
			
		||||
	bench(os,grid,Ls);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
  GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4);
 | 
			
		||||
 | 
			
		||||
  LatticeFermion src (FGrid);
 | 
			
		||||
  LatticeFermion tmp (FGrid);
 | 
			
		||||
  LatticeFermion srce(FrbGrid);
 | 
			
		||||
 | 
			
		||||
  LatticeFermion resulto(FrbGrid); resulto=zero;
 | 
			
		||||
  LatticeFermion resulta(FrbGrid); resulta=zero;
 | 
			
		||||
  LatticeFermion junk(FrbGrid); junk=zero;
 | 
			
		||||
  LatticeFermion diff(FrbGrid); 
 | 
			
		||||
  LatticeGaugeField Umu(UGrid);
 | 
			
		||||
 | 
			
		||||
  double mfc, mfa, mfo, mfl1;
 | 
			
		||||
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  random(RNG5,src);
 | 
			
		||||
#if 1
 | 
			
		||||
  random(RNG4,Umu);
 | 
			
		||||
#else
 | 
			
		||||
  int mmu=2;
 | 
			
		||||
  std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
			
		||||
  for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
    if ( mu!=mmu ) U[mu] = zero;
 | 
			
		||||
    if ( mu==mmu ) U[mu] = 1.0;
 | 
			
		||||
    PokeIndex<LorentzIndex>(Umu,U[mu],mu);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 pickCheckerboard(Even,srce,src);
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
 | 
			
		||||
  int ncall=50;
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.DhopOE(srce,resulto,0);
 | 
			
		||||
  }
 | 
			
		||||
  double t1=usecond();
 | 
			
		||||
 | 
			
		||||
  double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
  double flops=1344*volume/2;
 | 
			
		||||
 | 
			
		||||
  mfc = flops*ncall/(t1-t0);
 | 
			
		||||
  std::cout<<mfc<<"\t\t";
 | 
			
		||||
 | 
			
		||||
  QCD::WilsonKernelsStatic::AsmOpt=1;
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.DhopOE(srce,resulta,0);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfa = flops*ncall/(t1-t0);
 | 
			
		||||
  std::cout<<mfa<<"\t\t";
 | 
			
		||||
  /*
 | 
			
		||||
  int dag=DaggerNo;
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<1;i++){
 | 
			
		||||
    Dw.DhopInternalOMPbench(Dw.StencilEven,Dw.LebesgueEvenOdd,Dw.UmuOdd,srce,resulta,dag);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfo = flops*100/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called ASM-OMP Dw"<< " mflop/s =   "<< mfo<<std::endl;
 | 
			
		||||
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<1;i++){
 | 
			
		||||
    Dw.DhopInternalL1bench(Dw.StencilEven,Dw.LebesgueEvenOdd,Dw.UmuOdd,srce,resulta,dag);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfl1= flops*100/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called ASM-L1 Dw"<< " mflop/s =   "<< mfl1<<std::endl;
 | 
			
		||||
  os << latt4[0]*latt4[1]*latt4[2]*latt4[3]<< " "<<Ls<<" "<< latt4[0] <<" " <<latt4[2]<< " "
 | 
			
		||||
     << mfc<<" "
 | 
			
		||||
     << mfa<<" "
 | 
			
		||||
     << mfo<<" "
 | 
			
		||||
     << mfl1<<std::endl;
 | 
			
		||||
  */
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
 | 
			
		||||
    Dw.DhopOE(srce,resulta,0);
 | 
			
		||||
    PerformanceCounter Counter(i);
 | 
			
		||||
    Counter.Start();
 | 
			
		||||
    Dw.DhopOE(srce,resulta,0);
 | 
			
		||||
    Counter.Stop();
 | 
			
		||||
    Counter.Report();
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
  //resulta = (-0.5) * resulta;
 | 
			
		||||
 | 
			
		||||
  diff = resulto-resulta;
 | 
			
		||||
  std::cout<<norm2(diff)<<std::endl;
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -1,27 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
 | 
			
		||||
Benchmark_comms_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
 | 
			
		||||
Benchmark_dwf_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
 | 
			
		||||
Benchmark_memory_asynch_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
 | 
			
		||||
Benchmark_memory_bandwidth_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_su3_SOURCES=Benchmark_su3.cc
 | 
			
		||||
Benchmark_su3_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
 | 
			
		||||
Benchmark_wilson_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,8 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/lib
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Test code
 | 
			
		||||
#
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/simple_su3_expr.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/simple_su3_test.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										19
									
								
								bootstrap.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										19
									
								
								bootstrap.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,19 @@
 | 
			
		||||
#!/usr/bin/env bash
#
# Bootstrap a fresh checkout: download the Eigen and FFTW source archives,
# hand each one to the matching ./scripts/update_*.sh helper, delete the
# archive, regenerate the Make.inc file lists and run autoreconf.
# Must be run from the top-level source directory.

EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz

eigen_archive=$(basename "${EIGEN_URL}")
fftw_archive=$(basename "${FFTW_URL}")

echo "-- deploying Eigen source..."
# NOTE(review): --no-check-certificate disables TLS verification on any HTTPS
# redirect; kept as in the original, but consider pinning a checksum instead.
# Stop here if the download fails, rather than running the update script and
# rm on a file that does not exist.
wget "${EIGEN_URL}" --no-check-certificate || { echo "error: failed to download ${EIGEN_URL}" >&2; exit 1; }
./scripts/update_eigen.sh "${eigen_archive}"
rm "${eigen_archive}"

echo "-- copying fftw prototypes..."
wget "${FFTW_URL}" || { echo "error: failed to download ${FFTW_URL}" >&2; exit 1; }
./scripts/update_fftw.sh "${fftw_archive}"
rm "${fftw_archive}"

echo '-- generating Make.inc files...'
./scripts/filelist
echo '-- generating configure script...'
autoreconf -fvi
 | 
			
		||||
							
								
								
									
										426
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										426
									
								
								configure.ac
									
									
									
									
									
								
							@@ -1,226 +1,366 @@
 | 
			
		||||
#                         -*- Autoconf -*-
 | 
			
		||||
# Process this file with autoconf to produce a configure script.
 | 
			
		||||
#
 | 
			
		||||
# Project Grid package  
 | 
			
		||||
# 
 | 
			
		||||
# Time-stamp: <2015-07-10 17:46:21 neo>
 | 
			
		||||
 | 
			
		||||
AC_PREREQ([2.63])
 | 
			
		||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
 | 
			
		||||
AC_CANONICAL_SYSTEM
 | 
			
		||||
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
 | 
			
		||||
AC_CANONICAL_BUILD
 | 
			
		||||
AC_CANONICAL_HOST
 | 
			
		||||
AC_CANONICAL_TARGET
 | 
			
		||||
AM_INIT_AUTOMAKE(subdir-objects)
 | 
			
		||||
AC_CONFIG_MACRO_DIR([m4])
 | 
			
		||||
AC_CONFIG_SRCDIR([lib/Grid.h])
 | 
			
		||||
AC_CONFIG_HEADERS([lib/Config.h])
 | 
			
		||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 | 
			
		||||
 | 
			
		||||
AC_MSG_NOTICE([
 | 
			
		||||
 | 
			
		||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
Configuring $PACKAGE v$VERSION  for $host
 | 
			
		||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
# Checks for programs.
 | 
			
		||||
############### Checks for programs
 | 
			
		||||
AC_LANG(C++)
 | 
			
		||||
CXXFLAGS="-O3 $CXXFLAGS"
 | 
			
		||||
AC_PROG_CXX
 | 
			
		||||
AC_OPENMP
 | 
			
		||||
AC_PROG_RANLIB
 | 
			
		||||
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
 | 
			
		||||
AX_EXT
 | 
			
		||||
 | 
			
		||||
# Checks for libraries.
 | 
			
		||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
 | 
			
		||||
############ openmp  ###############
 | 
			
		||||
AC_OPENMP
 | 
			
		||||
 | 
			
		||||
# Checks for header files.
 | 
			
		||||
ac_openmp=no
 | 
			
		||||
 | 
			
		||||
if test "${OPENMP_CXXFLAGS}X" != "X"; then
 | 
			
		||||
ac_openmp=yes
 | 
			
		||||
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
 | 
			
		||||
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
############### Checks for header files
 | 
			
		||||
AC_CHECK_HEADERS(stdint.h)
 | 
			
		||||
AC_CHECK_HEADERS(mm_malloc.h)
 | 
			
		||||
AC_CHECK_HEADERS(malloc/malloc.h)
 | 
			
		||||
AC_CHECK_HEADERS(malloc.h)
 | 
			
		||||
AC_CHECK_HEADERS(endian.h)
 | 
			
		||||
AC_CHECK_HEADERS(execinfo.h)
 | 
			
		||||
AC_CHECK_HEADERS(gmp.h)
 | 
			
		||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
 | 
			
		||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
 | 
			
		||||
 | 
			
		||||
# Checks for typedefs, structures, and compiler characteristics.
 | 
			
		||||
############### Checks for typedefs, structures, and compiler characteristics
 | 
			
		||||
AC_TYPE_SIZE_T
 | 
			
		||||
AC_TYPE_UINT32_T
 | 
			
		||||
AC_TYPE_UINT64_T
 | 
			
		||||
 | 
			
		||||
# Checks for library functions.
 | 
			
		||||
echo
 | 
			
		||||
echo Checking libraries 
 | 
			
		||||
echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
############### GMP and MPFR #################
 | 
			
		||||
AC_ARG_WITH([gmp],
 | 
			
		||||
    [AS_HELP_STRING([--with-gmp=prefix],
 | 
			
		||||
    [try this for a non-standard install prefix of the GMP library])],
 | 
			
		||||
    [AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
 | 
			
		||||
    [AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
 | 
			
		||||
AC_ARG_WITH([mpfr],
 | 
			
		||||
    [AS_HELP_STRING([--with-mpfr=prefix],
 | 
			
		||||
    [try this for a non-standard install prefix of the MPFR library])],
 | 
			
		||||
    [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
 | 
			
		||||
    [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
 | 
			
		||||
 | 
			
		||||
################## lapack ####################
 | 
			
		||||
AC_ARG_ENABLE([lapack],
 | 
			
		||||
    [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], 
 | 
			
		||||
    [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
 | 
			
		||||
 | 
			
		||||
AC_CHECK_FUNCS([gettimeofday])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.gmplib.org)])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.mpfr.org/)])
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
 | 
			
		||||
	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
 | 
			
		||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
 | 
			
		||||
 | 
			
		||||
supported=no
 | 
			
		||||
 | 
			
		||||
case ${ac_SIMD} in
 | 
			
		||||
     SSE4)
 | 
			
		||||
       echo Configuring for SSE4
 | 
			
		||||
       AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
 | 
			
		||||
       if test x"$ax_cv_support_ssse3_ext" = x"yes"; then  dnl minimal support for SSE4
 | 
			
		||||
         supported=yes
 | 
			
		||||
       else
 | 
			
		||||
  	AC_MSG_WARN([Your processor does not support SSE4 instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVX)
 | 
			
		||||
       echo Configuring for AVX
 | 
			
		||||
       AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
 | 
			
		||||
       if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX
 | 
			
		||||
       supported=yes			  
 | 
			
		||||
       else
 | 
			
		||||
       	AC_MSG_WARN([Your processor does not support AVX instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVXFMA4)
 | 
			
		||||
       echo Configuring for AVX
 | 
			
		||||
       AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
 | 
			
		||||
       if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX
 | 
			
		||||
       supported=yes			  
 | 
			
		||||
       else
 | 
			
		||||
       	AC_MSG_WARN([Your processor does not support AVX instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVX2)
 | 
			
		||||
       echo Configuring for AVX2
 | 
			
		||||
       AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
 | 
			
		||||
       if test x"$ax_cv_support_avx2_ext" = x"yes"; then  dnl minimal support for AVX2
 | 
			
		||||
       supported=yes
 | 
			
		||||
       else
 | 
			
		||||
       AC_MSG_WARN([Your processor does not support AVX2 instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVX512)
 | 
			
		||||
       echo Configuring for AVX512 
 | 
			
		||||
       AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
     ;;
 | 
			
		||||
     IMCI)
 | 
			
		||||
       echo Configuring for IMCI
 | 
			
		||||
       AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
     ;;
 | 
			
		||||
     NEONv8)
 | 
			
		||||
       echo Configuring for experimental ARMv8a support 
 | 
			
		||||
       AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
     ;;
 | 
			
		||||
     DEBUG)
 | 
			
		||||
       echo Configuring without SIMD support - only for compiler DEBUGGING!
 | 
			
		||||
       AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
 | 
			
		||||
      ;;     
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]); 
 | 
			
		||||
     ;;
 | 
			
		||||
case ${ac_LAPACK} in
 | 
			
		||||
    no)
 | 
			
		||||
        ;;
 | 
			
		||||
    yes)
 | 
			
		||||
        AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
 | 
			
		||||
    *)
 | 
			
		||||
        AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
 | 
			
		||||
        AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
 | 
			
		||||
        AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
################## first-touch ####################
 | 
			
		||||
# --enable-numa=yes|no|prefix : request the first-touch NUMA optimisation.
# Autoconf stores the option value in the lower-case shell variable
# enable_numa, so that is the variable copied into ac_NUMA.  Reading any
# other spelling leaves ac_NUMA empty whenever the option is given, and an
# empty value is then handled like an explicit request by the case below,
# even for --disable-numa.  Without the option ac_NUMA is "no".
AC_ARG_ENABLE([numa],
    [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], 
    [ac_NUMA=${enable_numa}],[ac_NUMA=no])
 | 
			
		||||
 | 
			
		||||
case ${ac_NUMA} in
 | 
			
		||||
    no)
 | 
			
		||||
        ;;
 | 
			
		||||
    yes)
 | 
			
		||||
        AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
 | 
			
		||||
    *)
 | 
			
		||||
        AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
################## FFTW3 ####################
 | 
			
		||||
AC_ARG_WITH([fftw],    
 | 
			
		||||
            [AS_HELP_STRING([--with-fftw=prefix],
 | 
			
		||||
            [try this for a non-standard install prefix of the FFTW3 library])],
 | 
			
		||||
            [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
 | 
			
		||||
            [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
 | 
			
		||||
 | 
			
		||||
################ Get compiler information
 | 
			
		||||
AC_LANG([C++])
 | 
			
		||||
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
 | 
			
		||||
AX_COMPILER_VENDOR
 | 
			
		||||
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
 | 
			
		||||
      [vendor of C++ compiler that will compile the code])
 | 
			
		||||
AX_GXX_VERSION
 | 
			
		||||
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
 | 
			
		||||
      [version of g++ that will compile the code])
 | 
			
		||||
 | 
			
		||||
############### Checks for library functions
 | 
			
		||||
CXXFLAGS_CPY=$CXXFLAGS
 | 
			
		||||
LDFLAGS_CPY=$LDFLAGS
 | 
			
		||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
 | 
			
		||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
 | 
			
		||||
AC_CHECK_FUNCS([gettimeofday])
 | 
			
		||||
AC_CHECK_LIB([gmp],[__gmpf_init],
 | 
			
		||||
             [AC_CHECK_LIB([mpfr],[mpfr_init],
 | 
			
		||||
                 [AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])]
 | 
			
		||||
                 [have_mpfr=true]
 | 
			
		||||
                 [LIBS="$LIBS -lmpfr"],
 | 
			
		||||
                 [AC_MSG_ERROR([MPFR library not found])])]
 | 
			
		||||
   	     [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
 | 
			
		||||
             [have_gmp=true]
 | 
			
		||||
             [LIBS="$LIBS -lgmp"],
 | 
			
		||||
             [AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
 | 
			
		||||
 | 
			
		||||
if test "${ac_LAPACK}x" != "nox"; then
 | 
			
		||||
    AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
 | 
			
		||||
                 [AC_MSG_ERROR("LAPACK enabled but library not found")])
 | 
			
		||||
fi
 | 
			
		||||
AC_CHECK_LIB([fftw3],[fftw_execute],
 | 
			
		||||
  [AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
 | 
			
		||||
  [have_fftw=true]
 | 
			
		||||
  [LIBS="$LIBS -lfftw3 -lfftw3f"],
 | 
			
		||||
  [AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
 | 
			
		||||
CXXFLAGS=$CXXFLAGS_CPY
 | 
			
		||||
LDFLAGS=$LDFLAGS_CPY
 | 
			
		||||
 | 
			
		||||
############### SIMD instruction selection
 | 
			
		||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
 | 
			
		||||
	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
 | 
			
		||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
 | 
			
		||||
 | 
			
		||||
case ${ax_cv_cxx_compiler_vendor} in
 | 
			
		||||
  clang|gnu)
 | 
			
		||||
    case ${ac_SIMD} in
 | 
			
		||||
      SSE4)
 | 
			
		||||
        AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-msse4.2';;
 | 
			
		||||
      AVX)
 | 
			
		||||
        AC_DEFINE([AVX1],[1],[AVX intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx';;
 | 
			
		||||
      AVXFMA4)
 | 
			
		||||
        AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
 | 
			
		||||
        SIMD_FLAGS='-mavx -mfma4';;
 | 
			
		||||
      AVXFMA)
 | 
			
		||||
        AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
 | 
			
		||||
        SIMD_FLAGS='-mavx -mfma';;
 | 
			
		||||
      AVX2)
 | 
			
		||||
        AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx2 -mfma';;
 | 
			
		||||
      AVX512|AVX512MIC|KNL)
 | 
			
		||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
 | 
			
		||||
      IMCI|KNC)
 | 
			
		||||
        AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      GEN)
 | 
			
		||||
        AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      QPX|BGQ)
 | 
			
		||||
        AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      *)
 | 
			
		||||
        AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
 | 
			
		||||
    esac;;
 | 
			
		||||
  intel)
 | 
			
		||||
    case ${ac_SIMD} in
 | 
			
		||||
      SSE4)
 | 
			
		||||
        AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-msse4.2 -xsse4.2';;
 | 
			
		||||
      AVX)
 | 
			
		||||
        AC_DEFINE([AVX1],[1],[AVX intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx -xavx';;
 | 
			
		||||
      AVXFMA4)
 | 
			
		||||
        AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
 | 
			
		||||
        SIMD_FLAGS='-mavx -mfma';;
 | 
			
		||||
      AVXFMA)
 | 
			
		||||
        AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4])
 | 
			
		||||
        SIMD_FLAGS='-mavx -mfma';;
 | 
			
		||||
      AVX2)
 | 
			
		||||
        AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
 | 
			
		||||
      AVX512)
 | 
			
		||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-xcore-avx512';;
 | 
			
		||||
      AVX512MIC|KNL)
 | 
			
		||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
 | 
			
		||||
        SIMD_FLAGS='-xmic-avx512';;
 | 
			
		||||
      IMCI|KNC)
 | 
			
		||||
        AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      GEN)
 | 
			
		||||
        AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      *)
 | 
			
		||||
        AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
 | 
			
		||||
    esac;;
 | 
			
		||||
  *)
 | 
			
		||||
    AC_MSG_WARN([Compiler unknown, using generic vector code])
 | 
			
		||||
    AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
 | 
			
		||||
esac
 | 
			
		||||
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
 | 
			
		||||
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
 | 
			
		||||
 | 
			
		||||
case ${ac_SIMD} in
 | 
			
		||||
  AVX512|AVX512MIC|KNL)
 | 
			
		||||
    AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
 | 
			
		||||
  *)
 | 
			
		||||
	;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
############### precision selection
 | 
			
		||||
# --enable-precision=single|double : default word size of Real.
# ac_PRECISION is "double" when the option is not given.  Exactly one of
# GRID_DEFAULT_PRECISION_SINGLE / GRID_DEFAULT_PRECISION_DOUBLE is defined.
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
case ${ac_PRECISION} in
     single)
       echo default precision is single
       AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
     ;;
     double)
       echo default precision is double
       AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
     ;;
     *)
       # Any other value, including the yes/no produced by a bare
       # --enable-precision or --disable-precision, would define neither
       # macro above, so stop with a diagnostic instead of continuing.
       AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
     ;;
esac
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
 | 
			
		||||
############### communication type selection
 | 
			
		||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
 | 
			
		||||
 | 
			
		||||
case ${ac_COMMS} in
 | 
			
		||||
     none)
 | 
			
		||||
       echo Configuring for NO communications
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
 | 
			
		||||
     ;;
 | 
			
		||||
     mpi)
 | 
			
		||||
       echo Configuring for MPI communications
 | 
			
		||||
     mpi-auto)
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
 | 
			
		||||
       LX_FIND_MPI
 | 
			
		||||
       if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
 | 
			
		||||
       AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
 | 
			
		||||
       AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
 | 
			
		||||
       AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
 | 
			
		||||
       LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS"
 | 
			
		||||
     ;;
 | 
			
		||||
     mpi)
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
 | 
			
		||||
     ;;
 | 
			
		||||
     mpi3)
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
 | 
			
		||||
     ;;
 | 
			
		||||
     shmem)
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] )
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
 | 
			
		||||
############### RNG selection
 | 
			
		||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
 | 
			
		||||
	[Select Random Number Generator to be used])],\
 | 
			
		||||
	[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
 | 
			
		||||
 | 
			
		||||
case ${ac_CHROMA} in
 | 
			
		||||
     yes)
 | 
			
		||||
       echo Enabling tests regressing to Chroma
 | 
			
		||||
case ${ac_RNG} in
 | 
			
		||||
     ranlux48)
 | 
			
		||||
      AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
 | 
			
		||||
     ;;
 | 
			
		||||
     no)
 | 
			
		||||
       echo Disabling tests regressing to Chroma
 | 
			
		||||
     mt19937)
 | 
			
		||||
      AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); 
 | 
			
		||||
      AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
############### timer option
 | 
			
		||||
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
 | 
			
		||||
	[Enable system dependent high res timers])],\
 | 
			
		||||
	[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
 | 
			
		||||
case ${ac_TIMERS} in
 | 
			
		||||
     yes)
 | 
			
		||||
      AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
 | 
			
		||||
     ;;
 | 
			
		||||
     no)
 | 
			
		||||
      AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
      AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
############### Chroma regression test
 | 
			
		||||
# --enable-chroma : controls the BUILD_CHROMA_REGRESSION automake conditional.
# The action-if-given argument of AC_ARG_ENABLE also runs for
# --disable-chroma, so the value is taken from enable_chroma rather than
# being forced to yes.  Without the option ac_CHROMA is "no".
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],[ac_CHROMA=${enable_chroma}],[ac_CHROMA=no])
case ${ac_CHROMA} in
     yes|no)
     ;;
     *)
       # Reached for forms such as --enable-chroma=something.
       AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); 
     ;;
esac
# POSIX test defines only "=" for string equality; "==" is a shell extension.
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" = "XyesX" ])
 | 
			
		||||
 | 
			
		||||
###################################################################
 | 
			
		||||
# Checks for doxygen support
 | 
			
		||||
# if present enables the "make doxyfile" command
 | 
			
		||||
#echo
 | 
			
		||||
#echo Checking doxygen support 
 | 
			
		||||
#echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
#AC_PROG_DOXYGEN
 | 
			
		||||
############### Doxygen
 | 
			
		||||
AC_PROG_DOXYGEN
 | 
			
		||||
 | 
			
		||||
#if test -n "$DOXYGEN"
 | 
			
		||||
#then
 | 
			
		||||
#AC_CONFIG_FILES([docs/doxy.cfg])
 | 
			
		||||
#fi
 | 
			
		||||
if test -n "$DOXYGEN"
 | 
			
		||||
then
 | 
			
		||||
AC_CONFIG_FILES([docs/doxy.cfg])
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
echo
 | 
			
		||||
echo Creating configuration files
 | 
			
		||||
echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
############### Output
 | 
			
		||||
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
 | 
			
		||||
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
 | 
			
		||||
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
 | 
			
		||||
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
 | 
			
		||||
AC_SUBST([AM_CFLAGS])
 | 
			
		||||
AC_SUBST([AM_CXXFLAGS])
 | 
			
		||||
AC_SUBST([AM_LDFLAGS])
 | 
			
		||||
AC_CONFIG_FILES(Makefile)
 | 
			
		||||
AC_CONFIG_FILES(lib/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/IO/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/core/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/debug/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/forces/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/hmc/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/solver/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(benchmarks/Makefile)
 | 
			
		||||
AC_OUTPUT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
echo "
 | 
			
		||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
			
		||||
Summary of configuration for $PACKAGE v$VERSION
 | 
			
		||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
The following features are enabled:
 | 
			
		||||
 | 
			
		||||
----- PLATFORM ----------------------------------------
 | 
			
		||||
- architecture (build)          : $build_cpu
 | 
			
		||||
- os (build)                    : $build_os
 | 
			
		||||
- architecture (target)         : $target_cpu
 | 
			
		||||
- os (target)                   : $target_os
 | 
			
		||||
- compiler vendor               : ${ax_cv_cxx_compiler_vendor}
 | 
			
		||||
- compiler version              : ${ax_cv_gxx_version}
 | 
			
		||||
----- BUILD OPTIONS -----------------------------------
 | 
			
		||||
- SIMD                          : ${ac_SIMD}
 | 
			
		||||
- Threading                     : ${ac_openmp} 
 | 
			
		||||
- Communications type           : ${ac_COMMS}
 | 
			
		||||
- Default precision             : ${ac_PRECISION}
 | 
			
		||||
- RNG choice                    : ${ac_RNG} 
 | 
			
		||||
- GMP                           : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
 | 
			
		||||
- LAPACK                        : ${ac_LAPACK}
 | 
			
		||||
- FFTW                          : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
 | 
			
		||||
- build DOXYGEN documentation   : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
 | 
			
		||||
- graphs and diagrams           : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
 | 
			
		||||
- Supported SIMD flags          : $SIMD_FLAGS
 | 
			
		||||
----------------------------------------------------------
 | 
			
		||||
- enabled simd support          : ${ac_SIMD}   (supported: $supported )
 | 
			
		||||
- communications type           : ${ac_COMMS}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
----- BUILD FLAGS -------------------------------------
 | 
			
		||||
- CXXFLAGS:
 | 
			
		||||
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'`
 | 
			
		||||
- LDFLAGS:
 | 
			
		||||
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/    -/g'`
 | 
			
		||||
- LIBS:
 | 
			
		||||
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/    -/g'`
 | 
			
		||||
-------------------------------------------------------
 | 
			
		||||
"
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./gcc-bug-report/broken.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <complex>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								include/Grid
									
									
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								include/Grid
									
									
									
									
									
										Symbolic link
									
								
							@@ -0,0 +1 @@
 | 
			
		||||
../lib
 | 
			
		||||
@@ -1,27 +1,56 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Algorithms.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ALGORITHMS_H
 | 
			
		||||
#define GRID_ALGORITHMS_H
 | 
			
		||||
 | 
			
		||||
#include <algorithms/SparseMatrix.h>
 | 
			
		||||
#include <algorithms/LinearOperator.h>
 | 
			
		||||
#include <algorithms/Preconditioner.h>
 | 
			
		||||
#include <Grid/algorithms/SparseMatrix.h>
 | 
			
		||||
#include <Grid/algorithms/LinearOperator.h>
 | 
			
		||||
#include <Grid/algorithms/Preconditioner.h>
 | 
			
		||||
 | 
			
		||||
#include <algorithms/approx/Zolotarev.h>
 | 
			
		||||
#include <algorithms/approx/Chebyshev.h>
 | 
			
		||||
#include <algorithms/approx/Remez.h>
 | 
			
		||||
#include <algorithms/approx/MultiShiftFunction.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Zolotarev.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Chebyshev.h>
 | 
			
		||||
#include <Grid/algorithms/approx/Remez.h>
 | 
			
		||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
 | 
			
		||||
 | 
			
		||||
#include <algorithms/iterative/ConjugateGradient.h>
 | 
			
		||||
#include <algorithms/iterative/ConjugateResidual.h>
 | 
			
		||||
#include <algorithms/iterative/NormalEquations.h>
 | 
			
		||||
#include <algorithms/iterative/SchurRedBlack.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/NormalEquations.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
 | 
			
		||||
 | 
			
		||||
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
 | 
			
		||||
 | 
			
		||||
// Lanczos support
 | 
			
		||||
#include <algorithms/iterative/MatrixUtils.h>
 | 
			
		||||
#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/MatrixUtils.h>
 | 
			
		||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 | 
			
		||||
 | 
			
		||||
#include <algorithms/CoarsenedMatrix.h>
 | 
			
		||||
#include <Grid/algorithms/CoarsenedMatrix.h>
 | 
			
		||||
 | 
			
		||||
// Eigen/lanczos
 | 
			
		||||
// EigCg
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/AlignedAllocator.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
 | 
			
		||||
#define GRID_ALIGNED_ALLOCATOR_H
 | 
			
		||||
 | 
			
		||||
@@ -8,7 +36,6 @@
 | 
			
		||||
#include <malloc.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
#ifdef HAVE_MM_MALLOC_H
 | 
			
		||||
#include <mm_malloc.h>
 | 
			
		||||
#endif
 | 
			
		||||
@@ -30,27 +57,28 @@ public:
 | 
			
		||||
  typedef _Tp        value_type;
 | 
			
		||||
 | 
			
		||||
  template<typename _Tp1>  struct rebind { typedef alignedAllocator<_Tp1> other; };
 | 
			
		||||
 | 
			
		||||
  alignedAllocator() throw() { }
 | 
			
		||||
 | 
			
		||||
  alignedAllocator(const alignedAllocator&) throw() { }
 | 
			
		||||
 | 
			
		||||
  template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
 | 
			
		||||
 | 
			
		||||
  ~alignedAllocator() throw() { }
 | 
			
		||||
 | 
			
		||||
  pointer       address(reference __x)       const { return &__x; }
 | 
			
		||||
  //  const_pointer address(const_reference __x) const { return &__x; }
 | 
			
		||||
 | 
			
		||||
  size_type  max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
 | 
			
		||||
 | 
			
		||||
  pointer allocate(size_type __n, const void* = 0)
 | 
			
		||||
  // Allocate raw storage for __n objects of type _Tp on a 128-byte boundary.
  // The hint argument _p is ignored.  The result of the underlying aligned
  // allocator is returned unchanged, so a failed allocation is passed back
  // to the caller as a null pointer.
  // When GRID_NUMA is defined, every element is additionally written once
  // from a statically scheduled OpenMP loop.
  // NOTE(review): presumably this is the "first touch" page placement the
  // GRID_NUMA build option refers to -- confirm.
  pointer allocate(size_type __n, const void* _p= 0)
  { 
#ifdef HAVE_MM_MALLOC_H
    _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
#else
    _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#endif

#ifdef GRID_NUMA
    // Skip the touch loop when the allocation failed.
    if ( ptr != 0 ) {
      // Value-initialised so that the copies below never read an
      // indeterminate object.
      _Tp tmp = _Tp();
      // The index has the same unsigned type as __n: an int index would be
      // compared against an unsigned bound and would overflow for requests
      // larger than INT_MAX elements.
#pragma omp parallel for schedule(static)
      for(size_type i=0;i<__n;i++){
        ptr[i]=tmp;
      }
    }
#endif 
    return ptr;
  }
 | 
			
		||||
 | 
			
		||||
@@ -63,15 +91,101 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  void construct(pointer __p, const _Tp& __val) { };
 | 
			
		||||
  void construct(pointer __p) { };
 | 
			
		||||
 | 
			
		||||
  void destroy(pointer __p) { };
 | 
			
		||||
};
 | 
			
		||||
template<typename _Tp>  inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
 | 
			
		||||
template<typename _Tp>  inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
 | 
			
		||||
 | 
			
		||||
template<typename _Tp>  inline bool
 | 
			
		||||
operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// MPI3 : comms must use shm region
 | 
			
		||||
// SHMEM: comms must use symmetric heap
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
extern "C" { 
 | 
			
		||||
#include <mpp/shmem.h>
 | 
			
		||||
extern void * shmem_align(size_t, size_t);
 | 
			
		||||
extern void  shmem_free(void *);
 | 
			
		||||
}
 | 
			
		||||
#define PARANOID_SYMMETRIC_HEAP
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
template<typename _Tp>  inline bool
 | 
			
		||||
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////
// commAllocator
//
// Minimal STL-style allocator used for communication buffers.
//  - With GRID_COMMS_SHMEM defined, storage comes from shmem_align() and is
//    returned with shmem_free().
//  - Otherwise storage comes from _mm_malloc() (HAVE_MM_MALLOC_H) or memalign(),
//    aligned to 128 bytes, and is returned with _mm_free() / free().
// construct() and destroy() are deliberately empty: the allocator never runs
// element constructors or destructors, so freshly allocated storage is
// uninitialised.  All instances compare equal.
//////////////////////////////////////////////////////////////////////////////////////////
template<typename _Tp>
class commAllocator {
public: 
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef _Tp*       pointer;
  typedef const _Tp* const_pointer;
  typedef _Tp&       reference;
  typedef const _Tp& const_reference;
  typedef _Tp        value_type;

  template<typename _Tp1>  struct rebind { typedef commAllocator<_Tp1> other; };

  // The allocator holds no state, so every constructor is a no-op.
  commAllocator() throw() { }
  commAllocator(const commAllocator&) throw() { }
  template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
  ~commAllocator() throw() { }

  pointer       address(reference __x)       const { return &__x; }

  // Largest element count whose byte size still fits in size_t.
  size_type  max_size() const throw() { return size_t(-1) / sizeof(_Tp); }

#ifdef GRID_COMMS_SHMEM
  // Returns storage for __n objects obtained from shmem_align(); the hint _p is ignored.
  // The result of shmem_align() is returned unchecked.
  pointer allocate(size_type __n, const void* _p= 0)
  {
#ifdef CRAY
    // NOTE(review): size is passed first here but alignment first in the generic
    // branch -- presumably the Cray prototype orders its arguments differently; confirm.
    _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
#else
    _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
#endif
#ifdef PARANOID_SYMMETRIC_HEAP
    // Consistency check: broadcast one PE's address (root argument 0) and compare
    // it with the local one.  Both objects are static so that they have the same
    // address on every PE.
    static void * bcast;
    static long  psync[_SHMEM_REDUCE_SYNC_SIZE];

    bcast = (void *) ptr;
    shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);

    if ( bcast != ptr ) {
      // Pointers must be printed with %p and void* arguments; the former %lx
      // conversion did not match the argument types.
      std::printf("inconsistent alloc pe %d %p %p \n",shmem_my_pe(),bcast,(void *)ptr);
      std::fflush(stdout);
      //      BACKTRACEFILE();
      exit(1); // fatal inconsistency: report failure, not success, to the launcher
    }
    assert( bcast == (void *) ptr);
#endif 
    return ptr;
  }

  // Releases storage previously returned by allocate(); the count is unused.
  void deallocate(pointer __p, size_type) { 
    shmem_free((void *)__p);
  }
#else
  // Returns 128-byte aligned storage for __n objects; the hint _p is ignored.
  // The result of the underlying allocation call is returned unchecked.
  pointer allocate(size_type __n, const void* _p= 0) 
  {
#ifdef HAVE_MM_MALLOC_H
    _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
#else
    _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#endif
    return ptr;
  }

  // Releases storage with the routine matching the one used by allocate().
  void deallocate(pointer __p, size_type) { 
#ifdef HAVE_MM_MALLOC_H
    _mm_free((void *)__p); 
#else
    free((void *)__p);
#endif
  }
#endif

  // Intentionally empty: elements are neither constructed nor destroyed here.
  void construct(pointer __p, const _Tp& __val) { };
  void construct(pointer __p) { };
  void destroy(pointer __p) { };
};

// Stateless allocator: any two instances are interchangeable.
template<typename _Tp>  inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
template<typename _Tp>  inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Template typedefs
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class T> using Vector     = std::vector<T,alignedAllocator<T> >;           
 | 
			
		||||
template<class T> using commVector = std::vector<T,commAllocator<T> >;              
 | 
			
		||||
template<class T> using Matrix     = std::vector<std::vector<T,alignedAllocator<T> > >;
 | 
			
		||||
    
 | 
			
		||||
}; // namespace Grid
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,35 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Cartesian.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_H
 | 
			
		||||
#define GRID_CARTESIAN_H
 | 
			
		||||
 | 
			
		||||
#include <cartesian/Cartesian_base.h>
 | 
			
		||||
#include <cartesian/Cartesian_full.h>
 | 
			
		||||
#include <cartesian/Cartesian_red_black.h> 
 | 
			
		||||
#include <Grid/cartesian/Cartesian_base.h>
 | 
			
		||||
#include <Grid/cartesian/Cartesian_full.h>
 | 
			
		||||
#include <Grid/cartesian/Cartesian_red_black.h> 
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Communicator.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_COMMUNICATOR_H
 | 
			
		||||
#define GRID_COMMUNICATOR_H
 | 
			
		||||
 | 
			
		||||
#include <communicator/Communicator_base.h>
 | 
			
		||||
#include <Grid/communicator/Communicator_base.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										174
									
								
								lib/Config.h.in
									
									
									
									
									
								
							
							
						
						
									
										174
									
								
								lib/Config.h.in
									
									
									
									
									
								
							@@ -1,174 +0,0 @@
 | 
			
		||||
/* lib/Config.h.in.  Generated from configure.ac by autoheader.  */
 | 
			
		||||
 | 
			
		||||
/* AVX Intrinsics */
 | 
			
		||||
#undef AVX1
 | 
			
		||||
 | 
			
		||||
/* AVX2 Intrinsics */
 | 
			
		||||
#undef AVX2
 | 
			
		||||
 | 
			
		||||
/* AVX512 Intrinsics for Knights Landing */
 | 
			
		||||
#undef AVX512
 | 
			
		||||
 | 
			
		||||
/* AVX Intrinsics with FMA4 */
 | 
			
		||||
#undef AVXFMA4
 | 
			
		||||
 | 
			
		||||
/* EMPTY_SIMD only for DEBUGGING */
 | 
			
		||||
#undef EMPTY_SIMD
 | 
			
		||||
 | 
			
		||||
/* GRID_COMMS_MPI */
 | 
			
		||||
#undef GRID_COMMS_MPI
 | 
			
		||||
 | 
			
		||||
/* GRID_COMMS_NONE */
 | 
			
		||||
#undef GRID_COMMS_NONE
 | 
			
		||||
 | 
			
		||||
/* GRID_DEFAULT_PRECISION is DOUBLE */
 | 
			
		||||
#undef GRID_DEFAULT_PRECISION_DOUBLE
 | 
			
		||||
 | 
			
		||||
/* GRID_DEFAULT_PRECISION is SINGLE */
 | 
			
		||||
#undef GRID_DEFAULT_PRECISION_SINGLE
 | 
			
		||||
 | 
			
		||||
/* Support Altivec instructions */
 | 
			
		||||
#undef HAVE_ALTIVEC
 | 
			
		||||
 | 
			
		||||
/* Support AVX (Advanced Vector Extensions) instructions */
 | 
			
		||||
#undef HAVE_AVX
 | 
			
		||||
 | 
			
		||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
 | 
			
		||||
#undef HAVE_AVX2
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
			
		||||
   don't. */
 | 
			
		||||
#undef HAVE_DECL_BE64TOH
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
 | 
			
		||||
   */
 | 
			
		||||
#undef HAVE_DECL_NTOHLL
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <endian.h> header file. */
 | 
			
		||||
#undef HAVE_ENDIAN_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <execinfo.h> header file. */
 | 
			
		||||
#undef HAVE_EXECINFO_H
 | 
			
		||||
 | 
			
		||||
/* Support FMA3 (Fused Multiply-Add) instructions */
 | 
			
		||||
#undef HAVE_FMA
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `gettimeofday' function. */
 | 
			
		||||
#undef HAVE_GETTIMEOFDAY
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <gmp.h> header file. */
 | 
			
		||||
#undef HAVE_GMP_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <inttypes.h> header file. */
 | 
			
		||||
#undef HAVE_INTTYPES_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <malloc.h> header file. */
 | 
			
		||||
#undef HAVE_MALLOC_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <malloc/malloc.h> header file. */
 | 
			
		||||
#undef HAVE_MALLOC_MALLOC_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <memory.h> header file. */
 | 
			
		||||
#undef HAVE_MEMORY_H
 | 
			
		||||
 | 
			
		||||
/* Support mmx instructions */
 | 
			
		||||
#undef HAVE_MMX
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
 | 
			
		||||
#undef HAVE_MM_MALLOC_H
 | 
			
		||||
 | 
			
		||||
/* Support SSE (Streaming SIMD Extensions) instructions */
 | 
			
		||||
#undef HAVE_SSE
 | 
			
		||||
 | 
			
		||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
 | 
			
		||||
#undef HAVE_SSE2
 | 
			
		||||
 | 
			
		||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
 | 
			
		||||
#undef HAVE_SSE3
 | 
			
		||||
 | 
			
		||||
/* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */
 | 
			
		||||
#undef HAVE_SSE4_1
 | 
			
		||||
 | 
			
		||||
/* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */
 | 
			
		||||
#undef HAVE_SSE4_2
 | 
			
		||||
 | 
			
		||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
 | 
			
		||||
#undef HAVE_SSSE3
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdint.h> header file. */
 | 
			
		||||
#undef HAVE_STDINT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdlib.h> header file. */
 | 
			
		||||
#undef HAVE_STDLIB_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <strings.h> header file. */
 | 
			
		||||
#undef HAVE_STRINGS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string.h> header file. */
 | 
			
		||||
#undef HAVE_STRING_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/stat.h> header file. */
 | 
			
		||||
#undef HAVE_SYS_STAT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/types.h> header file. */
 | 
			
		||||
#undef HAVE_SYS_TYPES_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <unistd.h> header file. */
 | 
			
		||||
#undef HAVE_UNISTD_H
 | 
			
		||||
 | 
			
		||||
/* IMCI Intrinsics for Knights Corner */
 | 
			
		||||
#undef IMCI
 | 
			
		||||
 | 
			
		||||
/* NEON ARMv8 Experimental support */
 | 
			
		||||
#undef NEONv8
 | 
			
		||||
 | 
			
		||||
/* Name of package */
 | 
			
		||||
#undef PACKAGE
 | 
			
		||||
 | 
			
		||||
/* Define to the address where bug reports for this package should be sent. */
 | 
			
		||||
#undef PACKAGE_BUGREPORT
 | 
			
		||||
 | 
			
		||||
/* Define to the full name of this package. */
 | 
			
		||||
#undef PACKAGE_NAME
 | 
			
		||||
 | 
			
		||||
/* Define to the full name and version of this package. */
 | 
			
		||||
#undef PACKAGE_STRING
 | 
			
		||||
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#undef PACKAGE_TARNAME
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#undef PACKAGE_URL
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#undef PACKAGE_VERSION
 | 
			
		||||
 | 
			
		||||
/* SSE4 Intrinsics */
 | 
			
		||||
#undef SSE4
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the ANSI C header files. */
 | 
			
		||||
#undef STDC_HEADERS
 | 
			
		||||
 | 
			
		||||
/* Version number of package */
 | 
			
		||||
#undef VERSION
 | 
			
		||||
 | 
			
		||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
 | 
			
		||||
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
 | 
			
		||||
   #define below would cause a syntax error. */
 | 
			
		||||
#undef _UINT32_T
 | 
			
		||||
 | 
			
		||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
 | 
			
		||||
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
 | 
			
		||||
   #define below would cause a syntax error. */
 | 
			
		||||
#undef _UINT64_T
 | 
			
		||||
 | 
			
		||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
 | 
			
		||||
#undef size_t
 | 
			
		||||
 | 
			
		||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
#undef uint32_t
 | 
			
		||||
 | 
			
		||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
#undef uint64_t
 | 
			
		||||
							
								
								
									
										41
									
								
								lib/Cshift.h
									
									
									
									
									
								
							
							
						
						
									
										41
									
								
								lib/Cshift.h
									
									
									
									
									
								
							@@ -1,13 +1,48 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Cshift.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_H_
 | 
			
		||||
#define _GRID_CSHIFT_H_
 | 
			
		||||
 | 
			
		||||
#include <cshift/Cshift_common.h>
 | 
			
		||||
#include <Grid/cshift/Cshift_common.h>
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_NONE
 | 
			
		||||
#include <cshift/Cshift_none.h>
 | 
			
		||||
#include <Grid/cshift/Cshift_none.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
#include <cshift/Cshift_mpi.h>
 | 
			
		||||
#include <Grid/cshift/Cshift_mpi.h>
 | 
			
		||||
#endif 
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_MPI3
 | 
			
		||||
#include <Grid/cshift/Cshift_mpi.h>
 | 
			
		||||
#endif 
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
 | 
			
		||||
#endif 
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										271
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										271
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,271 @@
 | 
			
		||||
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/FFT.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_FFT_H_
 | 
			
		||||
#define _GRID_FFT_H_
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_FFTW	
 | 
			
		||||
#include <fftw3.h>
 | 
			
		||||
#endif
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  template<class scalar> struct FFTW { };
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_FFTW	
 | 
			
		||||
  template<> struct FFTW<ComplexD> {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    typedef fftw_complex FFTW_scalar;
 | 
			
		||||
    typedef fftw_plan    FFTW_plan;
 | 
			
		||||
 | 
			
		||||
    static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
 | 
			
		||||
					FFTW_scalar *in, const int *inembed,		
 | 
			
		||||
					int istride, int idist,		
 | 
			
		||||
					FFTW_scalar *out, const int *onembed,		
 | 
			
		||||
					int ostride, int odist,		
 | 
			
		||||
					int sign, unsigned flags) {
 | 
			
		||||
      return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
 | 
			
		||||
    }	  
 | 
			
		||||
    
 | 
			
		||||
    static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
 | 
			
		||||
      ::fftw_flops(p,add,mul,fmas);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
 | 
			
		||||
      ::fftw_execute_dft(p,in,out);
 | 
			
		||||
    }
 | 
			
		||||
    inline static void fftw_destroy_plan(const FFTW_plan p) {
 | 
			
		||||
      ::fftw_destroy_plan(p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<> struct FFTW<ComplexF> {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    typedef fftwf_complex FFTW_scalar;
 | 
			
		||||
    typedef fftwf_plan    FFTW_plan;
 | 
			
		||||
 | 
			
		||||
    static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
 | 
			
		||||
					FFTW_scalar *in, const int *inembed,		
 | 
			
		||||
					int istride, int idist,		
 | 
			
		||||
					FFTW_scalar *out, const int *onembed,		
 | 
			
		||||
					int ostride, int odist,		
 | 
			
		||||
					int sign, unsigned flags) {
 | 
			
		||||
      return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
 | 
			
		||||
    }	  
 | 
			
		||||
    
 | 
			
		||||
    static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
 | 
			
		||||
      ::fftwf_flops(p,add,mul,fmas);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
 | 
			
		||||
      ::fftwf_execute_dft(p,in,out);
 | 
			
		||||
    }
 | 
			
		||||
    inline static void fftw_destroy_plan(const FFTW_plan p) {
 | 
			
		||||
      ::fftwf_destroy_plan(p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef FFTW_FORWARD
 | 
			
		||||
#define FFTW_FORWARD (-1)
 | 
			
		||||
#define FFTW_BACKWARD (+1)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  class FFT { 
 | 
			
		||||
  private:
 | 
			
		||||
 | 
			
		||||
    GridCartesian *vgrid;
 | 
			
		||||
    GridCartesian *sgrid;
 | 
			
		||||
 | 
			
		||||
    int Nd;
 | 
			
		||||
    double flops;
 | 
			
		||||
    double flops_call;
 | 
			
		||||
    uint64_t usec;
 | 
			
		||||
 | 
			
		||||
    std::vector<int> dimensions;
 | 
			
		||||
    std::vector<int> processors;
 | 
			
		||||
    std::vector<int> processor_coor;
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    static const int forward=FFTW_FORWARD;
 | 
			
		||||
    static const int backward=FFTW_BACKWARD;
 | 
			
		||||
 | 
			
		||||
    double Flops(void) {return flops;}
 | 
			
		||||
    double MFlops(void) {return flops/usec;}
 | 
			
		||||
 | 
			
		||||
    FFT ( GridCartesian * grid ) : 
 | 
			
		||||
      vgrid(grid),
 | 
			
		||||
      Nd(grid->_ndimension),
 | 
			
		||||
      dimensions(grid->_fdimensions),
 | 
			
		||||
      processors(grid->_processors),
 | 
			
		||||
      processor_coor(grid->_processor_coor)
 | 
			
		||||
    {
 | 
			
		||||
      flops=0;
 | 
			
		||||
      usec =0;
 | 
			
		||||
      std::vector<int> layout(Nd,1);
 | 
			
		||||
      sgrid = new GridCartesian(dimensions,layout,processors);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ~FFT ( void)  { 
 | 
			
		||||
      delete sgrid; 
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
 | 
			
		||||
 | 
			
		||||
      conformable(result._grid,vgrid);
 | 
			
		||||
      conformable(source._grid,vgrid);
 | 
			
		||||
 | 
			
		||||
      int L = vgrid->_ldimensions[dim];
 | 
			
		||||
      int G = vgrid->_fdimensions[dim];
 | 
			
		||||
 | 
			
		||||
      std::vector<int> layout(Nd,1);
 | 
			
		||||
      std::vector<int> pencil_gd(vgrid->_fdimensions);
 | 
			
		||||
 | 
			
		||||
      pencil_gd[dim] = G*processors[dim];    
 | 
			
		||||
 | 
			
		||||
      // Pencil global vol LxLxGxLxL per node
 | 
			
		||||
      GridCartesian pencil_g(pencil_gd,layout,processors);
 | 
			
		||||
 | 
			
		||||
      // Construct pencils
 | 
			
		||||
      typedef typename vobj::scalar_object sobj;
 | 
			
		||||
      typedef typename sobj::scalar_type   scalar;
 | 
			
		||||
 | 
			
		||||
      Lattice<vobj> ssource(vgrid); ssource =source;
 | 
			
		||||
      Lattice<sobj> pgsource(&pencil_g);
 | 
			
		||||
      Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
 | 
			
		||||
 | 
			
		||||
#ifndef HAVE_FFTW	
 | 
			
		||||
      assert(0);
 | 
			
		||||
#else 
 | 
			
		||||
      typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
 | 
			
		||||
      typedef typename FFTW<scalar>::FFTW_plan   FFTW_plan;
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	int Ncomp = sizeof(sobj)/sizeof(scalar);
 | 
			
		||||
	int Nlow  = 1;
 | 
			
		||||
	for(int d=0;d<dim;d++){
 | 
			
		||||
	  Nlow*=vgrid->_ldimensions[d];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	int rank = 1;  /* 1d transforms */
 | 
			
		||||
	int n[] = {G}; /* 1d transforms of length G */
 | 
			
		||||
	int howmany = Ncomp;
 | 
			
		||||
	int odist,idist,istride,ostride;
 | 
			
		||||
	idist   = odist   = 1;          /* Distance between consecutive FT's */
 | 
			
		||||
	istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
 | 
			
		||||
	int *inembed = n, *onembed = n;
 | 
			
		||||
 | 
			
		||||
	
 | 
			
		||||
	int sign = FFTW_FORWARD;
 | 
			
		||||
	if (inverse) sign = FFTW_BACKWARD;
 | 
			
		||||
 | 
			
		||||
	FFTW_plan p;
 | 
			
		||||
	{
 | 
			
		||||
	  FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
 | 
			
		||||
	  FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
 | 
			
		||||
	  p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
 | 
			
		||||
					       in,inembed,
 | 
			
		||||
					       istride,idist,
 | 
			
		||||
					       out,onembed,
 | 
			
		||||
					       ostride, odist,
 | 
			
		||||
					       sign,FFTW_ESTIMATE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
    std::vector<int> lcoor(Nd), gcoor(Nd);
 | 
			
		||||
 | 
			
		||||
	// Barrel shift and collect global pencil
 | 
			
		||||
	for(int p=0;p<processors[dim];p++) { 
 | 
			
		||||
 | 
			
		||||
	  for(int idx=0;idx<sgrid->lSites();idx++) { 
 | 
			
		||||
 | 
			
		||||
	    
 | 
			
		||||
    	    sgrid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
 | 
			
		||||
	    sobj s;
 | 
			
		||||
 | 
			
		||||
	    peekLocalSite(s,ssource,lcoor);
 | 
			
		||||
 | 
			
		||||
	    lcoor[dim]+=p*L;
 | 
			
		||||
	   
 | 
			
		||||
	    pokeLocalSite(s,pgsource,lcoor);
 | 
			
		||||
	  }
 | 
			
		||||
 | 
			
		||||
	  ssource = Cshift(ssource,dim,L);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	// Loop over orthog coords
 | 
			
		||||
	int NN=pencil_g.lSites();
 | 
			
		||||
	GridStopWatch timer;
 | 
			
		||||
	timer.Start();
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
	for(int idx=0;idx<NN;idx++) {
 | 
			
		||||
	  pencil_g.LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
 | 
			
		||||
	  if ( lcoor[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0
 | 
			
		||||
	    FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
 | 
			
		||||
	    FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
 | 
			
		||||
	    FFTW<scalar>::fftw_execute_dft(p,in,out);
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        timer.Stop();
 | 
			
		||||
 | 
			
		||||
          double add,mul,fma;
 | 
			
		||||
          FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
 | 
			
		||||
          flops_call = add+mul+2.0*fma;
 | 
			
		||||
          usec += timer.useconds();
 | 
			
		||||
          flops+= flops_call*NN;
 | 
			
		||||
        int pc = processor_coor[dim];
 | 
			
		||||
        for(int idx=0;idx<sgrid->lSites();idx++) {
 | 
			
		||||
	  sgrid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
	  gcoor = lcoor;
 | 
			
		||||
	  // extract the result
 | 
			
		||||
	  sobj s;
 | 
			
		||||
	  gcoor[dim] = lcoor[dim]+L*pc;
 | 
			
		||||
	  peekLocalSite(s,pgresult,gcoor);
 | 
			
		||||
	  pokeLocalSite(s,result,lcoor);
 | 
			
		||||
	}
 | 
			
		||||
      	  
 | 
			
		||||
	FFTW<scalar>::fftw_destroy_plan(p);
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										72
									
								
								lib/Grid.h
									
									
									
									
									
								
							
							
						
						
									
										72
									
								
								lib/Grid.h
									
									
									
									
									
								
							@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Grid.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
//
 | 
			
		||||
//  Grid.h
 | 
			
		||||
//  simd
 | 
			
		||||
@@ -30,25 +59,32 @@
 | 
			
		||||
///////////////////
 | 
			
		||||
// Grid headers
 | 
			
		||||
///////////////////
 | 
			
		||||
#include <serialisation/Serialisation.h>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
#include <Timer.h>
 | 
			
		||||
#include <Log.h>
 | 
			
		||||
#include <AlignedAllocator.h>
 | 
			
		||||
#include <Simd.h>
 | 
			
		||||
#include <Threads.h>
 | 
			
		||||
#include <Communicator.h> 
 | 
			
		||||
#include <Cartesian.h>    
 | 
			
		||||
#include <Tensors.h>      
 | 
			
		||||
#include <Lattice.h>      
 | 
			
		||||
#include <Cshift.h>       
 | 
			
		||||
#include <Stencil.h>      
 | 
			
		||||
#include <Algorithms.h>   
 | 
			
		||||
#include <qcd/QCD.h>
 | 
			
		||||
#include <parallelIO/BinaryIO.h>
 | 
			
		||||
#include <parallelIO/NerscIO.h>
 | 
			
		||||
#include <Grid/serialisation/Serialisation.h>
 | 
			
		||||
#include "Config.h"
 | 
			
		||||
#include <Grid/Timer.h>
 | 
			
		||||
#include <Grid/PerfCount.h>
 | 
			
		||||
#include <Grid/Log.h>
 | 
			
		||||
#include <Grid/AlignedAllocator.h>
 | 
			
		||||
#include <Grid/Simd.h>
 | 
			
		||||
#include <Grid/Threads.h>
 | 
			
		||||
#include <Grid/Lexicographic.h>
 | 
			
		||||
#include <Grid/Init.h>
 | 
			
		||||
#include <Grid/Communicator.h> 
 | 
			
		||||
#include <Grid/Cartesian.h>    
 | 
			
		||||
#include <Grid/Tensors.h>      
 | 
			
		||||
#include <Grid/Lattice.h>      
 | 
			
		||||
#include <Grid/Cshift.h>       
 | 
			
		||||
#include <Grid/Stencil.h>      
 | 
			
		||||
#include <Grid/Algorithms.h>   
 | 
			
		||||
#include <Grid/parallelIO/BinaryIO.h>
 | 
			
		||||
#include <Grid/qcd/QCD.h>
 | 
			
		||||
#include <Grid/parallelIO/NerscIO.h>
 | 
			
		||||
 | 
			
		||||
#include <Grid/FFT.h>
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/hmc/NerscCheckpointer.h>
 | 
			
		||||
#include <Grid/qcd/hmc/HmcRunner.h>
 | 
			
		||||
 | 
			
		||||
#include <Init.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										138
									
								
								lib/Init.cc
									
									
									
									
									
								
							
							
						
						
									
										138
									
								
								lib/Init.cc
									
									
									
									
									
								
							@@ -1,3 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Init.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
/****************************************************************************/
 | 
			
		||||
/* pab: Signal magic. Processor state dump is x86-64 specific               */
 | 
			
		||||
/****************************************************************************/
 | 
			
		||||
@@ -15,12 +45,6 @@
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <iterator>
 | 
			
		||||
 | 
			
		||||
#define __X86_64
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
#include <execinfo.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
@@ -120,11 +144,16 @@ void GridParseLayout(char **argv,int argc,
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
 | 
			
		||||
    std::vector<int> ompthreads(0);
 | 
			
		||||
#ifndef GRID_OMP
 | 
			
		||||
    std::cout << GridLogWarning << "'--threads' option used but Grid was"
 | 
			
		||||
              << " not compiled with thread support" << std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
    arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
 | 
			
		||||
    GridCmdOptionIntVector(arg,ompthreads);
 | 
			
		||||
    assert(ompthreads.size()==1);
 | 
			
		||||
    GridThread::SetThreads(ompthreads[0]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
 | 
			
		||||
    std::vector<int> cores(0);
 | 
			
		||||
    arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
 | 
			
		||||
@@ -142,15 +171,17 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
 | 
			
		||||
/////////////////////////////////////////////////////////
 | 
			
		||||
//
 | 
			
		||||
/////////////////////////////////////////////////////////
 | 
			
		||||
static int Grid_is_initialised = 0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void Grid_init(int *argc,char ***argv)
 | 
			
		||||
{
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
  MPI_Init(argc,argv);
 | 
			
		||||
#endif
 | 
			
		||||
  // Parse command line args.
 | 
			
		||||
 | 
			
		||||
  GridLogger::StopWatch.Start();
 | 
			
		||||
 | 
			
		||||
  CartesianCommunicator::Init(argc,argv);
 | 
			
		||||
 | 
			
		||||
  // Parse command line args.
 | 
			
		||||
 | 
			
		||||
  std::string arg;
 | 
			
		||||
  std::vector<std::string> logstreams;
 | 
			
		||||
  std::string defaultLog("Error,Warning,Message,Performance");
 | 
			
		||||
@@ -164,9 +195,10 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    std::cout<<GridLogMessage<<"--debug-stdout  : print stdout from EVERY node"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--mpi n.n.n.n   : default MPI decomposition"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--omp n         : default number of OMP threads"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--threads n     : default number of OMP threads"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"--grid n.n.n.n  : default Grid size"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Debug"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
 | 
			
		||||
@@ -175,7 +207,6 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    GridLogConfigure(logstreams);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
 | 
			
		||||
    Grid_debug_handler_init();
 | 
			
		||||
  }
 | 
			
		||||
@@ -183,17 +214,19 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    Grid_quiesce_nodes();
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
 | 
			
		||||
    QCD::WilsonFermionStatic::HandOptDslash=1;
 | 
			
		||||
    QCD::WilsonFermion5DStatic::HandOptDslash=1;
 | 
			
		||||
    QCD::WilsonKernelsStatic::HandOpt=1;
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
 | 
			
		||||
    LebesgueOrder::UseLebesgueOrder=1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
 | 
			
		||||
    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
 | 
			
		||||
    GridCmdOptionIntVector(arg,LebesgueOrder::Block);
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--timestamp") ){
 | 
			
		||||
    GridLogTimestamp(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GridParseLayout(*argv,*argc,
 | 
			
		||||
		  Grid_default_latt,
 | 
			
		||||
		  Grid_default_mpi);
 | 
			
		||||
@@ -207,24 +240,59 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::string COL_RED    = GridLogColours.colour["RED"];
 | 
			
		||||
  std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
 | 
			
		||||
  std::string COL_BLACK  = GridLogColours.colour["BLACK"];
 | 
			
		||||
  std::string COL_GREEN  = GridLogColours.colour["GREEN"];
 | 
			
		||||
  std::string COL_BLUE   = GridLogColours.colour["BLUE"];
 | 
			
		||||
  std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
 | 
			
		||||
  std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  std::cout <<std::endl;
 | 
			
		||||
  std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_RED  << "__|_ |  |  |  "<<             "|  |  | "<<COL_PURPLE<<" |  |  |"<<                "  |  |  | _|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_RED  << "__|_          "<<             "        "<<COL_PURPLE<<"        "<<                "          _|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_PURPLE<<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_PURPLE<<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_PURPLE<<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G  GG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_GREEN <<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G   G   "<<COL_RED<<" R  R   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_GREEN <<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" R   R  "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_GREEN <<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<COL_BLUE << "__|_          "<<             "        "<<COL_GREEN <<"        "<<                "          _|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<COL_BLUE << "  |  |  |  |  "<<             "|  |  | "<<COL_GREEN <<" |  |  |"<<                "  |  |  |  |  "<<std::endl; 
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout <<COL_YELLOW<< std::endl;
 | 
			
		||||
  std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
 | 
			
		||||
  std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
 | 
			
		||||
  std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
 | 
			
		||||
  std::cout << "(at your option) any later version."<<std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
 | 
			
		||||
  std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
 | 
			
		||||
  std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl;
 | 
			
		||||
  std::cout << "GNU General Public License for more details."<<std::endl;
 | 
			
		||||
  std::cout << COL_BACKGROUND <<std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
 | 
			
		||||
  Grid_is_initialised = 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
void Grid_finalize(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
 | 
			
		||||
  MPI_Finalize();
 | 
			
		||||
  Grid_unquiesce_nodes();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
double usecond(void) {
 | 
			
		||||
  struct timeval tv;
 | 
			
		||||
  gettimeofday(&tv,NULL);
 | 
			
		||||
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define _NBACKTRACE (256)
 | 
			
		||||
void * Grid_backtrace_buffer[_NBACKTRACE];
 | 
			
		||||
 | 
			
		||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
			
		||||
@@ -234,13 +302,13 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
			
		||||
  printf("         code %d\n",si->si_code);
 | 
			
		||||
 | 
			
		||||
  // Linux/Posix
 | 
			
		||||
#ifdef __linux__ 
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
  // And x86 64bit
 | 
			
		||||
    ucontext_t * uc= (ucontext_t *)ptr;
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  ucontext_t * uc= (ucontext_t *)ptr;
 | 
			
		||||
  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
 | 
			
		||||
  printf("  instruction %llx\n",(unsigned long long)sc->rip);
 | 
			
		||||
#define REG(A)  printf("  %s %lx\n",#A,sc-> A);
 | 
			
		||||
 | 
			
		||||
  REG(rdi);
 | 
			
		||||
  REG(rsi);
 | 
			
		||||
  REG(rbp);
 | 
			
		||||
@@ -261,17 +329,15 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
			
		||||
  REG(r14);
 | 
			
		||||
  REG(r15);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
  int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);
 | 
			
		||||
  char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
 | 
			
		||||
  for (int i = 0; i < symbols; i++){
 | 
			
		||||
    printf ("%s\n", strings[i]);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
  BACKTRACE();
 | 
			
		||||
  exit(0);
 | 
			
		||||
  return;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_FPE
 | 
			
		||||
#define _GNU_SOURCE
 | 
			
		||||
#include <fenv.h>
 | 
			
		||||
#endif
 | 
			
		||||
void Grid_debug_handler_init(void)
 | 
			
		||||
{
 | 
			
		||||
  struct sigaction sa,osa;
 | 
			
		||||
@@ -280,5 +346,9 @@ void Grid_debug_handler_init(void)
 | 
			
		||||
  sa.sa_flags    = SA_SIGINFO;
 | 
			
		||||
  sigaction(SIGSEGV,&sa,NULL);
 | 
			
		||||
  sigaction(SIGTRAP,&sa,NULL);
 | 
			
		||||
#ifdef GRID_FPE
 | 
			
		||||
  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
 | 
			
		||||
  sigaction(SIGFPE,&sa,NULL);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										32
									
								
								lib/Init.h
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								lib/Init.h
									
									
									
									
									
								
							@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Init.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_INIT_H
 | 
			
		||||
#define GRID_INIT_H
 | 
			
		||||
 | 
			
		||||
@@ -5,6 +33,7 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  void Grid_init(int *argc,char ***argv);
 | 
			
		||||
  void Grid_finalize(void);
 | 
			
		||||
 | 
			
		||||
  // internal, controlled with --handle
 | 
			
		||||
  void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
 | 
			
		||||
  void Grid_debug_handler_init(void);
 | 
			
		||||
@@ -16,11 +45,14 @@ namespace Grid {
 | 
			
		||||
  const std::vector<int> &GridDefaultMpi(void);
 | 
			
		||||
  const int              &GridThreads(void)  ;
 | 
			
		||||
  void                    GridSetThreads(int t) ;
 | 
			
		||||
  void GridLogTimestamp(int);
 | 
			
		||||
 | 
			
		||||
  // Common parsing chores
 | 
			
		||||
  std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
 | 
			
		||||
  bool        GridCmdOptionExists(char** begin, char** end, const std::string& option);
 | 
			
		||||
  std::string GridCmdVectorIntToString(const std::vector<int> & vec);
 | 
			
		||||
  void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
 | 
			
		||||
  void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec);
 | 
			
		||||
 | 
			
		||||
  void GridParseLayout(char **argv,int argc,
 | 
			
		||||
		       std::vector<int> &latt,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Lattice.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_H
 | 
			
		||||
#define GRID_LATTICE_H
 | 
			
		||||
 | 
			
		||||
#include <lattice/Lattice_base.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_base.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										32
									
								
								lib/Lexicographic.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								lib/Lexicographic.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
			
		||||
#ifndef GRID_LEXICOGRAPHIC_H
 | 
			
		||||
#define GRID_LEXICOGRAPHIC_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
  // Conversions between a linear site index and a multi-dimensional
  // coordinate, with dimension 0 varying fastest (stride 1).
  class Lexicographic {
  public:

    // Decompose a linear index into per-dimension coordinates.
    //   coor  : output; resized to dims.size() and overwritten.
    //   index : linear index to decompose (taken by value, consumed locally).
    //   dims  : extent of each dimension; every entry must be non-zero
    //           because it is used as a divisor.
    static inline void CoorFromIndex (std::vector<int>& coor,int index,const std::vector<int> &dims){
      const int nd = dims.size();
      coor.resize(nd);
      for(int d=0;d<nd;d++){
        coor[d] = index % dims[d];   // position along dimension d
        index   = index / dims[d];   // what is left for the slower dimensions
      }
    }

    // Compose per-dimension coordinates into a linear index.
    //   coor  : input coordinate; must hold at least dims.size() entries.
    //   index : output; overwritten with sum_d coor[d] * prod_{e<d} dims[e].
    //   dims  : extent of each dimension.
    static inline void IndexFromCoor (const std::vector<int>& coor,int &index,const std::vector<int> &dims){
      const int nd = dims.size();
      int stride = 1;                // product of the extents of dimensions < d
      index = 0;
      for(int d=0;d<nd;d++){
        index  = index + stride*coor[d];
        stride = stride*dims[d];
      }
    }

  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										105
									
								
								lib/Log.cc
									
									
									
									
									
								
							
							
						
						
									
										105
									
								
								lib/Log.cc
									
									
									
									
									
								
							@@ -1,62 +1,97 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/Log.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
GridStopWatch Logger::StopWatch;
 | 
			
		||||
std::ostream  Logger::devnull(0);
 | 
			
		||||
int Logger::timestamp;
 | 
			
		||||
std::ostream Logger::devnull(0);
 | 
			
		||||
 | 
			
		||||
GridLogger GridLogError      (1,"Error");
 | 
			
		||||
GridLogger GridLogWarning    (1,"Warning");
 | 
			
		||||
GridLogger GridLogMessage    (1,"Message");
 | 
			
		||||
GridLogger GridLogDebug      (1,"Debug");
 | 
			
		||||
GridLogger GridLogPerformance(1,"Performance");
 | 
			
		||||
GridLogger GridLogIterative  (1,"Iterative");
 | 
			
		||||
void GridLogTimestamp(int on){
 | 
			
		||||
  Logger::Timestamp(on);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void GridLogConfigure(std::vector<std::string> &logstreams)
 | 
			
		||||
{
 | 
			
		||||
Colours GridLogColours(0);
 | 
			
		||||
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
 | 
			
		||||
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
 | 
			
		||||
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
 | 
			
		||||
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE");
 | 
			
		||||
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
 | 
			
		||||
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE");
 | 
			
		||||
GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE");
 | 
			
		||||
 | 
			
		||||
void GridLogConfigure(std::vector<std::string> &logstreams) {
 | 
			
		||||
  GridLogError.Active(0);
 | 
			
		||||
  GridLogWarning.Active(0);
 | 
			
		||||
  GridLogMessage.Active(0);
 | 
			
		||||
  GridLogMessage.Active(1); // at least the messages should be always on
 | 
			
		||||
  GridLogIterative.Active(0);
 | 
			
		||||
  GridLogDebug.Active(0);
 | 
			
		||||
  GridLogPerformance.Active(0);
 | 
			
		||||
  GridLogIntegrator.Active(0);
 | 
			
		||||
  GridLogColours.Active(0);
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<logstreams.size();i++){
 | 
			
		||||
    if ( logstreams[i]== std::string("Error")       ) GridLogError.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Warning")     ) GridLogWarning.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Message")     ) GridLogMessage.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Iterative")   ) GridLogIterative.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Debug")       ) GridLogDebug.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
 | 
			
		||||
  for (int i = 0; i < logstreams.size(); i++) {
 | 
			
		||||
    if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
 | 
			
		||||
    if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
 | 
			
		||||
    if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
 | 
			
		||||
    if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
 | 
			
		||||
    if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
 | 
			
		||||
    if (logstreams[i] == std::string("Performance"))
 | 
			
		||||
      GridLogPerformance.Active(1);
 | 
			
		||||
    if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
 | 
			
		||||
    if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
// Verbose limiter on MPI tasks
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
void Grid_quiesce_nodes(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
  int me;
 | 
			
		||||
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
 | 
			
		||||
  if ( me ) { 
 | 
			
		||||
void Grid_quiesce_nodes(void) {
 | 
			
		||||
  int me = 0;
 | 
			
		||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3)
 | 
			
		||||
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
  me = shmem_my_pe();
 | 
			
		||||
#endif
 | 
			
		||||
  if (me) {
 | 
			
		||||
    std::cout.setstate(std::ios::badbit);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Grid_unquiesce_nodes(void)
 | 
			
		||||
{
 | 
			
		||||
void Grid_unquiesce_nodes(void) {
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
    std::cout.clear();
 | 
			
		||||
  std::cout.clear();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<< (std::ostream& stream, const GridTime& time)
 | 
			
		||||
{
 | 
			
		||||
  stream << time.count()<<" ms";
 | 
			
		||||
  return stream;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										181
									
								
								lib/Log.h
									
									
									
									
									
								
							
							
						
						
									
										181
									
								
								lib/Log.h
									
									
									
									
									
								
							@@ -1,44 +1,136 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Log.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
    Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
    Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <cstdlib>
#include <map>
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_LOG_H
 | 
			
		||||
#define GRID_LOG_H
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
#include <execinfo.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Named table of terminal colour escape sequences used to tint log output.
// Every name is always present in the map; only the mapped value changes.
class Colours{
protected:
  bool is_active;   // last value handed to Active()
public:
  std::map<std::string, std::string> colour;

  Colours(bool activate=false){
    Active(activate);
  }

  // (Re)fill the table: each name maps to its escape sequence when activate is
  // true, and to the empty string when it is false.
  void Active(bool activate){
    static const char *const palette[][2] = {
      {"BLACK" , "\033[30m"  },
      {"RED"   , "\033[31m"  },
      {"GREEN" , "\033[32m"  },
      {"YELLOW", "\033[33m"  },
      {"BLUE"  , "\033[34m"  },
      {"PURPLE", "\033[35m"  },
      {"CYAN"  , "\033[36m"  },
      {"WHITE" , "\033[37m"  },
      {"NORMAL", "\033[0;39m"}
    };
    const int ncolours = sizeof(palette)/sizeof(palette[0]);

    is_active = activate;
    for (int c = 0; c < ncolours; c++) {
      colour[palette[c][0]] = is_active ? palette[c][1] : "";
    }
  }
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<< (std::ostream& stream, const GridTime& time);
 | 
			
		||||
 | 
			
		||||
class Logger {
 | 
			
		||||
protected:
 | 
			
		||||
    int active;
 | 
			
		||||
    std::string name, topName;
 | 
			
		||||
  Colours &Painter;
 | 
			
		||||
  int active;
 | 
			
		||||
  static int timestamp;
 | 
			
		||||
  std::string name, topName;
 | 
			
		||||
  std::string COLOUR;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    static GridStopWatch StopWatch;
 | 
			
		||||
    static std::ostream devnull;
 | 
			
		||||
    
 | 
			
		||||
    Logger(std::string topNm, int on, std::string nm)
 | 
			
		||||
    : active(on), name(nm), topName(topNm) {};
 | 
			
		||||
    
 | 
			
		||||
    void Active(int on) {active = on;};
 | 
			
		||||
    int  isActive(void) {return active;};
 | 
			
		||||
    
 | 
			
		||||
    friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
 | 
			
		||||
        if ( log.active ) {
 | 
			
		||||
            StopWatch.Stop();
 | 
			
		||||
            GridTime now = StopWatch.Elapsed();
 | 
			
		||||
            StopWatch.Start();
 | 
			
		||||
            stream << std::setw(8) << std::left << log.topName << " : ";
 | 
			
		||||
            stream << std::setw(12) << std::left << log.name << " : ";
 | 
			
		||||
            stream << now << " : ";
 | 
			
		||||
            return stream;
 | 
			
		||||
        } else { 
 | 
			
		||||
            return devnull;
 | 
			
		||||
        }
 | 
			
		||||
  static GridStopWatch StopWatch;
 | 
			
		||||
  static std::ostream devnull;
 | 
			
		||||
 | 
			
		||||
  std::string background() {return Painter.colour["NORMAL"];}
 | 
			
		||||
  std::string evidence() {return Painter.colour["YELLOW"];}
 | 
			
		||||
  std::string colour() {return Painter.colour[COLOUR];}
 | 
			
		||||
 | 
			
		||||
  // Build a logger labelled topNm / nm that takes its escape sequences from
  // col_class and uses the entry named col for its own text.
  //   topNm     : leading label printed on every line
  //   on        : non-zero to enable output
  //   nm        : stream name printed after the label
  //   col_class : colour table; held by reference, so it must outlive the logger
  //   col       : key into col_class.colour for this logger's colour
  // The initialisers are written in the order the members are declared
  // (Painter, active, name, topName, COLOUR), which is the order they run in.
  Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col)
    : Painter(col_class),
      active(on),
      name(nm),
      topName(topNm),
      COLOUR(col) {}
 | 
			
		||||
  
 | 
			
		||||
  void Active(int on) {active = on;};
 | 
			
		||||
  int  isActive(void) {return active;};
 | 
			
		||||
  static void Timestamp(int on) {timestamp = on;};
 | 
			
		||||
  
 | 
			
		||||
  friend std::ostream& operator<< (std::ostream& stream, Logger& log){
 | 
			
		||||
 | 
			
		||||
    if ( log.active ) {
 | 
			
		||||
      stream << log.background()<< log.topName << log.background()<< " : ";
 | 
			
		||||
      stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
 | 
			
		||||
      if ( log.timestamp ) {
 | 
			
		||||
	StopWatch.Stop();
 | 
			
		||||
	GridTime now = StopWatch.Elapsed();
 | 
			
		||||
	StopWatch.Start();
 | 
			
		||||
	stream << log.evidence()<< now << log.background() << " : " ;
 | 
			
		||||
      }
 | 
			
		||||
      stream << log.colour();
 | 
			
		||||
      return stream;
 | 
			
		||||
    } else { 
 | 
			
		||||
      return devnull;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
class GridLogger: public Logger {
 | 
			
		||||
public:
 | 
			
		||||
  GridLogger(int on, std::string nm): Logger("Grid", on, nm){};
 | 
			
		||||
  GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"):
 | 
			
		||||
  Logger("Grid", on, nm, col_class, col_key){};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void GridLogConfigure(std::vector<std::string> &logstreams);
 | 
			
		||||
@@ -49,6 +141,41 @@ extern GridLogger GridLogMessage;
 | 
			
		||||
extern GridLogger GridLogDebug  ;
 | 
			
		||||
extern GridLogger GridLogPerformance;
 | 
			
		||||
extern GridLogger GridLogIterative  ;
 | 
			
		||||
extern GridLogger GridLogIntegrator  ;
 | 
			
		||||
extern Colours    GridLogColours;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define _NBACKTRACE (256)
 | 
			
		||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
 | 
			
		||||
 | 
			
		||||
// Write a backtrace of the calling point to the file "backtrace.<N>", where N
// is the value returned by CartesianCommunicator::RankWorld().
// The file name is formatted with a bounded snprintf into a buffer large
// enough for any int, and nothing is written if the file cannot be opened.
#define BACKTRACEFILE() {\
char bt_name[32];					\
std::snprintf(bt_name,sizeof(bt_name),"backtrace.%d",CartesianCommunicator::RankWorld()); \
std::FILE * fp = std::fopen(bt_name,"w");				\
if ( fp ) {\
BACKTRACEFP(fp)\
std::fclose(fp);	    \
}\
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
// Write one "BackTrace Strings: <index> <symbol>" line per captured frame to fp.
// backtrace_symbols() hands back a single malloc'd array owned by the caller,
// so it is released once printed; if it returns null nothing is printed.
#define BACKTRACEFP(fp) { \
int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);\
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
if ( strings ) {\
  for (int i = 0; i < symbols; i++){\
    std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
  }\
  std::free(strings);\
}\
}
#else 
// Without <execinfo.h>: print the return addresses of frames 0 to 3.
// %p is the conversion that matches the void* yielded by __builtin_return_address.
#define BACKTRACEFP(fp) { \
std::fprintf (fp,"BT %d %p\n",0, __builtin_return_address(0)); std::fflush(fp); \
std::fprintf (fp,"BT %d %p\n",1, __builtin_return_address(1)); std::fflush(fp); \
std::fprintf (fp,"BT %d %p\n",2, __builtin_return_address(2)); std::fflush(fp); \
std::fprintf (fp,"BT %d %p\n",3, __builtin_return_address(3)); std::fflush(fp); \
}
#endif

// Backtrace of the calling point, written to standard output.
#define BACKTRACE() BACKTRACEFP(stdout) 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +0,0 @@
 | 
			
		||||
 | 
			
		||||
HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/CoarsenedMatrix.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/Francis.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./Init.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./Lattice.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./PerfCount.h ./pugixml/pugixml.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h 
./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h ./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/QCD.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/SUn.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Avx512Asm.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h 
./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_Ta.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h ./Tensors.h ./Threads.h ./Timer.h
 | 
			
		||||
 | 
			
		||||
CCFILES=./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./Init.cc ./Log.cc ./PerfCount.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsAsm.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
 | 
			
		||||
@@ -1,28 +1,32 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/
 | 
			
		||||
 | 
			
		||||
extra_sources=
 | 
			
		||||
if BUILD_COMMS_MPI
 | 
			
		||||
  extra_sources+=communicator/Communicator_mpi.cc
 | 
			
		||||
  extra_sources+=communicator/Communicator_base.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_MPI3
 | 
			
		||||
  extra_sources+=communicator/Communicator_mpi3.cc
 | 
			
		||||
  extra_sources+=communicator/Communicator_base.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_SHMEM
 | 
			
		||||
  extra_sources+=communicator/Communicator_shmem.cc
 | 
			
		||||
  extra_sources+=communicator/Communicator_base.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_NONE
 | 
			
		||||
  extra_sources+=communicator/Communicator_none.cc
 | 
			
		||||
  extra_sources+=communicator/Communicator_base.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Libraries
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
include Eigen.inc
 | 
			
		||||
 | 
			
		||||
lib_LIBRARIES = libGrid.a
 | 
			
		||||
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#	qcd/action/fermion/PartialFractionFermion5D.cc\	\
 | 
			
		||||
#
 | 
			
		||||
# Include files
 | 
			
		||||
#
 | 
			
		||||
nobase_include_HEADERS=$(HFILES)
 | 
			
		||||
 | 
			
		||||
libGrid_a_SOURCES              = $(CCFILES) $(extra_sources)
 | 
			
		||||
libGrid_adir                   = $(pkgincludedir)
 | 
			
		||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								lib/Old/Endeavour.tgz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								lib/Old/Endeavour.tgz
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Old/Tensor_peek.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_PEEK_H
 | 
			
		||||
#define GRID_MATH_PEEK_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Old/Tensor_poke.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_POKE_H
 | 
			
		||||
#define GRID_MATH_POKE_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/PerfCount.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <PerfCount.h>
 | 
			
		||||
@@ -5,28 +32,44 @@
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
// Compose a config value from three PERF_COUNT_HW_CACHE_* constants selected by
// token pasting: the cache id L, OR'd with the op id O shifted left by 8 and the
// result id R shifted left by 16.
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
 | 
			
		||||
 | 
			
		||||
// Pack two values into one config word: A shifted left by 8, OR'd with B.
// Both arguments and the shift are parenthesised so that expression arguments
// (e.g. RawConfig(x|y, z)) are grouped as written by the caller.
#define RawConfig(A,B) (((A)<<8)|(B))
 | 
			
		||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." },
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." },
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." },
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES        ,  "CACHE_MISSES......." },
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS)       ,  "L1D_READ_MISS......"},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS)     ,  "L1D_READ_ACCESS...."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS)      ,  "L1D_WRITE_MISS....."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS)    ,  "L1D_WRITE_ACCESS..."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS)   ,  "L1D_PREFETCH_MISS.."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS"},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS)        ,  "LL_READ_MISS......."},
 | 
			
		||||
  //  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS)      ,  "LL_READ_ACCESS....."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS)       ,  "LL_WRITE_MISS......"},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS)     ,  "LL_WRITE_ACCESS...."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS)    ,  "LL_PREFETCH_MISS..."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS."},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......"},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS...."}
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." , INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES        ,  "CACHE_MISSES......." , CACHE_REFERENCES},
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." , INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." , CPUCYCLES   },
 | 
			
		||||
    // 4
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES    },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS  },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS    },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS  },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS  },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS },
 | 
			
		||||
    // 11
 | 
			
		||||
#else
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS)     ,  "L1D_READ_ACCESS....",INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS)       ,  "L1D_READ_MISS......",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS)      ,  "L1D_WRITE_MISS.....",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS)    ,  "L1D_WRITE_ACCESS...",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS)   ,  "L1D_PREFETCH_MISS..",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
 | 
			
		||||
    // 11
 | 
			
		||||
#endif
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS)        ,  "LL_READ_MISS.......",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS)      ,  "LL_READ_ACCESS.....",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS)       ,  "LL_WRITE_MISS......",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS)     ,  "LL_WRITE_ACCESS....",L1D_READ_ACCESS},
 | 
			
		||||
    //15
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS)    ,  "LL_PREFETCH_MISS...",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......",INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS....",INSTRUCTIONS}
 | 
			
		||||
    //19
 | 
			
		||||
  //  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
 | 
			
		||||
#endif
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										143
									
								
								lib/PerfCount.h
									
									
									
									
									
								
							
							
						
						
									
										143
									
								
								lib/PerfCount.h
									
									
									
									
									
								
							@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/PerfCount.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PERFCOUNT_H
 | 
			
		||||
#define GRID_PERFCOUNT_H
 | 
			
		||||
 | 
			
		||||
@@ -5,7 +34,7 @@
 | 
			
		||||
#include <ctime>
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/ioctl.h>
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
@@ -14,8 +43,8 @@
 | 
			
		||||
#else
 | 
			
		||||
#include <sys/syscall.h>
 | 
			
		||||
#endif
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
			
		||||
@@ -29,6 +58,49 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef TIMERS_OFF
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
inline uint64_t cyclecount(void){ 
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
 | 
			
		||||
#define __SSC_STOP  __SSC_MARK(0x110)
 | 
			
		||||
#define __SSC_START __SSC_MARK(0x111)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#define __SSC_MARK(mark) 
 | 
			
		||||
#define __SSC_STOP  
 | 
			
		||||
#define __SSC_START 
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * cycle counters arch dependent
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifdef __bgq__
 | 
			
		||||
inline uint64_t cyclecount(void){ 
 | 
			
		||||
   uint64_t tmp;
 | 
			
		||||
   asm volatile ("mfspr %0,0x10C" : "=&r" (tmp)  );
 | 
			
		||||
   return tmp;
 | 
			
		||||
}
 | 
			
		||||
#elif defined __x86_64__
 | 
			
		||||
#include <x86intrin.h>
 | 
			
		||||
inline uint64_t cyclecount(void){ 
 | 
			
		||||
  return __rdtsc();
 | 
			
		||||
  //  unsigned int dummy;
 | 
			
		||||
  // return __rdtscp(&dummy);
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
inline uint64_t cyclecount(void){ 
 | 
			
		||||
   return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
class PerformanceCounter {
 | 
			
		||||
private:
 | 
			
		||||
@@ -38,6 +110,7 @@ private:
 | 
			
		||||
    uint32_t type;
 | 
			
		||||
    uint64_t config;
 | 
			
		||||
    const char *name;
 | 
			
		||||
    int normalisation;
 | 
			
		||||
  } PerformanceCounterConfig; 
 | 
			
		||||
  
 | 
			
		||||
  static const PerformanceCounterConfig PerformanceCounterConfigs [];
 | 
			
		||||
@@ -45,26 +118,12 @@ private:
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  enum PerformanceCounterType {
 | 
			
		||||
    CPUCYCLES=0,
 | 
			
		||||
    INSTRUCTIONS,
 | 
			
		||||
    //    STALL_CYCLES,
 | 
			
		||||
    CACHE_REFERENCES,
 | 
			
		||||
    CACHE_MISSES,
 | 
			
		||||
    L1D_READ_MISS,
 | 
			
		||||
    L1D_READ_ACCESS,
 | 
			
		||||
    L1D_WRITE_MISS,
 | 
			
		||||
    L1D_WRITE_ACCESS,
 | 
			
		||||
    L1D_PREFETCH_MISS,
 | 
			
		||||
    L1D_PREFETCH_ACCESS,
 | 
			
		||||
    LL_READ_MISS,
 | 
			
		||||
    //    LL_READ_ACCESS,
 | 
			
		||||
    LL_WRITE_MISS,
 | 
			
		||||
    LL_WRITE_ACCESS,
 | 
			
		||||
    LL_PREFETCH_MISS,
 | 
			
		||||
    LL_PREFETCH_ACCESS,
 | 
			
		||||
    L1I_READ_MISS,
 | 
			
		||||
    L1I_READ_ACCESS,
 | 
			
		||||
    PERFORMANCE_COUNTER_NUM_TYPES
 | 
			
		||||
    CACHE_REFERENCES=0,
 | 
			
		||||
    CACHE_MISSES=1,
 | 
			
		||||
    CPUCYCLES=2,
 | 
			
		||||
    INSTRUCTIONS=3,
 | 
			
		||||
    L1D_READ_ACCESS=4,
 | 
			
		||||
    PERFORMANCE_COUNTER_NUM_TYPES=19
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
@@ -72,8 +131,10 @@ public:
 | 
			
		||||
  int PCT;
 | 
			
		||||
 | 
			
		||||
  long long count;
 | 
			
		||||
  long long cycles;
 | 
			
		||||
  int fd;
 | 
			
		||||
  uint64_t elapsed;
 | 
			
		||||
  int cyclefd;
 | 
			
		||||
  unsigned long long elapsed;
 | 
			
		||||
  uint64_t begin;
 | 
			
		||||
 | 
			
		||||
  static int NumTypes(void){ 
 | 
			
		||||
@@ -85,7 +146,9 @@ public:
 | 
			
		||||
    assert(_pct>=0);
 | 
			
		||||
    assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
 | 
			
		||||
    fd=-1;
 | 
			
		||||
    cyclefd=-1;
 | 
			
		||||
    count=0;
 | 
			
		||||
    cycles=0;
 | 
			
		||||
    PCT =_pct;
 | 
			
		||||
    Open();
 | 
			
		||||
#endif
 | 
			
		||||
@@ -110,6 +173,15 @@ public:
 | 
			
		||||
      fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
 | 
			
		||||
      perror("Error is");
 | 
			
		||||
    }
 | 
			
		||||
    int norm = PerformanceCounterConfigs[PCT].normalisation;
 | 
			
		||||
    pe.type  = PerformanceCounterConfigs[norm].type;
 | 
			
		||||
    pe.config= PerformanceCounterConfigs[norm].config;
 | 
			
		||||
    name = PerformanceCounterConfigs[norm].name;
 | 
			
		||||
    cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 | 
			
		||||
    if (cyclefd == -1) {
 | 
			
		||||
      fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
 | 
			
		||||
      perror("Error is");
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -117,10 +189,12 @@ public:
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    if ( fd!= -1) {
 | 
			
		||||
      ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
      ::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
    }
 | 
			
		||||
    begin  =__rdtsc();
 | 
			
		||||
    begin  =cyclecount();
 | 
			
		||||
#else
 | 
			
		||||
    begin = 0;
 | 
			
		||||
#endif
 | 
			
		||||
@@ -128,12 +202,15 @@ public:
 | 
			
		||||
 | 
			
		||||
  void Stop(void) {
 | 
			
		||||
    count=0;
 | 
			
		||||
    cycles=0;
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    if ( fd!= -1) {
 | 
			
		||||
      ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ::read(fd, &count, sizeof(long long));
 | 
			
		||||
      ::read(cyclefd, &cycles, sizeof(long long));
 | 
			
		||||
    }
 | 
			
		||||
    elapsed = __rdtsc() - begin;
 | 
			
		||||
    elapsed = cyclecount() - begin;
 | 
			
		||||
#else
 | 
			
		||||
    elapsed = 0;
 | 
			
		||||
#endif
 | 
			
		||||
@@ -141,16 +218,20 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  void Report(void) {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count);
 | 
			
		||||
    int N = PerformanceCounterConfigs[PCT].normalisation;
 | 
			
		||||
    const char * sn = PerformanceCounterConfigs[N].name ;
 | 
			
		||||
    const char * sc = PerformanceCounterConfigs[PCT].name;
 | 
			
		||||
      std::printf("tsc = %llu %s = %llu  %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles, 
 | 
			
		||||
		  sc, count, sc,sn, (double)count/(double)cycles);
 | 
			
		||||
#else
 | 
			
		||||
    printf("%llu cycles \n", elapsed );
 | 
			
		||||
    std::printf("%llu cycles \n", elapsed );
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ~PerformanceCounter()
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    close(fd);
 | 
			
		||||
    ::close(fd);    ::close(cyclefd);
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										45
									
								
								lib/Simd.h
									
									
									
									
									
								
							
							
						
						
									
										45
									
								
								lib/Simd.h
									
									
									
									
									
								
							@@ -1,3 +1,33 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/Simd.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SIMD_H
 | 
			
		||||
#define GRID_SIMD_H
 | 
			
		||||
 | 
			
		||||
@@ -13,10 +43,13 @@
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Immediate-operand builders.
// _MM_SELECT_FOUR_FOUR packs four 2-bit selectors, A in the top two bits, D in the bottom two.
// _MM_SELECT_EIGHT_TWO packs eight 1-bit selectors, A in bit 7 down to H in bit 0.
// _MM_SELECT_FOUR_TWO / _MM_SELECT_TWO_TWO are the same packing with the unused high bits zero.
// Every argument is parenthesised so that expressions may be passed safely.
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) (((A)<<6)|((B)<<4)|((C)<<2)|(D))
#define _MM_SELECT_FOUR_FOUR_STRING(A,B,C,D) "((" #A "<<6)|(" #B "<<4)|(" #C "<<2)|(" #D "))"
// G occupies bit 1 (the previous definition shifted it by 4, colliding with D).
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) (((A)<<7)|((B)<<6)|((C)<<5)|((D)<<4)|((E)<<3)|((F)<<2)|((G)<<1)|(H))
// No whitespace between the macro name and '(' : otherwise these become object-like macros.
#define _MM_SELECT_FOUR_TWO(A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
#define _MM_SELECT_TWO_TWO(A,B)      _MM_SELECT_FOUR_TWO(0,0,A,B)
 | 
			
		||||
 | 
			
		||||
#define RotateBit (0x100)
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  typedef uint32_t Integer;
 | 
			
		||||
@@ -86,6 +119,14 @@ namespace Grid {
 | 
			
		||||
  inline ComplexD timesI(const ComplexD &r)     { return(r*ComplexD(0.0,1.0));}
 | 
			
		||||
  inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
 | 
			
		||||
  inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
 | 
			
		||||
 | 
			
		||||
  // define projections to real and imaginary parts
 | 
			
		||||
  inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
 | 
			
		||||
  inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
 | 
			
		||||
  inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
 | 
			
		||||
  inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
 | 
			
		||||
 | 
			
		||||
  // define auxiliary functions for complex computations
 | 
			
		||||
  inline void timesI(ComplexF &ret,const ComplexF &r)     { ret = timesI(r);}
 | 
			
		||||
  inline void timesI(ComplexD &ret,const ComplexD &r)     { ret = timesI(r);}
 | 
			
		||||
  inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
 | 
			
		||||
@@ -131,8 +172,8 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#include <simd/Grid_vector_types.h>
 | 
			
		||||
#include <simd/Grid_vector_unops.h>
 | 
			
		||||
#include "simd/Grid_vector_types.h"
 | 
			
		||||
#include "simd/Grid_vector_unops.h"
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  // Default precision
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										247
									
								
								lib/Stat.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										247
									
								
								lib/Stat.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,247 @@
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <PerfCount.h>
 | 
			
		||||
#include <Stat.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bool PmuStat::pmu_initialized=false;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void PmuStat::init(const char *regname)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  name = regname;
 | 
			
		||||
  if (!pmu_initialized)
 | 
			
		||||
    {
 | 
			
		||||
      std::cout<<"initialising pmu"<<std::endl;
 | 
			
		||||
      pmu_initialized = true;
 | 
			
		||||
      pmu_init();
 | 
			
		||||
    }
 | 
			
		||||
  clear();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::clear(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  count = 0;
 | 
			
		||||
  tregion = 0;
 | 
			
		||||
  pmc0 = 0;
 | 
			
		||||
  pmc1 = 0;
 | 
			
		||||
  inst = 0;
 | 
			
		||||
  cyc = 0;
 | 
			
		||||
  ref = 0;
 | 
			
		||||
  tcycles = 0;
 | 
			
		||||
  reads = 0;
 | 
			
		||||
  writes = 0;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::print(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  std::cout <<"Reg "<<std::string(name)<<":\n";
 | 
			
		||||
  std::cout <<"  region "<<tregion<<std::endl;
 | 
			
		||||
  std::cout <<"  cycles "<<tcycles<<std::endl;
 | 
			
		||||
  std::cout <<"  inst   "<<inst   <<std::endl;
 | 
			
		||||
  std::cout <<"  cyc    "<<cyc    <<std::endl;
 | 
			
		||||
  std::cout <<"  ref    "<<ref    <<std::endl;
 | 
			
		||||
  std::cout <<"  pmc0   "<<pmc0   <<std::endl;
 | 
			
		||||
  std::cout <<"  pmc1   "<<pmc1   <<std::endl;
 | 
			
		||||
  std::cout <<"  count  "<<count  <<std::endl;
 | 
			
		||||
  std::cout <<"  reads  "<<reads  <<std::endl;
 | 
			
		||||
  std::cout <<"  writes "<<writes <<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::start(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  pmu_start();
 | 
			
		||||
  ++count;
 | 
			
		||||
  xmemctrs(&mrstart, &mwstart);
 | 
			
		||||
  tstart = __rdtsc();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::enter(int t)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  counters[0][t] = __rdpmc(0);
 | 
			
		||||
  counters[1][t] = __rdpmc(1);
 | 
			
		||||
  counters[2][t] = __rdpmc((1<<30)|0);
 | 
			
		||||
  counters[3][t] = __rdpmc((1<<30)|1);
 | 
			
		||||
  counters[4][t] = __rdpmc((1<<30)|2);
 | 
			
		||||
  counters[5][t] = __rdtsc();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::exit(int t)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  counters[0][t] = __rdpmc(0) - counters[0][t];
 | 
			
		||||
  counters[1][t] = __rdpmc(1) - counters[1][t];
 | 
			
		||||
  counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
 | 
			
		||||
  counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
 | 
			
		||||
  counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
 | 
			
		||||
  counters[5][t] = __rdtsc() - counters[5][t];
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::accum(int nthreads)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  tend = __rdtsc();
 | 
			
		||||
  xmemctrs(&mrend, &mwend);
 | 
			
		||||
  pmu_stop();
 | 
			
		||||
  for (int t = 0; t < nthreads; ++t) {
 | 
			
		||||
    pmc0 += counters[0][t];
 | 
			
		||||
    pmc1 += counters[1][t];
 | 
			
		||||
    inst += counters[2][t];
 | 
			
		||||
    cyc += counters[3][t];
 | 
			
		||||
    ref += counters[4][t];
 | 
			
		||||
    tcycles += counters[5][t];
 | 
			
		||||
  }
 | 
			
		||||
  uint64_t region = tend - tstart;
 | 
			
		||||
  tregion += region;
 | 
			
		||||
  uint64_t mreads = mrend - mrstart;
 | 
			
		||||
  reads += mreads;
 | 
			
		||||
  uint64_t mwrites = mwend - mwstart;
 | 
			
		||||
  writes += mwrites;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// The following three hooks are intentionally empty in this implementation.
void PmuStat::pmu_fini(void)
{
}
void PmuStat::pmu_start(void)
{
}
void PmuStat::pmu_stop(void)
{
}
 | 
			
		||||
// One-time PMU setup. Only does work when _KNIGHTS_LANDING_ is defined,
// in which case it delegates to KNLsetup(); otherwise it is empty.
void PmuStat::pmu_init(void)
{
#ifdef _KNIGHTS_LANDING_
  PmuStat::KNLsetup();
#endif
}
 | 
			
		||||
// Report memory read/write counts through *mr and *mw.
// With _KNIGHTS_LANDING_ defined: reads all counters via KNLreadctrs() and
// returns the sums of the edcrd[] and edcwr[] entries over the NEDC slots.
// Otherwise both outputs are set to zero.
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
{
#ifdef _KNIGHTS_LANDING_
  ctrs snapshot;
  KNLreadctrs(snapshot);

  uint64_t rd_total = 0;
  uint64_t wr_total = 0;
  for (int edc = 0; edc < NEDC; ++edc) {
    rd_total += snapshot.edcrd[edc];
    wr_total += snapshot.edcwr[edc];
  }
  *mr = rd_total;
  *mw = wr_total;
#else
  *mw = 0;
  *mr = 0;
#endif
}
 | 
			
		||||
 | 
			
		||||
#ifdef _KNIGHTS_LANDING_
 | 
			
		||||
 | 
			
		||||
struct knl_gbl_ PmuStat::gbl;
 | 
			
		||||
 | 
			
		||||
#define PMU_MEM
 | 
			
		||||
 | 
			
		||||
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
 | 
			
		||||
{
 | 
			
		||||
  char fname[1024];
 | 
			
		||||
  snprintf(fname, sizeof(fname), "%s/type", ename);
 | 
			
		||||
  FILE *fp = fopen(fname, "r");
 | 
			
		||||
  if (fp == 0) {
 | 
			
		||||
    ::printf("open %s", fname);
 | 
			
		||||
    ::exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  int type;
 | 
			
		||||
  int ret = fscanf(fp, "%d", &type);
 | 
			
		||||
  assert(ret == 1);
 | 
			
		||||
  fclose(fp);
 | 
			
		||||
  //  std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
 | 
			
		||||
 | 
			
		||||
  struct perf_event_attr hw = {};
 | 
			
		||||
  hw.size = sizeof(hw);
 | 
			
		||||
  hw.type = type;
 | 
			
		||||
  // see /sys/devices/uncore_*/format/*
 | 
			
		||||
  // All of the events we are interested in are configured the same way, but
 | 
			
		||||
  // that isn't always true. Proper code would parse the format files
 | 
			
		||||
  hw.config = event | (umask << 8);
 | 
			
		||||
  //hw.read_format = PERF_FORMAT_GROUP;
 | 
			
		||||
  // unfortunately the above only works within a single PMU; might
 | 
			
		||||
  // as well just read them one at a time
 | 
			
		||||
  int cpu = 0;
 | 
			
		||||
  fd = perf_event_open(&hw, -1, cpu, -1, 0);
 | 
			
		||||
  if (fd == -1) {
 | 
			
		||||
    ::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
 | 
			
		||||
    ::exit(0);
 | 
			
		||||
  } else { 
 | 
			
		||||
    //    std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 void PmuStat::KNLsetup(void){
 | 
			
		||||
 | 
			
		||||
   int ret;
 | 
			
		||||
   char fname[1024];
 | 
			
		||||
 | 
			
		||||
   // MC RPQ inserts and WPQ inserts (reads & writes)
 | 
			
		||||
   for (int mc = 0; mc < NMC; ++mc)
 | 
			
		||||
     {
 | 
			
		||||
       ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
 | 
			
		||||
       // RPQ Inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
 | 
			
		||||
       // WPQ Inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
 | 
			
		||||
     }
 | 
			
		||||
   // EDC RPQ inserts and WPQ inserts
 | 
			
		||||
   for (int edc=0; edc < NEDC; ++edc)
 | 
			
		||||
     {
 | 
			
		||||
       ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
 | 
			
		||||
       // RPQ inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
 | 
			
		||||
       // WPQ inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
 | 
			
		||||
     }
 | 
			
		||||
   // EDC HitE, HitM, MissE, MissM
 | 
			
		||||
   for (int edc=0; edc < NEDC; ++edc)
 | 
			
		||||
     {
 | 
			
		||||
       ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
 | 
			
		||||
     }
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
// Read one 64-bit counter value from the perf event descriptor `fd`.
// A failed or short read is reported on stderr and terminates the process
// with a non-zero status.
uint64_t PmuStat::KNLreadctr(int fd)
{
  uint64_t data = 0;
  // read() returns a signed count; keep it signed so -1 is not mistaken for a size
  const ssize_t got = ::read(fd, &data, sizeof(data));
  if (got != (ssize_t) sizeof(data)) {
    if (got < 0) {
      ::perror("read counter");
    } else {
      ::fprintf(stderr, "read counter: short read of %ld bytes\n", (long) got);
    }
    ::exit(1);
  }
  return data;
}
 | 
			
		||||
 | 
			
		||||
// Fill `c` with the current value of every counter whose descriptor is held
// in `gbl`, reading them one at a time through KNLreadctr().
void PmuStat::KNLreadctrs(ctrs &c)
{
  // memory controller read / write counters
  for (int mc = 0; mc < NMC; ++mc) {
    c.mcrd[mc] = KNLreadctr(gbl.mc_rd[mc]);
    c.mcwr[mc] = KNLreadctr(gbl.mc_wr[mc]);
  }
  // EDC read / write counters
  for (int edc = 0; edc < NEDC; ++edc) {
    c.edcrd[edc] = KNLreadctr(gbl.edc_rd[edc]);
    c.edcwr[edc] = KNLreadctr(gbl.edc_wr[edc]);
  }
  // EDC hit / miss counters
  for (int edc = 0; edc < NEDC; ++edc) {
    c.edchite[edc]  = KNLreadctr(gbl.edc_hite[edc]);
    c.edchitm[edc]  = KNLreadctr(gbl.edc_hitm[edc]);
    c.edcmisse[edc] = KNLreadctr(gbl.edc_misse[edc]);
    c.edcmissm[edc] = KNLreadctr(gbl.edc_missm[edc]);
  }
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										104
									
								
								lib/Stat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								lib/Stat.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,104 @@
 | 
			
		||||
#ifndef _GRID_STAT_H
 | 
			
		||||
#define _GRID_STAT_H
 | 
			
		||||
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
#define _KNIGHTS_LANDING_ROOTONLY
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Extra KNL counters from MCDRAM
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#ifdef _KNIGHTS_LANDING_
 | 
			
		||||
#define NMC 6
 | 
			
		||||
#define NEDC 8
 | 
			
		||||
struct ctrs
 | 
			
		||||
{
 | 
			
		||||
    uint64_t mcrd[NMC];
 | 
			
		||||
    uint64_t mcwr[NMC];
 | 
			
		||||
    uint64_t edcrd[NEDC]; 
 | 
			
		||||
    uint64_t edcwr[NEDC];
 | 
			
		||||
    uint64_t edchite[NEDC];
 | 
			
		||||
    uint64_t edchitm[NEDC];
 | 
			
		||||
    uint64_t edcmisse[NEDC];
 | 
			
		||||
    uint64_t edcmissm[NEDC];
 | 
			
		||||
};
 | 
			
		||||
// Peter/Azusa:
 | 
			
		||||
// Our modification of a code provided by Larry Meadows from Intel
 | 
			
		||||
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
 | 
			
		||||
// so is already public and in the linux kernel for KNL.
 | 
			
		||||
struct knl_gbl_
 | 
			
		||||
{
 | 
			
		||||
  int mc_rd[NMC];
 | 
			
		||||
  int mc_wr[NMC];
 | 
			
		||||
  int edc_rd[NEDC];
 | 
			
		||||
  int edc_wr[NEDC];
 | 
			
		||||
  int edc_hite[NEDC];
 | 
			
		||||
  int edc_hitm[NEDC];
 | 
			
		||||
  int edc_misse[NEDC];
 | 
			
		||||
  int edc_missm[NEDC];
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// Per-region accumulator for performance-counter statistics.
// Usage as implemented in Stat.cc: init(name) once, then for each region
// start(); enter(t)/exit(t) per thread slot t; accum(nthreads); and print().
class PmuStat
{
    // Per-thread scratch table, indexed counters[row][thread].
    // Rows as used by enter()/exit()/accum() in Stat.cc:
    //   0 pmc0, 1 pmc1, 2 inst, 3 cyc, 4 ref, 5 tsc
    // (rows 6 and 7 are not touched by that code)
    uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
    static struct knl_gbl_ gbl;
#endif
    const char *name;

    uint64_t reads;     // memory reads
    uint64_t writes;    // memory writes
    uint64_t mrstart;   // memory read counter at start of parallel region
    uint64_t mrend;     // memory read counter at end of parallel region
    uint64_t mwstart;   // memory write counter at start of parallel region
    uint64_t mwend;     // memory write counter at end of parallel region

    // cumulative counters
    uint64_t count;     // number of invocations
    uint64_t tregion;   // total time in parallel region (from thread 0)
    uint64_t tcycles;   // total cycles inside parallel region
    // fixed counters
    uint64_t inst;
    uint64_t ref;
    uint64_t cyc;
    // pmu
    uint64_t pmc0;
    uint64_t pmc1;
    // add memory counters here

    // temp variables
    uint64_t tstart;    // tsc at start of parallel region
    uint64_t tend;      // tsc at end of parallel region

    static bool pmu_initialized;
public:
    // true once init() has triggered the one-time PMU setup
    static bool is_init() { return pmu_initialized; }

    // process-wide PMU hooks
    static void pmu_init();
    static void pmu_fini();
    static void pmu_start();
    static void pmu_stop();
    static void xmemctrs(uint64_t *mr, uint64_t *mw);

    // per-object region bookkeeping
    void init(const char *regname);
    void clear();
    void start();
    void enter(int t);
    void exit(int t);
    void accum(int nthreads);
    void print();
#ifdef _KNIGHTS_LANDING_
    static void     KNLsetup();
    static uint64_t KNLreadctr(int fd);
    static void     KNLreadctrs(ctrs &c);
    static void     KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif

  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1505
									
								
								lib/Stencil.h
									
									
									
									
									
								
							
							
						
						
									
										1505
									
								
								lib/Stencil.h
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -1,22 +1,51 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Tensors.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_H
 | 
			
		||||
#define GRID_MATH_H
 | 
			
		||||
 | 
			
		||||
#include <tensors/Tensor_traits.h>
 | 
			
		||||
#include <tensors/Tensor_class.h>
 | 
			
		||||
#include <tensors/Tensor_arith.h>
 | 
			
		||||
#include <tensors/Tensor_inner.h>
 | 
			
		||||
#include <tensors/Tensor_outer.h>
 | 
			
		||||
#include <tensors/Tensor_transpose.h>
 | 
			
		||||
#include <tensors/Tensor_trace.h>
 | 
			
		||||
#include <tensors/Tensor_index.h>
 | 
			
		||||
#include <tensors/Tensor_Ta.h>
 | 
			
		||||
#include <tensors/Tensor_determinant.h>
 | 
			
		||||
#include <tensors/Tensor_exp.h>
 | 
			
		||||
//#include <tensors/Tensor_peek.h>
 | 
			
		||||
//#include <tensors/Tensor_poke.h>
 | 
			
		||||
#include <tensors/Tensor_reality.h>
 | 
			
		||||
#include <tensors/Tensor_unary.h>
 | 
			
		||||
#include <tensors/Tensor_extract_merge.h>
 | 
			
		||||
#include <tensors/Tensor_logical.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_traits.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_class.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_arith.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_inner.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_outer.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_transpose.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_trace.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_index.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_Ta.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_determinant.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_exp.h>
 | 
			
		||||
//#include <Grid/tensors/Tensor_peek.h>
 | 
			
		||||
//#include <Grid/tensors/Tensor_poke.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_reality.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_unary.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_extract_merge.h>
 | 
			
		||||
#include <Grid/tensors/Tensor_logical.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Threads.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_THREADS_H
 | 
			
		||||
#define GRID_THREADS_H
 | 
			
		||||
 | 
			
		||||
@@ -9,7 +37,11 @@
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for ")
 | 
			
		||||
#ifdef GRID_NUMA
 | 
			
		||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
 | 
			
		||||
#endif
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP 
 | 
			
		||||
@@ -95,6 +127,22 @@ class GridThread {
 | 
			
		||||
    ThreadBarrier();
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Copy len bytes from src to dst.
  // With GRID_OMP: runs inside an OpenMP parallel region; each thread obtains
  // a (mywork, myoff) pair from GetWorkBarrier and copies that byte range.
  // NOTE(review): presumably GetWorkBarrier partitions [0,len) across threads - confirm.
  // Without GRID_OMP: a single library bcopy of the whole range.
  //
  // The library routine must be called as ::bcopy. An unqualified bcopy here
  // resolves to this member function and recurses without end.
  // ::bcopy is declared in <strings.h>.
  static void bcopy(const void *src, void *dst, size_t len) {
#ifdef GRID_OMP
#pragma omp parallel 
    {
      const char *c_src = static_cast<const char *>(src);
      char *c_dest      = static_cast<char *>(dst);
      int me,mywork,myoff;
      GridThread::GetWorkBarrier(len,me, mywork,myoff);
      ::bcopy(&c_src[myoff],&c_dest[myoff],mywork);
    }
#else 
    ::bcopy(src,dst,len);
#endif
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										60
									
								
								lib/Timer.h
									
									
									
									
									
								
							
							
						
						
									
										60
									
								
								lib/Timer.h
									
									
									
									
									
								
							@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Timer.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_TIME_H
 | 
			
		||||
#define GRID_TIME_H
 | 
			
		||||
 | 
			
		||||
@@ -11,40 +39,62 @@ namespace Grid {
 | 
			
		||||
  // Dress the output; use std::chrono
 | 
			
		||||
 | 
			
		||||
// C++11 time facilities better?
 | 
			
		||||
double usecond(void);
 | 
			
		||||
// Wall-clock time expressed in microseconds, returned as a double.
//
// When TIMERS_ON is defined the value comes from gettimeofday().
// When it is not defined the clock is never read and the function
// returns 0.0.
inline double usecond(void) {
  // Zero-initialise so the result is well defined even when the
  // gettimeofday() call below is compiled out.
  struct timeval tv = {0, 0};
#ifdef TIMERS_ON
  gettimeofday(&tv,NULL);
#endif
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
 | 
			
		||||
 | 
			
		||||
// Clock and duration aliases shared by the timing utilities.
using GridClock     = std::chrono::system_clock;            // clock that is sampled
using GridTimePoint = std::chrono::time_point<GridClock>;   // instant on GridClock
using GridTime      = std::chrono::milliseconds;            // millisecond duration
using GridUsecs     = std::chrono::microseconds;            // microsecond duration
 | 
			
		||||
 | 
			
		||||
// Stream a millisecond duration as "<count> ms".
// Returns the stream so that insertions can be chained.
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
{
  return stream << time.count() << " ms";
}
 | 
			
		||||
 
 | 
			
		||||
class GridStopWatch {
 | 
			
		||||
private:
 | 
			
		||||
  bool running;
 | 
			
		||||
  GridTimePoint start;
 | 
			
		||||
  GridTime accumulator;
 | 
			
		||||
  GridUsecs accumulator;
 | 
			
		||||
public:
 | 
			
		||||
  GridStopWatch () { 
 | 
			
		||||
    Reset();
 | 
			
		||||
  }
 | 
			
		||||
  void     Start(void) { 
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
#ifdef TIMERS_ON
 | 
			
		||||
    start = GridClock::now(); 
 | 
			
		||||
#endif
 | 
			
		||||
    running = true;
 | 
			
		||||
  }
 | 
			
		||||
  void     Stop(void)  { 
 | 
			
		||||
    assert(running == true);
 | 
			
		||||
    accumulator+= std::chrono::duration_cast<GridTime>(GridClock::now()-start); 
 | 
			
		||||
#ifdef TIMERS_ON
 | 
			
		||||
    accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start); 
 | 
			
		||||
#endif
 | 
			
		||||
    running = false; 
 | 
			
		||||
  };
 | 
			
		||||
  void     Reset(void){
 | 
			
		||||
    running = false;
 | 
			
		||||
#ifdef TIMERS_ON
 | 
			
		||||
    start = GridClock::now();
 | 
			
		||||
    accumulator = std::chrono::duration_cast<GridTime>(start-start); 
 | 
			
		||||
#endif
 | 
			
		||||
    accumulator = std::chrono::duration_cast<GridUsecs>(start-start); 
 | 
			
		||||
  }
 | 
			
		||||
  GridTime Elapsed(void) {
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return accumulator;
 | 
			
		||||
    return std::chrono::duration_cast<GridTime>( accumulator );
 | 
			
		||||
  }
 | 
			
		||||
  uint64_t useconds(void){
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return (uint64_t) accumulator.count();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,36 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/CoarsenedMatrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_ALGORITHM_COARSENED_MATRIX_H
 | 
			
		||||
#define  GRID_ALGORITHM_COARSENED_MATRIX_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
@@ -117,6 +146,56 @@ namespace Grid {
 | 
			
		||||
      }
 | 
			
		||||
      Orthogonalise();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Fill the first nn entries of `subspace` with vectors obtained from an
    // implicitly restarted Lanczos run on `hermop` with a loose residual
    // target, then orthogonalise them.  Diagnostic norms are written to
    // GridLogMessage along the way.
    //
    //   RNG    - random number generator used for the Gaussian starting vector
    //   hermop - operator handed to the Lanczos and applied again for the
    //            diagnostic output
    //   nn     - number of vectors to keep (defaults to nbasis)
    virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) 
    {
      // Lanczos dimensions: ask for nn vectors and work in a space of
      // (nn+20)+(nn+20) vectors.
      const int Nstop = nn;
      const int Nk    = nn+20;
      const int Np    = nn+20;
      const int Nm    = Nk+Np;
      const int MaxIt = 10000;
      RealD resid     = 1.0e-3;   // sloppy convergence target

      Chebyshev<FineField> Cheb(0.5,64.0,21);
      ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);

      // Gaussian noise is the starting vector; the same field is reused
      // below as scratch space for the residual diagnostics.
      FineField noise(FineGrid);
      gaussian(RNG,noise);
      FineField tmp(FineGrid);

      std::vector<RealD>     eval(Nm);
      std::vector<FineField> evec(Nm,FineGrid);

      int Nconv;
      IRL.calc(eval,evec,noise,Nconv);

      // Pull back the first nn vectors and report on each of them.
      for(int k=0;k<nn;k++){

        subspace[k] = evec[k];
        std::cout << GridLogMessage <<"subspace["<<k<<"] = "<<norm2(subspace[k])<<std::endl;

        hermop.Op(subspace[k],tmp);
        std::cout<<GridLogMessage << "filtered["<<k<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;

        // Two residual-style diagnostics printed under the same label:
        // the first subtracts sqrt(eval) times the vector, the second adds
        // eval times the vector.
        noise = tmp -  sqrt(eval[k])*subspace[k] ;
        std::cout<<GridLogMessage << " lambda_"<<k<<" = "<< eval[k] <<"  ;  [ M - Lambda ]_"<<k<<" vec_"<<k<<"  = " <<norm2(noise)<<std::endl;

        noise = tmp +  eval[k]*subspace[k] ;
        std::cout<<GridLogMessage << " lambda_"<<k<<" = "<< eval[k] <<"  ;  [ M - Lambda ]_"<<k<<" vec_"<<k<<"  = " <<norm2(noise)<<std::endl;
      }

      Orthogonalise();

      // Norms again, after orthogonalisation.
      for(int k=0;k<nn;k++){
        std::cout << GridLogMessage <<"subspace["<<k<<"] = "<<norm2(subspace[k])<<std::endl;
      }
    }
 | 
			
		||||
 | 
			
		||||
    virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
 | 
			
		||||
 | 
			
		||||
      RealD scale;
 | 
			
		||||
@@ -170,11 +249,10 @@ namespace Grid {
 | 
			
		||||
    ////////////////////
 | 
			
		||||
    Geometry         geom;
 | 
			
		||||
    GridBase *       _grid; 
 | 
			
		||||
    CartesianStencil<siteVector,siteVector,SimpleCompressor<siteVector> > Stencil; 
 | 
			
		||||
    CartesianStencil<siteVector,siteVector> Stencil; 
 | 
			
		||||
 | 
			
		||||
    std::vector<CoarseMatrix> A;
 | 
			
		||||
 | 
			
		||||
    std::vector<siteVector,alignedAllocator<siteVector> >   comm_buf;
 | 
			
		||||
      
 | 
			
		||||
    ///////////////////////
 | 
			
		||||
    // Interface
 | 
			
		||||
@@ -187,7 +265,7 @@ namespace Grid {
 | 
			
		||||
      conformable(in._grid,out._grid);
 | 
			
		||||
 | 
			
		||||
      SimpleCompressor<siteVector> compressor;
 | 
			
		||||
      Stencil.HaloExchange(in,comm_buf,compressor);
 | 
			
		||||
      Stencil.HaloExchange(in,compressor);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int ss=0;ss<Grid()->oSites();ss++){
 | 
			
		||||
@@ -204,7 +282,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
	  } else if(SE->_is_local) { 
 | 
			
		||||
	    nbr = in._odata[SE->_offset];
 | 
			
		||||
	  } else {
 | 
			
		||||
	    nbr = comm_buf[SE->_offset];
 | 
			
		||||
	    nbr = Stencil.comm_buf[SE->_offset];
 | 
			
		||||
	  }
 | 
			
		||||
	  res = res + A[point]._odata[ss]*nbr;
 | 
			
		||||
	}
 | 
			
		||||
@@ -228,7 +306,6 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
      Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
 | 
			
		||||
      A(geom.npoint,&CoarseGrid)
 | 
			
		||||
    {
 | 
			
		||||
      comm_buf.resize(Stencil._unified_buffer_size);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/LinearOperator.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_ALGORITHM_LINEAR_OP_H
 | 
			
		||||
#define  GRID_ALGORITHM_LINEAR_OP_H
 | 
			
		||||
 | 
			
		||||
@@ -194,6 +222,7 @@ namespace Grid {
 | 
			
		||||
      SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
      virtual  RealD Mpc      (const Field &in, Field &out) {
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
//	std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << "  _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
 | 
			
		||||
 | 
			
		||||
	_Mat.Meooe(in,tmp);
 | 
			
		||||
	_Mat.MooeeInv(tmp,out);
 | 
			
		||||
@@ -223,10 +252,10 @@ namespace Grid {
 | 
			
		||||
      // Apply the preconditioned operator to `in`, leaving the result in `out`.
      //
      // The chain Meooe -> MooeeInv -> Meooe -> MooeeInv is applied to `in`,
      // ping-ponging between `out` and `tmp` so that the final product ends
      // up in `tmp`.  axpy_norm then combines it with `in`
      // (presumably out = in - tmp, returning the norm of out -- confirm
      // against axpy_norm's definition).
      //
      // Each product is computed exactly once; an earlier copy of the same
      // four calls, whose outputs were immediately overwritten, is removed.
      virtual  RealD Mpc      (const Field &in, Field &out) {
	Field tmp(in._grid);

	_Mat.Meooe(in,out);
	_Mat.MooeeInv(out,tmp);
	_Mat.Meooe(tmp,out);
	_Mat.MooeeInv(out,tmp);

	return axpy_norm(out,-1.0,tmp,in);
      }
 | 
			
		||||
@@ -242,6 +271,35 @@ namespace Grid {
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Matrix,class Field>
 | 
			
		||||
      class SchurDiagTwoOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
    protected:
 | 
			
		||||
      Matrix &_Mat;
 | 
			
		||||
    public:
 | 
			
		||||
      SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
 | 
			
		||||
      virtual  RealD Mpc      (const Field &in, Field &out) {
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
 | 
			
		||||
	_Mat.MooeeInv(in,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
	_Mat.MooeeInv(tmp,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
 | 
			
		||||
	return axpy_norm(out,-1.0,tmp,in);
 | 
			
		||||
      }
 | 
			
		||||
      virtual  RealD MpcDag   (const Field &in, Field &out){
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
 | 
			
		||||
	_Mat.MeooeDag(in,out);
 | 
			
		||||
	_Mat.MooeeInvDag(out,tmp);
 | 
			
		||||
	_Mat.MeooeDag(tmp,out);
 | 
			
		||||
	_Mat.MooeeInvDag(out,tmp);
 | 
			
		||||
 | 
			
		||||
	return axpy_norm(out,-1.0,tmp,in);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Base classes for functions of operators
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/Preconditioner.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PRECONDITIONER_H
 | 
			
		||||
#define GRID_PRECONDITIONER_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/SparseMatrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_ALGORITHM_SPARSE_MATRIX_H
 | 
			
		||||
#define  GRID_ALGORITHM_SPARSE_MATRIX_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,35 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/Chebyshev.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CHEBYSHEV_H
 | 
			
		||||
#define GRID_CHEBYSHEV_H
 | 
			
		||||
 | 
			
		||||
#include<Grid.h>
 | 
			
		||||
#include<algorithms/LinearOperator.h>
 | 
			
		||||
#include <Grid/algorithms/LinearOperator.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
@@ -30,13 +57,14 @@ namespace Grid {
 | 
			
		||||
      Field Mtmp(in._grid);
 | 
			
		||||
      AtoN = in;
 | 
			
		||||
      out = AtoN*Coeffs[0];
 | 
			
		||||
      //      std::cout <<"Poly in " <<norm2(in)<<std::endl;
 | 
			
		||||
      //      std::cout <<"0 " <<norm2(out)<<std::endl;
 | 
			
		||||
//            std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl;
 | 
			
		||||
//            std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl;
 | 
			
		||||
      for(int n=1;n<Coeffs.size();n++){
 | 
			
		||||
	Mtmp = AtoN;
 | 
			
		||||
	Linop.HermOp(Mtmp,AtoN);
 | 
			
		||||
	out=out+AtoN*Coeffs[n];
 | 
			
		||||
	//	std::cout << n<<" " <<norm2(out)<<std::endl;
 | 
			
		||||
//            std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl;
 | 
			
		||||
//		std::cout << n<<" " <<norm2(out)<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
@@ -54,7 +82,8 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
    void csv(std::ostream &out){
 | 
			
		||||
      for (RealD x=lo; x<hi; x+=(hi-lo)/1000) {
 | 
			
		||||
	RealD diff = hi-lo;
 | 
			
		||||
      for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) {
 | 
			
		||||
	RealD f = approx(x);
 | 
			
		||||
	out<< x<<" "<<f<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
@@ -71,10 +100,24 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
    Chebyshev(){};
 | 
			
		||||
    Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
 | 
			
		||||
    
 | 
			
		||||
    Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// CJ: the one we need for Lanczos
 | 
			
		||||
    // Initialise as a single polynomial term on [_lo,_hi]: every coefficient
    // is zero except the last one (index _order-1), which is set to 1.
    //
    //   _lo, _hi - interval bounds stored in lo / hi
    //   _order   - number of coefficients; must be at least 2, otherwise the
    //              process exits with status -1
    //
    // Assumes Coeffs is a std::vector<RealD> (its declaration is outside
    // this view -- confirm).
    void Init(RealD _lo,RealD _hi,int _order)
    {
      lo=_lo;
      hi=_hi;
      order=_order;
      
      if(order < 2) exit(-1);

      // std::vector::assign takes (count, value).  With the arguments the
      // other way round the count is 0, which empties the vector and makes
      // the indexed write below out of bounds.
      Coeffs.assign(order,0.);
      Coeffs[order-1] = 1.;
    };
 | 
			
		||||
 | 
			
		||||
    void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
 | 
			
		||||
    {
 | 
			
		||||
      lo=_lo;
 | 
			
		||||
@@ -154,6 +197,8 @@ namespace Grid {
 | 
			
		||||
    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
 | 
			
		||||
 | 
			
		||||
      GridBase *grid=in._grid;
 | 
			
		||||
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
 | 
			
		||||
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
 | 
			
		||||
 | 
			
		||||
      int vol=grid->gSites();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/MultiShiftFunction.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/MultiShiftFunction.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef MULTI_SHIFT_FUNCTION
 | 
			
		||||
#define MULTI_SHIFT_FUNCTION
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -16,9 +16,13 @@
 | 
			
		||||
#define INCLUDED_ALG_REMEZ_H
 | 
			
		||||
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
 | 
			
		||||
//#include <algorithms/approx/bigfloat.h>
 | 
			
		||||
#include <algorithms/approx/bigfloat_double.h>
 | 
			
		||||
#ifdef HAVE_LIBGMP
 | 
			
		||||
#include "bigfloat.h"
 | 
			
		||||
#else
 | 
			
		||||
#include "bigfloat_double.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
 | 
			
		||||
#define SUM_MAX 10 // Maximum number of terms in exponential
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/bigfloat_double.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
typedef double mfloat; 
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/AdefGeneric.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
			
		||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,105 +1,168 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_GRADIENT_H
 | 
			
		||||
#define GRID_CONJUGATE_GRADIENT_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Base classes for iterative processes based on operators
 | 
			
		||||
    // single input vec, single output vec.
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
// Base classes for iterative processes based on operators
 | 
			
		||||
// single input vec, single output vec.
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  template<class Field> 
 | 
			
		||||
    class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
public:                                                
 | 
			
		||||
    RealD   Tolerance;
 | 
			
		||||
    Integer MaxIterations;
 | 
			
		||||
    ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { 
 | 
			
		||||
    };
 | 
			
		||||
template <class Field>
 | 
			
		||||
class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
 | 
			
		||||
                           // Defaults true.
 | 
			
		||||
  RealD Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
			
		||||
      : Tolerance(tol),
 | 
			
		||||
        MaxIterations(maxit),
 | 
			
		||||
        ErrorOnNoConverge(err_on_no_conv){};
 | 
			
		||||
 | 
			
		||||
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
 | 
			
		||||
                  Field &psi) {
 | 
			
		||||
    psi.checkerboard = src.checkerboard;
 | 
			
		||||
    conformable(psi, src);
 | 
			
		||||
 | 
			
		||||
    void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
 | 
			
		||||
    RealD cp, c, a, d, b, ssq, qq, b_pred;
 | 
			
		||||
 | 
			
		||||
      psi.checkerboard = src.checkerboard;
 | 
			
		||||
      conformable(psi,src);
 | 
			
		||||
    Field p(src);
 | 
			
		||||
    Field mmp(src);
 | 
			
		||||
    Field r(src);
 | 
			
		||||
 | 
			
		||||
      RealD cp,c,a,d,b,ssq,qq,b_pred;
 | 
			
		||||
      
 | 
			
		||||
      Field   p(src);
 | 
			
		||||
      Field mmp(src);
 | 
			
		||||
      Field   r(src);
 | 
			
		||||
      
 | 
			
		||||
      //Initial residual computation & set up
 | 
			
		||||
      RealD guess = norm2(psi);
 | 
			
		||||
      
 | 
			
		||||
      Linop.HermOpAndNorm(psi,mmp,d,b);
 | 
			
		||||
      
 | 
			
		||||
      r= src-mmp;
 | 
			
		||||
      p= r;
 | 
			
		||||
      
 | 
			
		||||
      a  =norm2(p);
 | 
			
		||||
      cp =a;
 | 
			
		||||
      ssq=norm2(src);
 | 
			
		||||
    // Initial residual computation & set up
 | 
			
		||||
    RealD guess = norm2(psi);
 | 
			
		||||
    assert(std::isnan(guess) == 0);
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   src "<<ssq  <<std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:    mp "<<d    <<std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:   mmp "<<b    <<std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:  cp,r "<<cp   <<std::endl;
 | 
			
		||||
      std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient:     p "<<a    <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, b);
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
      RealD rsq =  Tolerance* Tolerance*ssq;
 | 
			
		||||
      
 | 
			
		||||
      //Check if guess is really REALLY good :)
 | 
			
		||||
      if ( cp <= rsq ) {
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      int k;
 | 
			
		||||
      for (k=1;k<=MaxIterations;k++){
 | 
			
		||||
	
 | 
			
		||||
	c=cp;
 | 
			
		||||
	
 | 
			
		||||
	Linop.HermOpAndNorm(p,mmp,d,qq);
 | 
			
		||||
    r = src - mmp;
 | 
			
		||||
    p = r;
 | 
			
		||||
 | 
			
		||||
	RealD    qqck = norm2(mmp);
 | 
			
		||||
	ComplexD dck  = innerProduct(p,mmp);
 | 
			
		||||
      
 | 
			
		||||
	a      = c/d;
 | 
			
		||||
	b_pred = a*(a*qq-d)/c;
 | 
			
		||||
    a = norm2(p);
 | 
			
		||||
    cp = a;
 | 
			
		||||
    ssq = norm2(src);
 | 
			
		||||
 | 
			
		||||
	cp = axpy_norm(r,-a,mmp,r);
 | 
			
		||||
	b = cp/c;
 | 
			
		||||
	
 | 
			
		||||
	// Fuse these loops ; should be really easy
 | 
			
		||||
	psi= a*p+psi;
 | 
			
		||||
	p  = p*b+r;
 | 
			
		||||
	  
 | 
			
		||||
	std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
 | 
			
		||||
	
 | 
			
		||||
	// Stopping condition
 | 
			
		||||
	if ( cp <= rsq ) { 
 | 
			
		||||
	  
 | 
			
		||||
	  Linop.HermOpAndNorm(psi,mmp,d,qq);
 | 
			
		||||
	  p=mmp-src;
 | 
			
		||||
	  
 | 
			
		||||
	  RealD mmpnorm = sqrt(norm2(mmp));
 | 
			
		||||
	  RealD psinorm = sqrt(norm2(psi));
 | 
			
		||||
	  RealD srcnorm = sqrt(norm2(src));
 | 
			
		||||
	  RealD resnorm = sqrt(norm2(p));
 | 
			
		||||
	  RealD true_residual = resnorm/srcnorm;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient: guess " << guess << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:   src " << ssq << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:    mp " << d << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:   mmp " << b << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:  cp,r " << cp << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:     p " << a << std::endl;
 | 
			
		||||
 | 
			
		||||
	  std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
 | 
			
		||||
		   <<" computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
		   <<" true residual     "<<true_residual
 | 
			
		||||
		   <<" target "<<Tolerance<<std::endl;
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
 | 
			
		||||
      assert(0);
 | 
			
		||||
    RealD rsq = Tolerance * Tolerance * ssq;
 | 
			
		||||
 | 
			
		||||
    // Check if guess is really REALLY good :)
 | 
			
		||||
    if (cp <= rsq) {
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq
 | 
			
		||||
              << std::endl;
 | 
			
		||||
 | 
			
		||||
    GridStopWatch LinalgTimer;
 | 
			
		||||
    GridStopWatch MatrixTimer;
 | 
			
		||||
    GridStopWatch SolverTimer;
 | 
			
		||||
 | 
			
		||||
    SolverTimer.Start();
 | 
			
		||||
    int k;
 | 
			
		||||
    for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
      c = cp;
 | 
			
		||||
 | 
			
		||||
      MatrixTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(p, mmp, d, qq);
 | 
			
		||||
      MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      LinalgTimer.Start();
 | 
			
		||||
      //  RealD    qqck = norm2(mmp);
 | 
			
		||||
      //  ComplexD dck  = innerProduct(p,mmp);
 | 
			
		||||
 | 
			
		||||
      a = c / d;
 | 
			
		||||
      b_pred = a * (a * qq - d) / c;
 | 
			
		||||
 | 
			
		||||
      cp = axpy_norm(r, -a, mmp, r);
 | 
			
		||||
      b = cp / c;
 | 
			
		||||
 | 
			
		||||
      // Fuse these loops ; should be really easy
 | 
			
		||||
      psi = a * p + psi;
 | 
			
		||||
      p = p * b + r;
 | 
			
		||||
 | 
			
		||||
      LinalgTimer.Stop();
 | 
			
		||||
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
 | 
			
		||||
                << " residual " << cp << " target " << rsq << std::endl;
 | 
			
		||||
 | 
			
		||||
      // Stopping condition
 | 
			
		||||
      if (cp <= rsq) {
 | 
			
		||||
        SolverTimer.Stop();
 | 
			
		||||
        Linop.HermOpAndNorm(psi, mmp, d, qq);
 | 
			
		||||
        p = mmp - src;
 | 
			
		||||
 | 
			
		||||
        RealD mmpnorm = sqrt(norm2(mmp));
 | 
			
		||||
        RealD psinorm = sqrt(norm2(psi));
 | 
			
		||||
        RealD srcnorm = sqrt(norm2(src));
 | 
			
		||||
        RealD resnorm = sqrt(norm2(p));
 | 
			
		||||
        RealD true_residual = resnorm / srcnorm;
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage
 | 
			
		||||
                  << "ConjugateGradient: Converged on iteration " << k << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
 | 
			
		||||
                  << " true residual " << true_residual << " target "
 | 
			
		||||
                  << Tolerance << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage << "Time elapsed: Iterations "
 | 
			
		||||
                  << SolverTimer.Elapsed() << " Matrix  "
 | 
			
		||||
                  << MatrixTimer.Elapsed() << " Linalg "
 | 
			
		||||
                  << LinalgTimer.Elapsed();
 | 
			
		||||
        std::cout << std::endl;
 | 
			
		||||
 | 
			
		||||
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
 | 
			
		||||
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
 | 
			
		||||
              << std::endl;
 | 
			
		||||
    if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										142
									
								
								lib/algorithms/iterative/ConjugateGradientMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								lib/algorithms/iterative/ConjugateGradientMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,142 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
 | 
			
		||||
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  //Mixed precision restarted defect correction CG
 | 
			
		||||
  template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
 | 
			
		||||
  class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
 | 
			
		||||
  public:                                                
 | 
			
		||||
    RealD   Tolerance;
 | 
			
		||||
    Integer MaxInnerIterations;
 | 
			
		||||
    Integer MaxOuterIterations;
 | 
			
		||||
    GridBase* SinglePrecGrid; //Grid for single-precision fields
 | 
			
		||||
    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
 | 
			
		||||
    LinearOperatorBase<FieldF> &Linop_f;
 | 
			
		||||
    LinearOperatorBase<FieldD> &Linop_d;
 | 
			
		||||
 | 
			
		||||
    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
 | 
			
		||||
    LinearFunction<FieldF> *guesser;
 | 
			
		||||
    
 | 
			
		||||
    MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
 | 
			
		||||
      Linop_f(_Linop_f), Linop_d(_Linop_d),
 | 
			
		||||
      Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
 | 
			
		||||
      OuterLoopNormMult(100.), guesser(NULL){ };
 | 
			
		||||
 | 
			
		||||
    void useGuesser(LinearFunction<FieldF> &g){
 | 
			
		||||
      guesser = &g;
 | 
			
		||||
    }
 | 
			
		||||
  
 | 
			
		||||
    void operator() (const FieldD &src_d_in, FieldD &sol_d){
 | 
			
		||||
      GridStopWatch TotalTimer;
 | 
			
		||||
      TotalTimer.Start();
 | 
			
		||||
    
 | 
			
		||||
      int cb = src_d_in.checkerboard;
 | 
			
		||||
      sol_d.checkerboard = cb;
 | 
			
		||||
    
 | 
			
		||||
      RealD src_norm = norm2(src_d_in);
 | 
			
		||||
      RealD stop = src_norm * Tolerance*Tolerance;
 | 
			
		||||
 | 
			
		||||
      GridBase* DoublePrecGrid = src_d_in._grid;
 | 
			
		||||
      FieldD tmp_d(DoublePrecGrid);
 | 
			
		||||
      tmp_d.checkerboard = cb;
 | 
			
		||||
    
 | 
			
		||||
      FieldD tmp2_d(DoublePrecGrid);
 | 
			
		||||
      tmp2_d.checkerboard = cb;
 | 
			
		||||
    
 | 
			
		||||
      FieldD src_d(DoublePrecGrid);
 | 
			
		||||
      src_d = src_d_in; //source for next inner iteration, computed from residual during operation
 | 
			
		||||
    
 | 
			
		||||
      RealD inner_tol = Tolerance;
 | 
			
		||||
    
 | 
			
		||||
      FieldF src_f(SinglePrecGrid);
 | 
			
		||||
      src_f.checkerboard = cb;
 | 
			
		||||
    
 | 
			
		||||
      FieldF sol_f(SinglePrecGrid);
 | 
			
		||||
      sol_f.checkerboard = cb;
 | 
			
		||||
    
 | 
			
		||||
      ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
 | 
			
		||||
      CG_f.ErrorOnNoConverge = false;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch InnerCGtimer;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch PrecChangeTimer;
 | 
			
		||||
    
 | 
			
		||||
      for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
 | 
			
		||||
	//Compute double precision rsd and also new RHS vector.
 | 
			
		||||
	Linop_d.HermOp(sol_d, tmp_d);
 | 
			
		||||
	RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
 | 
			
		||||
      
 | 
			
		||||
	std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
 | 
			
		||||
 | 
			
		||||
	if(norm < OuterLoopNormMult * stop){
 | 
			
		||||
	  std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
 | 
			
		||||
	  break;
 | 
			
		||||
	}
 | 
			
		||||
	while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ??
 | 
			
		||||
 | 
			
		||||
	PrecChangeTimer.Start();
 | 
			
		||||
	precisionChange(src_f, src_d);
 | 
			
		||||
	PrecChangeTimer.Stop();
 | 
			
		||||
      
 | 
			
		||||
	zeroit(sol_f);
 | 
			
		||||
 | 
			
		||||
	//Optionally improve inner solver guess (eg using known eigenvectors)
 | 
			
		||||
	if(guesser != NULL)
 | 
			
		||||
	  (*guesser)(src_f, sol_f);
 | 
			
		||||
 | 
			
		||||
	//Inner CG
 | 
			
		||||
	CG_f.Tolerance = inner_tol;
 | 
			
		||||
	InnerCGtimer.Start();
 | 
			
		||||
	CG_f(Linop_f, src_f, sol_f);
 | 
			
		||||
	InnerCGtimer.Stop();
 | 
			
		||||
      
 | 
			
		||||
	//Convert sol back to double and add to double prec solution
 | 
			
		||||
	PrecChangeTimer.Start();
 | 
			
		||||
	precisionChange(tmp_d, sol_f);
 | 
			
		||||
	PrecChangeTimer.Stop();
 | 
			
		||||
      
 | 
			
		||||
	axpy(sol_d, 1.0, tmp_d, sol_d);
 | 
			
		||||
      }
 | 
			
		||||
    
 | 
			
		||||
      //Final trial CG
 | 
			
		||||
      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
 | 
			
		||||
    
 | 
			
		||||
      ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
 | 
			
		||||
      CG_d(Linop_d, src_d_in, sol_d);
 | 
			
		||||
 | 
			
		||||
      TotalTimer.Stop();
 | 
			
		||||
      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
 | 
			
		||||
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
 | 
			
		||||
 | 
			
		||||
@@ -246,7 +274,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
  }
 | 
			
		||||
  // ugly hack
 | 
			
		||||
  std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
 | 
			
		||||
  assert(0);
 | 
			
		||||
//  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateResidual.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
 | 
			
		||||
#define GRID_CONJUGATE_RESIDUAL_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/DenseMatrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_DENSE_MATRIX_H
 | 
			
		||||
#define GRID_DENSE_MATRIX_H
 | 
			
		||||
 | 
			
		||||
@@ -102,8 +130,8 @@ DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st,
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#include <algorithms/iterative/Householder.h>
 | 
			
		||||
#include <algorithms/iterative/Francis.h>
 | 
			
		||||
#include "Householder.h"
 | 
			
		||||
#include "Francis.h"
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/EigenSort.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_EIGENSORT_H
 | 
			
		||||
#define GRID_EIGENSORT_H
 | 
			
		||||
 | 
			
		||||
@@ -11,32 +38,34 @@ template<class Field>
 | 
			
		||||
class SortEigen {
 | 
			
		||||
 private:
 | 
			
		||||
  
 | 
			
		||||
//hacking for testing for now
 | 
			
		||||
 private:
 | 
			
		||||
  static bool less_lmd(RealD left,RealD right){
 | 
			
		||||
    return fabs(left) < fabs(right);
 | 
			
		||||
    return left > right;
 | 
			
		||||
  }  
 | 
			
		||||
  static bool less_pair(std::pair<RealD,Field>& left,
 | 
			
		||||
		 std::pair<RealD,Field>& right){
 | 
			
		||||
    return fabs(left.first) < fabs(right.first);
 | 
			
		||||
  static bool less_pair(std::pair<RealD,Field const*>& left,
 | 
			
		||||
                        std::pair<RealD,Field const*>& right){
 | 
			
		||||
    return left.first > (right.first);
 | 
			
		||||
  }  
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  void push(DenseVector<RealD>& lmd,
 | 
			
		||||
	    DenseVector<Field>& evec,int N) {
 | 
			
		||||
 | 
			
		||||
    DenseVector<std::pair<RealD, Field> > emod;
 | 
			
		||||
    typename DenseVector<std::pair<RealD, Field> >::iterator it;
 | 
			
		||||
            DenseVector<Field>& evec,int N) {
 | 
			
		||||
    DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
 | 
			
		||||
    for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
 | 
			
		||||
    
 | 
			
		||||
    for(int i=0;i<lmd.size();++i){
 | 
			
		||||
      emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
 | 
			
		||||
    }
 | 
			
		||||
    DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());    
 | 
			
		||||
    for(int i=0;i<lmd.size();++i)
 | 
			
		||||
      emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
 | 
			
		||||
 | 
			
		||||
    partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
 | 
			
		||||
 | 
			
		||||
    it=emod.begin();
 | 
			
		||||
    typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
 | 
			
		||||
    for(int i=0;i<N;++i){
 | 
			
		||||
      lmd[i]=it->first;
 | 
			
		||||
      evec[i]=it->second;
 | 
			
		||||
      evec[i]=*(it->second);
 | 
			
		||||
      ++it;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/Francis.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef FRANCIS_H
 | 
			
		||||
#define FRANCIS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/Householder.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef HOUSEHOLDER_H
 | 
			
		||||
#define HOUSEHOLDER_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,40 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_IRL_H
 | 
			
		||||
#define GRID_IRL_H
 | 
			
		||||
 | 
			
		||||
#include <algorithms/iterative/DenseMatrix.h>
 | 
			
		||||
#include <algorithms/iterative/EigenSort.h>
 | 
			
		||||
#include <string.h> //memset
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
#include <lapacke.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "DenseMatrix.h"
 | 
			
		||||
#include "EigenSort.h"
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
@@ -21,6 +53,7 @@ public:
 | 
			
		||||
    int Niter;
 | 
			
		||||
    int converged;
 | 
			
		||||
 | 
			
		||||
    int Nstop;   // Number of evecs checked for convergence
 | 
			
		||||
    int Nk;      // Number of converged sought
 | 
			
		||||
    int Np;      // Np -- Number of spare vecs in Krylov space
 | 
			
		||||
    int Nm;      // Nm -- total number of vectors
 | 
			
		||||
@@ -29,6 +62,8 @@ public:
 | 
			
		||||
 | 
			
		||||
    SortEigen<Field> _sort;
 | 
			
		||||
 | 
			
		||||
//    GridCartesian &_fgrid;
 | 
			
		||||
 | 
			
		||||
    LinearOperatorBase<Field> &_Linop;
 | 
			
		||||
 | 
			
		||||
    OperatorFunction<Field>   &_poly;
 | 
			
		||||
@@ -39,7 +74,27 @@ public:
 | 
			
		||||
    void init(void){};
 | 
			
		||||
    void Abort(int ff, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs);
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op
 | 
			
		||||
    ImplicitlyRestartedLanczos(
 | 
			
		||||
				LinearOperatorBase<Field> &Linop, // op
 | 
			
		||||
			       OperatorFunction<Field> & poly,   // polynomial
 | 
			
		||||
			       int _Nstop, // number of evecs checked for convergence
 | 
			
		||||
			       int _Nk, // sought vecs
 | 
			
		||||
			       int _Nm, // total number of vecs (spare vecs Np = Nm-Nk)
 | 
			
		||||
			       RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			       int _Niter) : // Max iterations
 | 
			
		||||
      _Linop(Linop),
 | 
			
		||||
      _poly(poly),
 | 
			
		||||
      Nstop(_Nstop),
 | 
			
		||||
      Nk(_Nk),
 | 
			
		||||
      Nm(_Nm),
 | 
			
		||||
      eresid(_eresid),
 | 
			
		||||
      Niter(_Niter)
 | 
			
		||||
    { 
 | 
			
		||||
      Np = Nm-Nk; assert(Np>0);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos(
 | 
			
		||||
				LinearOperatorBase<Field> &Linop, // op
 | 
			
		||||
			       OperatorFunction<Field> & poly,   // polynomial
 | 
			
		||||
			       int _Nk, // sought vecs
 | 
			
		||||
			       int _Nm, // total number of vecs
 | 
			
		||||
@@ -47,6 +102,7 @@ public:
 | 
			
		||||
			       int _Niter) : // Max iterations
 | 
			
		||||
      _Linop(Linop),
 | 
			
		||||
      _poly(poly),
 | 
			
		||||
      Nstop(_Nk),
 | 
			
		||||
      Nk(_Nk),
 | 
			
		||||
      Nm(_Nm),
 | 
			
		||||
      eresid(_eresid),
 | 
			
		||||
@@ -114,10 +170,11 @@ public:
 | 
			
		||||
      RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
                                 // 7. vk+1 := wk/βk+1
 | 
			
		||||
 | 
			
		||||
//	std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl;
 | 
			
		||||
      const RealD tiny = 1.0e-20;
 | 
			
		||||
      if ( beta < tiny ) { 
 | 
			
		||||
	std::cout << " beta is tiny "<<beta<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
     }
 | 
			
		||||
      lmd[k] = alph;
 | 
			
		||||
      lme[k]  = beta;
 | 
			
		||||
 | 
			
		||||
@@ -191,15 +248,122 @@ public:
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
    void diagonalize_lapack(DenseVector<RealD>& lmd,
 | 
			
		||||
		     DenseVector<RealD>& lme, 
 | 
			
		||||
		     int N1,
 | 
			
		||||
		     int N2,
 | 
			
		||||
		     DenseVector<RealD>& Qt,
 | 
			
		||||
		     GridBase *grid){
 | 
			
		||||
  const int size = Nm;
 | 
			
		||||
//  tevals.resize(size);
 | 
			
		||||
//  tevecs.resize(size);
 | 
			
		||||
  int NN = N1;
 | 
			
		||||
  double evals_tmp[NN];
 | 
			
		||||
  double evec_tmp[NN][NN];
 | 
			
		||||
  memset(evec_tmp[0],0,sizeof(double)*NN*NN);
 | 
			
		||||
//  double AA[NN][NN];
 | 
			
		||||
  double DD[NN];
 | 
			
		||||
  double EE[NN];
 | 
			
		||||
  for (int i = 0; i< NN; i++)
 | 
			
		||||
    for (int j = i - 1; j <= i + 1; j++)
 | 
			
		||||
      if ( j < NN && j >= 0 ) {
 | 
			
		||||
        if (i==j) DD[i] = lmd[i];
 | 
			
		||||
        if (i==j) evals_tmp[i] = lmd[i];
 | 
			
		||||
        if (j==(i-1)) EE[j] = lme[j];
 | 
			
		||||
      }
 | 
			
		||||
  int evals_found;
 | 
			
		||||
  int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
 | 
			
		||||
  int liwork =  3+NN*10 ;
 | 
			
		||||
  int iwork[liwork];
 | 
			
		||||
  double work[lwork];
 | 
			
		||||
  int isuppz[2*NN];
 | 
			
		||||
  char jobz = 'V'; // calculate evals & evecs
 | 
			
		||||
  char range = 'I'; // calculate only the il-th through iu-th evals (index range)
 | 
			
		||||
  //    char range = 'A'; // calculate all evals
 | 
			
		||||
  char uplo = 'U'; // refer to upper half of original matrix
 | 
			
		||||
  char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
 | 
			
		||||
  int ifail[NN];
 | 
			
		||||
  int info;
 | 
			
		||||
//  int total = QMP_get_number_of_nodes();
 | 
			
		||||
//  int node = QMP_get_node_number();
 | 
			
		||||
//  GridBase *grid = evec[0]._grid;
 | 
			
		||||
  int total = grid->_Nprocessors;
 | 
			
		||||
  int node = grid->_processor;
 | 
			
		||||
  int interval = (NN/total)+1;
 | 
			
		||||
  double vl = 0.0, vu = 0.0;
 | 
			
		||||
  int il = interval*node+1 , iu = interval*(node+1);
 | 
			
		||||
  if (iu > NN)  iu=NN;
 | 
			
		||||
  double tol = 0.0;
 | 
			
		||||
    if (1) {
 | 
			
		||||
      memset(evals_tmp,0,sizeof(double)*NN);
 | 
			
		||||
      if ( il <= NN){
 | 
			
		||||
        printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu);
 | 
			
		||||
        LAPACK_dstegr(&jobz, &range, &NN,
 | 
			
		||||
            (double*)DD, (double*)EE,
 | 
			
		||||
            &vl, &vu, &il, &iu, // these four are ignored if second parameter is 'A'
 | 
			
		||||
            &tol, // tolerance
 | 
			
		||||
            &evals_found, evals_tmp, (double*)evec_tmp, &NN,
 | 
			
		||||
            isuppz,
 | 
			
		||||
            work, &lwork, iwork, &liwork,
 | 
			
		||||
            &info);
 | 
			
		||||
        for (int i = iu-1; i>= il-1; i--){
 | 
			
		||||
          printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]);
 | 
			
		||||
          evals_tmp[i] = evals_tmp[i - (il-1)];
 | 
			
		||||
          if (il>1) evals_tmp[i-(il-1)]=0.;
 | 
			
		||||
          for (int j = 0; j< NN; j++){
 | 
			
		||||
            evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
 | 
			
		||||
            if (il>1) evec_tmp[i-(il-1)][j]=0.;
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      {
 | 
			
		||||
//        QMP_sum_double_array(evals_tmp,NN);
 | 
			
		||||
//        QMP_sum_double_array((double *)evec_tmp,NN*NN);
 | 
			
		||||
         grid->GlobalSumVector(evals_tmp,NN);
 | 
			
		||||
         grid->GlobalSumVector((double*)evec_tmp,NN*NN);
 | 
			
		||||
      }
 | 
			
		||||
    } 
 | 
			
		||||
// Cheating a bit: it would be better to sort than to simply reverse the order, but the documentation of the routine says evals are returned in increasing order, whereas qr gives evals in decreasing order.
 | 
			
		||||
  for(int i=0;i<NN;i++){
 | 
			
		||||
    for(int j=0;j<NN;j++)
 | 
			
		||||
      Qt[(NN-1-i)*N2+j]=evec_tmp[i][j];
 | 
			
		||||
      lmd [NN-1-i]=evals_tmp[i];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    void diagonalize(DenseVector<RealD>& lmd,
 | 
			
		||||
		     DenseVector<RealD>& lme, 
 | 
			
		||||
		     int Nm2,
 | 
			
		||||
		     int Nm,
 | 
			
		||||
		     DenseVector<RealD>& Qt)
 | 
			
		||||
		     int N2,
 | 
			
		||||
		     int N1,
 | 
			
		||||
		     DenseVector<RealD>& Qt,
 | 
			
		||||
		     GridBase *grid)
 | 
			
		||||
    {
 | 
			
		||||
      int Niter = 100*Nm;
 | 
			
		||||
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
    const int check_lapack=0; // just use lapack if 0, check against lapack if 1
 | 
			
		||||
 | 
			
		||||
    if(!check_lapack)
 | 
			
		||||
	return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
 | 
			
		||||
 | 
			
		||||
	DenseVector <RealD> lmd2(N1);
 | 
			
		||||
	DenseVector <RealD> lme2(N1);
 | 
			
		||||
	DenseVector<RealD> Qt2(N1*N1);
 | 
			
		||||
         for(int k=0; k<N1; ++k){
 | 
			
		||||
	    lmd2[k] = lmd[k];
 | 
			
		||||
	    lme2[k] = lme[k];
 | 
			
		||||
	  }
 | 
			
		||||
         for(int k=0; k<N1*N1; ++k)
 | 
			
		||||
	Qt2[k] = Qt[k];
 | 
			
		||||
 | 
			
		||||
//	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      int Niter = 100*N1;
 | 
			
		||||
      int kmin = 1;
 | 
			
		||||
      int kmax = Nk;
 | 
			
		||||
      int kmax = N2;
 | 
			
		||||
      // (this should be more sophisticated)
 | 
			
		||||
 | 
			
		||||
      for(int iter=0; iter<Niter; ++iter){
 | 
			
		||||
@@ -211,7 +375,7 @@ public:
 | 
			
		||||
	// (Dsh: shift)
 | 
			
		||||
	
 | 
			
		||||
	// transformation
 | 
			
		||||
	qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax);
 | 
			
		||||
	qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
 | 
			
		||||
	
 | 
			
		||||
	// Convergence criterion (redef of kmin and kmax)
 | 
			
		||||
	for(int j=kmax-1; j>= kmin; --j){
 | 
			
		||||
@@ -222,6 +386,23 @@ public:
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	Niter = iter;
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
    if(check_lapack){
 | 
			
		||||
	const double SMALL=1e-8;
 | 
			
		||||
	diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
 | 
			
		||||
	DenseVector <RealD> lmd3(N2);
 | 
			
		||||
         for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
 | 
			
		||||
        _sort.push(lmd3,N2);
 | 
			
		||||
        _sort.push(lmd2,N2);
 | 
			
		||||
         for(int k=0; k<N2; ++k){
 | 
			
		||||
	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
 | 
			
		||||
//	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
         for(int k=0; k<N1*N1; ++k){
 | 
			
		||||
//	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
 | 
			
		||||
	}
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
	return;
 | 
			
		||||
 | 
			
		||||
      continued:
 | 
			
		||||
@@ -237,6 +418,7 @@ public:
 | 
			
		||||
      abort();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#if 1
 | 
			
		||||
    static RealD normalise(Field& v) 
 | 
			
		||||
    {
 | 
			
		||||
      RealD nn = norm2(v);
 | 
			
		||||
@@ -298,6 +480,7 @@ until convergence
 | 
			
		||||
      {
 | 
			
		||||
 | 
			
		||||
	GridBase *grid = evec[0]._grid;
 | 
			
		||||
	assert(grid == src._grid);
 | 
			
		||||
 | 
			
		||||
	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
 | 
			
		||||
	std::cout << " -- Nm = " << Nm << std::endl;
 | 
			
		||||
@@ -328,11 +511,21 @@ until convergence
 | 
			
		||||
	// (uniform vector) Why not src??
 | 
			
		||||
	//	evec[0] = 1.0;
 | 
			
		||||
	evec[0] = src;
 | 
			
		||||
	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
 | 
			
		||||
// << src._grid  << std::endl;
 | 
			
		||||
	normalise(evec[0]);
 | 
			
		||||
	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
 | 
			
		||||
// << evec[0]._grid << std::endl;
 | 
			
		||||
	
 | 
			
		||||
	// Initial Nk steps
 | 
			
		||||
	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
 | 
			
		||||
//	std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
 | 
			
		||||
//	std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
 | 
			
		||||
	RitzMatrix(evec,Nk);
 | 
			
		||||
	for(int k=0; k<Nk; ++k){
 | 
			
		||||
//	std:: cout <<"eval " << k << " " <<eval[k] << std::endl;
 | 
			
		||||
//	std:: cout <<"lme " << k << " " << lme[k] << std::endl;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Restarting loop begins
 | 
			
		||||
	for(int iter = 0; iter<Niter; ++iter){
 | 
			
		||||
@@ -354,20 +547,24 @@ until convergence
 | 
			
		||||
	    lme2[k] = lme[k+k1-1];
 | 
			
		||||
	  }
 | 
			
		||||
	  setUnit_Qt(Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nm,Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
 | 
			
		||||
 | 
			
		||||
	  // sorting
 | 
			
		||||
	  _sort.push(eval2,Nm);
 | 
			
		||||
	  
 | 
			
		||||
	  // Implicitly shifted QR transformations
 | 
			
		||||
	  setUnit_Qt(Nm,Qt);
 | 
			
		||||
	  for(int ip=k2; ip<Nm; ++ip) 
 | 
			
		||||
	  for(int ip=k2; ip<Nm; ++ip){ 
 | 
			
		||||
	std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
 | 
			
		||||
	    qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
    
 | 
			
		||||
	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
 | 
			
		||||
	  
 | 
			
		||||
	  for(int j=k1-1; j<k2+1; ++j){
 | 
			
		||||
	    for(int k=0; k<Nm; ++k){
 | 
			
		||||
	    B[j].checkerboard = evec[k].checkerboard;
 | 
			
		||||
	      B[j] += Qt[k+Nm*j] * evec[k];
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
@@ -390,21 +587,25 @@ until convergence
 | 
			
		||||
	    lme2[k] = lme[k];
 | 
			
		||||
	  }
 | 
			
		||||
	  setUnit_Qt(Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nk,Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
 | 
			
		||||
	  
 | 
			
		||||
	  for(int k = 0; k<Nk; ++k) B[k]=0.0;
 | 
			
		||||
	  
 | 
			
		||||
	  for(int j = 0; j<Nk; ++j){
 | 
			
		||||
	    for(int k = 0; k<Nk; ++k){
 | 
			
		||||
	    B[j].checkerboard = evec[k].checkerboard;
 | 
			
		||||
	      B[j] += Qt[k+j*Nm] * evec[k];
 | 
			
		||||
	    }
 | 
			
		||||
//	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
//	_sort.push(eval2,B,Nk);
 | 
			
		||||
 | 
			
		||||
	  Nconv = 0;
 | 
			
		||||
	  //	  std::cout << std::setiosflags(std::ios_base::scientific);
 | 
			
		||||
	  for(int i=0; i<Nk; ++i){
 | 
			
		||||
 | 
			
		||||
	    _poly(_Linop,B[i],v);
 | 
			
		||||
//	    _poly(_Linop,B[i],v);
 | 
			
		||||
	    _Linop.HermOp(B[i],v);
 | 
			
		||||
	    
 | 
			
		||||
	    RealD vnum = real(innerProduct(B[i],v)); // HermOp.
 | 
			
		||||
	    RealD vden = norm2(B[i]);
 | 
			
		||||
@@ -412,11 +613,13 @@ until convergence
 | 
			
		||||
	    v -= eval2[i]*B[i];
 | 
			
		||||
	    RealD vv = norm2(v);
 | 
			
		||||
	    
 | 
			
		||||
	    std::cout.precision(13);
 | 
			
		||||
	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
 | 
			
		||||
	    std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
 | 
			
		||||
	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
 | 
			
		||||
	    
 | 
			
		||||
	    if(vv<eresid*eresid){
 | 
			
		||||
	// Changed criterion: as evals are supposed to be sorted, all evals smaller (larger) than the Nstop-th should have converged, so mode i is counted only when i == Nconv
 | 
			
		||||
	    if((vv<eresid*eresid) && (i == Nconv) ){
 | 
			
		||||
	      Iconv[Nconv] = i;
 | 
			
		||||
	      ++Nconv;
 | 
			
		||||
	    }
 | 
			
		||||
@@ -427,7 +630,7 @@ until convergence
 | 
			
		||||
 | 
			
		||||
	  std::cout<<" #modes converged: "<<Nconv<<std::endl;
 | 
			
		||||
 | 
			
		||||
	  if( Nconv>=Nk ){
 | 
			
		||||
	  if( Nconv>=Nstop ){
 | 
			
		||||
	    goto converged;
 | 
			
		||||
	  }
 | 
			
		||||
	} // end of iter loop
 | 
			
		||||
@@ -436,21 +639,20 @@ until convergence
 | 
			
		||||
	abort();
 | 
			
		||||
	
 | 
			
		||||
      converged:
 | 
			
		||||
	// Sorting
 | 
			
		||||
	
 | 
			
		||||
	eval.clear();
 | 
			
		||||
	evec.clear();
 | 
			
		||||
	for(int i=0; i<Nconv; ++i){
 | 
			
		||||
	  eval.push_back(eval2[Iconv[i]]);
 | 
			
		||||
	  evec.push_back(B[Iconv[i]]);
 | 
			
		||||
	}
 | 
			
		||||
	_sort.push(eval,evec,Nconv);
 | 
			
		||||
	
 | 
			
		||||
	std::cout << "\n Converged\n Summary :\n";
 | 
			
		||||
	std::cout << " -- Iterations  = "<< Nconv  << "\n";
 | 
			
		||||
	std::cout << " -- beta(k)     = "<< beta_k << "\n";
 | 
			
		||||
	std::cout << " -- Nconv       = "<< Nconv  << "\n";
 | 
			
		||||
      }
 | 
			
		||||
       // Sorting
 | 
			
		||||
       eval.resize(Nconv);
 | 
			
		||||
       evec.resize(Nconv,grid);
 | 
			
		||||
       for(int i=0; i<Nconv; ++i){
 | 
			
		||||
         eval[i] = eval2[Iconv[i]];
 | 
			
		||||
         evec[i] = B[Iconv[i]];
 | 
			
		||||
       }
 | 
			
		||||
      _sort.push(eval,evec,Nconv);
 | 
			
		||||
 | 
			
		||||
      std::cout << "\n Converged\n Summary :\n";
 | 
			
		||||
      std::cout << " -- Iterations  = "<< Nconv  << "\n";
 | 
			
		||||
      std::cout << " -- beta(k)     = "<< beta_k << "\n";
 | 
			
		||||
      std::cout << " -- Nconv       = "<< Nconv  << "\n";
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////
 | 
			
		||||
    // Adapted from Rudy's lanczos factor routine
 | 
			
		||||
@@ -997,6 +1199,7 @@ static void Lock(DenseMatrix<T> &H, 	///Hess mtx
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/Matrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef MATRIX_H
 | 
			
		||||
#define MATRIX_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/MatrixUtils.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATRIX_UTILS_H
 | 
			
		||||
#define GRID_MATRIX_UTILS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/NormalEquations.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_NORMAL_EQUATIONS_H
 | 
			
		||||
#define GRID_NORMAL_EQUATIONS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/PrecConjugateResidual.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
 | 
			
		||||
#define GRID_PREC_CONJUGATE_RESIDUAL_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PREC_GCR_H
 | 
			
		||||
#define GRID_PREC_GCR_H
 | 
			
		||||
 | 
			
		||||
@@ -19,6 +47,10 @@ namespace Grid {
 | 
			
		||||
    int mmax;
 | 
			
		||||
    int nstep;
 | 
			
		||||
    int steps;
 | 
			
		||||
    GridStopWatch PrecTimer;
 | 
			
		||||
    GridStopWatch MatTimer;
 | 
			
		||||
    GridStopWatch LinalgTimer;
 | 
			
		||||
 | 
			
		||||
    LinearFunction<Field> &Preconditioner;
 | 
			
		||||
 | 
			
		||||
   PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
 | 
			
		||||
@@ -40,14 +72,24 @@ namespace Grid {
 | 
			
		||||
      
 | 
			
		||||
      Field r(src._grid);
 | 
			
		||||
 | 
			
		||||
        PrecTimer.Reset();
 | 
			
		||||
         MatTimer.Reset();
 | 
			
		||||
      LinalgTimer.Reset();
 | 
			
		||||
 | 
			
		||||
      GridStopWatch SolverTimer;
 | 
			
		||||
      SolverTimer.Start();
 | 
			
		||||
 | 
			
		||||
      steps=0;
 | 
			
		||||
      for(int k=0;k<MaxIterations;k++){
 | 
			
		||||
 | 
			
		||||
	cp=GCRnStep(Linop,src,psi,rsq);
 | 
			
		||||
 | 
			
		||||
	if ( verbose ) std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
 | 
			
		||||
 | 
			
		||||
	if(cp<rsq) {
 | 
			
		||||
 | 
			
		||||
	  SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	  Linop.HermOp(psi,r);
 | 
			
		||||
	  axpy(r,-1.0,src,r);
 | 
			
		||||
	  RealD tr = norm2(r);
 | 
			
		||||
@@ -55,6 +97,11 @@ namespace Grid {
 | 
			
		||||
		   << " computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
	           << " true residual "    <<sqrt(tr/ssq)
 | 
			
		||||
	           << " target "           <<Tolerance <<std::endl;
 | 
			
		||||
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total  "<< SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<<   PrecTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<<    MatTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@@ -62,6 +109,7 @@ namespace Grid {
 | 
			
		||||
      std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
 | 
			
		||||
 | 
			
		||||
      RealD cp;
 | 
			
		||||
@@ -88,24 +136,25 @@ namespace Grid {
 | 
			
		||||
      // initial guess x0 is taken as nonzero.
 | 
			
		||||
      // r0=src-A x0 = src
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(psi,Az,zAz,zAAz); 
 | 
			
		||||
      MatTimer.Stop();
 | 
			
		||||
      r=src-Az;
 | 
			
		||||
      
 | 
			
		||||
      /////////////////////
 | 
			
		||||
      // p = Prec(r)
 | 
			
		||||
      /////////////////////
 | 
			
		||||
      PrecTimer.Start();
 | 
			
		||||
      Preconditioner(r,z);
 | 
			
		||||
      PrecTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner in " << norm2(r)<<std::endl; 
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner out " << norm2(z)<<std::endl; 
 | 
			
		||||
      
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOp(z,tmp); 
 | 
			
		||||
      MatTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner Aout " << norm2(tmp)<<std::endl; 
 | 
			
		||||
      ttmp=tmp;
 | 
			
		||||
      tmp=tmp-r;
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner resid " << std::sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 | 
			
		||||
      /*
 | 
			
		||||
      std::cout<<GridLogMessage<<r<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<z<<std::endl;
 | 
			
		||||
@@ -113,7 +162,9 @@ namespace Grid {
 | 
			
		||||
      std::cout<<GridLogMessage<<tmp<<std::endl;
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
 | 
			
		||||
      MatTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      //p[0],q[0],qq[0] 
 | 
			
		||||
      p[0]= z;
 | 
			
		||||
@@ -137,18 +188,22 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
	cp = axpy_norm(r,-a,q[peri_k],r);  
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage<< " VPGCR_step resid" <<sqrt(cp/rsq)<<std::endl; 
 | 
			
		||||
	if((k==nstep-1)||(cp<rsq)){
 | 
			
		||||
	  return cp;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"]  resid " <<sqrt(cp/rsq)<<std::endl; 
 | 
			
		||||
 | 
			
		||||
	PrecTimer.Start();
 | 
			
		||||
	Preconditioner(r,z);// solve Az = r
 | 
			
		||||
	PrecTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	MatTimer.Start();
 | 
			
		||||
	Linop.HermOpAndNorm(z,Az,zAz,zAAz);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	Linop.HermOp(z,tmp);
 | 
			
		||||
	MatTimer.Stop();
 | 
			
		||||
        tmp=tmp-r;
 | 
			
		||||
	std::cout<<GridLogMessage<< " Preconditioner resid" <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 | 
			
		||||
	std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 | 
			
		||||
 | 
			
		||||
	q[peri_kp]=Az;
 | 
			
		||||
	p[peri_kp]=z;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
#define GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
 | 
			
		||||
@@ -75,7 +102,9 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,in);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,in);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,sol_e,out);
 | 
			
		||||
      pickCheckerboard(Odd ,sol_o,out);
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,34 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_base.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_BASE_H
 | 
			
		||||
#define GRID_CARTESIAN_BASE_H
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
@@ -50,15 +77,12 @@ public:
 | 
			
		||||
    // GridCartesian / GridRedBlackCartesian
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    virtual int CheckerBoarded(int dim)=0;
 | 
			
		||||
    virtual int CheckerBoard(std::vector<int> site)=0;
 | 
			
		||||
    virtual int CheckerBoard(std::vector<int> &site)=0;
 | 
			
		||||
    virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
 | 
			
		||||
    virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
 | 
			
		||||
    virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
 | 
			
		||||
    int  CheckerBoardFromOindex (int Oindex){
 | 
			
		||||
      std::vector<int> ocoor;
 | 
			
		||||
      oCoorFromOindex(ocoor,Oindex); 
 | 
			
		||||
      return CheckerBoard(ocoor);
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoardFromOindex (int Oindex)=0;
 | 
			
		||||
    virtual int CheckerBoardFromOindexTable (int Oindex)=0;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Local layout calculations
 | 
			
		||||
@@ -79,6 +103,12 @@ public:
 | 
			
		||||
        for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
 | 
			
		||||
        return idx;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int iIndex(std::vector<int> &lcoor)
 | 
			
		||||
    {
 | 
			
		||||
        int idx=0;
 | 
			
		||||
        for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
 | 
			
		||||
        return idx;
 | 
			
		||||
    }
 | 
			
		||||
    inline int oIndexReduced(std::vector<int> &ocoor)
 | 
			
		||||
    {
 | 
			
		||||
      int idx=0; 
 | 
			
		||||
@@ -87,45 +117,42 @@ public:
 | 
			
		||||
      for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
 | 
			
		||||
      return idx;
 | 
			
		||||
    }
 | 
			
		||||
    static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
 | 
			
		||||
      int nd= dims.size();
 | 
			
		||||
      coor.resize(nd);
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	coor[d] = index % dims[d];
 | 
			
		||||
	index   = index / dims[d];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
 | 
			
		||||
      CoorFromIndex(coor,Oindex,_rdimensions);
 | 
			
		||||
    }
 | 
			
		||||
    static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
 | 
			
		||||
      int nd=dims.size();
 | 
			
		||||
      int stride=1;
 | 
			
		||||
      index=0;
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	index = index+stride*coor[d];
 | 
			
		||||
	stride=stride*dims[d];
 | 
			
		||||
      }
 | 
			
		||||
      Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    // SIMD lane addressing
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    inline int iIndex(std::vector<int> &lcoor)
 | 
			
		||||
    {
 | 
			
		||||
        int idx=0;
 | 
			
		||||
        for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
 | 
			
		||||
        return idx;
 | 
			
		||||
    }
 | 
			
		||||
    inline void iCoorFromIindex(std::vector<int> &coor,int lane)
 | 
			
		||||
    {
 | 
			
		||||
      CoorFromIndex(coor,lane,_simd_layout);
 | 
			
		||||
      Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
 | 
			
		||||
    }
 | 
			
		||||
    inline int PermuteDim(int dimension){
 | 
			
		||||
      return _simd_layout[dimension]>1;
 | 
			
		||||
    }
 | 
			
		||||
    inline int PermuteType(int dimension){
 | 
			
		||||
      int permute_type=0;
 | 
			
		||||
      //
 | 
			
		||||
      // FIXME:
 | 
			
		||||
      //
 | 
			
		||||
      // Best way to encode this would be to present a mask 
 | 
			
		||||
      // for which simd dimensions are rotated, and the rotation
 | 
			
		||||
      // size. If there is only one simd dimension rotated, this is just 
 | 
			
		||||
      // a permute. 
 | 
			
		||||
      //
 | 
			
		||||
      // Cases: PermuteType == 1,2,4,8
 | 
			
		||||
      // Distance should be either 0,1,2..
 | 
			
		||||
      //
 | 
			
		||||
      if ( _simd_layout[dimension] > 2 ) { 
 | 
			
		||||
	for(int d=0;d<_ndimension;d++){
 | 
			
		||||
	  if ( d != dimension ) assert ( (_simd_layout[d]==1)  );
 | 
			
		||||
	}
 | 
			
		||||
	permute_type = RotateBit; // How to specify distance; this is not just direction.
 | 
			
		||||
	return permute_type;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      for(int d=_ndimension-1;d>dimension;d--){
 | 
			
		||||
	if (_simd_layout[d]>1 ) permute_type++;
 | 
			
		||||
      }
 | 
			
		||||
@@ -135,12 +162,12 @@ public:
 | 
			
		||||
    // Array sizing queries
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    inline int iSites(void) { return _isites; };
 | 
			
		||||
    inline int Nsimd(void)  { return _isites; };// Synonymous with iSites
 | 
			
		||||
    inline int oSites(void) { return _osites; };
 | 
			
		||||
    inline int lSites(void) { return _isites*_osites; }; 
 | 
			
		||||
    inline int gSites(void) { return _isites*_osites*_Nprocessors; }; 
 | 
			
		||||
    inline int Nd    (void) { return _ndimension;};
 | 
			
		||||
    inline int iSites(void) const { return _isites; };
 | 
			
		||||
    inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites
 | 
			
		||||
    inline int oSites(void) const { return _osites; };
 | 
			
		||||
    inline int lSites(void) const { return _isites*_osites; }; 
 | 
			
		||||
    inline int gSites(void) const { return _isites*_osites*_Nprocessors; }; 
 | 
			
		||||
    inline int Nd    (void) const { return _ndimension;};
 | 
			
		||||
 | 
			
		||||
    inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;};
 | 
			
		||||
    inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;};
 | 
			
		||||
@@ -151,7 +178,10 @@ public:
 | 
			
		||||
    // Global addressing
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
 | 
			
		||||
      CoorFromIndex(gcoor,gidx,_gdimensions);
 | 
			
		||||
      Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
 | 
			
		||||
    }
 | 
			
		||||
    void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
 | 
			
		||||
      Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
 | 
			
		||||
    }
 | 
			
		||||
    void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
 | 
			
		||||
      gidx=0;
 | 
			
		||||
@@ -186,7 +216,7 @@ public:
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
 | 
			
		||||
      o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim
 | 
			
		||||
      o_idx= oIndex(lcoor);  // this implies divide by 2 on checkerdim
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_full.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_FULL_H
 | 
			
		||||
#define GRID_CARTESIAN_FULL_H
 | 
			
		||||
 | 
			
		||||
@@ -12,10 +39,17 @@ class GridCartesian: public GridBase {
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    virtual int  CheckerBoardFromOindexTable (int Oindex) {
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int  CheckerBoardFromOindex (int Oindex)
 | 
			
		||||
    {
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoarded(int dim){
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoard(std::vector<int> site){
 | 
			
		||||
    virtual int CheckerBoard(std::vector<int> &site){
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoardDestination(int cb,int shift,int dim){
 | 
			
		||||
 
 | 
			
		||||
@@ -1,32 +1,55 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_red_black.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_RED_BLACK_H
 | 
			
		||||
#define GRID_CARTESIAN_RED_BLACK_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    static const int CbRed  =0;
 | 
			
		||||
    static const int CbBlack=1;
 | 
			
		||||
    static const int Even   =CbRed;
 | 
			
		||||
    static const int Odd    =CbBlack;
 | 
			
		||||
 | 
			
		||||
    // Perhaps these are misplaced and 
 | 
			
		||||
    // should be in sparse matrix.
 | 
			
		||||
    // Also should make these a named enum type
 | 
			
		||||
    static const int DaggerNo=0;
 | 
			
		||||
    static const int DaggerYes=1;
 | 
			
		||||
 | 
			
		||||
  static const int CbRed  =0;
 | 
			
		||||
  static const int CbBlack=1;
 | 
			
		||||
  static const int Even   =CbRed;
 | 
			
		||||
  static const int Odd    =CbBlack;
 | 
			
		||||
    
 | 
			
		||||
// Specialise this for red black grids storing half the data like a chess board.
 | 
			
		||||
class GridRedBlackCartesian : public GridBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    std::vector<int> _checker_dim_mask;
 | 
			
		||||
    int              _checker_dim;
 | 
			
		||||
    std::vector<int> _checker_board;
 | 
			
		||||
 | 
			
		||||
    virtual int CheckerBoarded(int dim){
 | 
			
		||||
      if( dim==_checker_dim) return 1;
 | 
			
		||||
      else return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoard(std::vector<int> site){
 | 
			
		||||
    virtual int CheckerBoard(std::vector<int> &site){
 | 
			
		||||
      int linear=0;
 | 
			
		||||
      assert(site.size()==_ndimension);
 | 
			
		||||
      for(int d=0;d<_ndimension;d++){ 
 | 
			
		||||
@@ -50,12 +73,20 @@ public:
 | 
			
		||||
      // or by looping over x,y,z and multiply rather than computing checkerboard.
 | 
			
		||||
	  
 | 
			
		||||
      if ( (source_cb+ocb)&1 ) {
 | 
			
		||||
 | 
			
		||||
	return (shift)/2;
 | 
			
		||||
      } else {
 | 
			
		||||
	return (shift+1)/2;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    virtual int  CheckerBoardFromOindexTable (int Oindex) {
 | 
			
		||||
      return _checker_board[Oindex];
 | 
			
		||||
    }
 | 
			
		||||
    virtual int  CheckerBoardFromOindex (int Oindex)
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<int> ocoor;
 | 
			
		||||
      oCoorFromOindex(ocoor,Oindex);
 | 
			
		||||
      return CheckerBoard(ocoor);
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
 | 
			
		||||
 | 
			
		||||
      if(dim != _checker_dim) return shift;
 | 
			
		||||
@@ -142,9 +173,15 @@ public:
 | 
			
		||||
	// Use a reduced simd grid
 | 
			
		||||
	_simd_layout[d] = simd_layout[d];
 | 
			
		||||
	_rdimensions[d]= _ldimensions[d]/_simd_layout[d];
 | 
			
		||||
	assert(_rdimensions[d]>0);
 | 
			
		||||
 | 
			
		||||
	// all elements of a simd vector must have same checkerboard.
 | 
			
		||||
	if ( simd_layout[d]>1 ) assert((_rdimensions[d]&0x1)==0); 
 | 
			
		||||
	// If Ls vectorised, this must still be the case; e.g. dwf rb5d
 | 
			
		||||
	if ( _simd_layout[d]>1 ) {
 | 
			
		||||
	  if ( checker_dim_mask[d] ) { 
 | 
			
		||||
	    assert( (_rdimensions[d]&0x1) == 0 );
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	_osites *= _rdimensions[d];
 | 
			
		||||
	_isites *= _simd_layout[d];
 | 
			
		||||
@@ -157,6 +194,8 @@ public:
 | 
			
		||||
	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1];
 | 
			
		||||
	  _istride[d] = _istride[d-1]*_simd_layout[d-1];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
            
 | 
			
		||||
      ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -177,6 +216,18 @@ public:
 | 
			
		||||
	_slice_nblock[d]=nblock;
 | 
			
		||||
	block = block*_rdimensions[d];
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // Create a checkerboard lookup table
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      int rvol = 1;
 | 
			
		||||
      for(int d=0;d<_ndimension;d++){
 | 
			
		||||
	rvol=rvol * _rdimensions[d];
 | 
			
		||||
      }
 | 
			
		||||
      _checker_board.resize(rvol);
 | 
			
		||||
      for(int osite=0;osite<_osites;osite++){
 | 
			
		||||
	_checker_board[osite] = CheckerBoardFromOindex (osite);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
    };
 | 
			
		||||
protected:
 | 
			
		||||
@@ -190,9 +241,21 @@ protected:
 | 
			
		||||
	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
        return idx;
 | 
			
		||||
      return idx;
 | 
			
		||||
    };
 | 
			
		||||
        
 | 
			
		||||
    virtual int iIndex(std::vector<int> &lcoor)
 | 
			
		||||
    {
 | 
			
		||||
        int idx=0;
 | 
			
		||||
        for(int d=0;d<_ndimension;d++) {
 | 
			
		||||
	  if( d==_checker_dim ) {
 | 
			
		||||
	    idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d]));
 | 
			
		||||
	  } else { 
 | 
			
		||||
	    idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
        return idx;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										132
									
								
								lib/communicator/Communicator_base.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								lib/communicator/Communicator_base.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,132 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_none.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////
 | 
			
		||||
// Info that is setup once and indept of cartesian layout
 | 
			
		||||
///////////////////////////////////////////////////////////////
 | 
			
		||||
int CartesianCommunicator::ShmRank;
 | 
			
		||||
int CartesianCommunicator::ShmSize;
 | 
			
		||||
int CartesianCommunicator::GroupRank;
 | 
			
		||||
int CartesianCommunicator::GroupSize;
 | 
			
		||||
int CartesianCommunicator::WorldRank;
 | 
			
		||||
int CartesianCommunicator::WorldSize;
 | 
			
		||||
int CartesianCommunicator::Slave;
 | 
			
		||||
void *              CartesianCommunicator::ShmCommBuf;
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////
 | 
			
		||||
// Alloc, free shmem region
 | 
			
		||||
/////////////////////////////////
 | 
			
		||||
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
 | 
			
		||||
  //  bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
 | 
			
		||||
  void *ptr = (void *)heap_top;
 | 
			
		||||
  heap_top  += bytes;
 | 
			
		||||
  heap_bytes+= bytes;
 | 
			
		||||
  std::cout <<"Shm alloc "<<ptr<<std::endl;
 | 
			
		||||
  assert(heap_bytes < MAX_MPI_SHM_BYTES);
 | 
			
		||||
  return ptr;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::ShmBufferFreeAll(void) { 
 | 
			
		||||
  heap_top  =(size_t)ShmBufferSelf();
 | 
			
		||||
  heap_bytes=0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////
 | 
			
		||||
// Grid information queries
 | 
			
		||||
/////////////////////////////////
 | 
			
		||||
int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; };
 | 
			
		||||
int                      CartesianCommunicator::BossRank(void)          { return 0; };
 | 
			
		||||
int                      CartesianCommunicator::ThisRank(void)          { return _processor; };
 | 
			
		||||
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
 | 
			
		||||
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
 | 
			
		||||
int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// very VERY rarely (Log, serial RNG) we need world without a grid
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int  CartesianCommunicator::RankWorld(void){ return WorldRank; };
 | 
			
		||||
int CartesianCommunicator::Ranks    (void) { return WorldSize; };
 | 
			
		||||
int CartesianCommunicator::Nodes    (void) { return GroupSize; };
 | 
			
		||||
int CartesianCommunicator::Cores    (void) { return ShmSize;   };
 | 
			
		||||
int CartesianCommunicator::NodeRank (void) { return GroupRank; };
 | 
			
		||||
int CartesianCommunicator::CoreRank (void) { return ShmRank;   };
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::GlobalSum(ComplexF &c)
 | 
			
		||||
{
 | 
			
		||||
  GlobalSumVector((float *)&c,2);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
 | 
			
		||||
{
 | 
			
		||||
  GlobalSumVector((float *)c,2*N);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(ComplexD &c)
 | 
			
		||||
{
 | 
			
		||||
  GlobalSumVector((double *)&c,2);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
 | 
			
		||||
{
 | 
			
		||||
  GlobalSumVector((double *)c,2*N);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_COMMS_MPI3
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						       void *xmit,
 | 
			
		||||
						       int xmit_to_rank,
 | 
			
		||||
						       void *recv,
 | 
			
		||||
						       int recv_from_rank,
 | 
			
		||||
						       int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
 | 
			
		||||
{
 | 
			
		||||
  SendToRecvFromComplete(waitall);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::StencilBarrier(void){};
 | 
			
		||||
 | 
			
		||||
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
 | 
			
		||||
 | 
			
		||||
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
 | 
			
		||||
 | 
			
		||||
void *CartesianCommunicator::ShmBuffer(int rank) {
 | 
			
		||||
  return NULL;
 | 
			
		||||
}
 | 
			
		||||
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { 
 | 
			
		||||
  return NULL;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::ShmInitGeneric(void){
 | 
			
		||||
  ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
 | 
			
		||||
  ShmCommBuf=(void *)&ShmBufStorageVector[0];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_base.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_COMMUNICATOR_BASE_H
 | 
			
		||||
#define GRID_COMMUNICATOR_BASE_H
 | 
			
		||||
 | 
			
		||||
@@ -7,118 +34,194 @@
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_COMMS_MPI3
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
#include <mpp/shmem.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
class CartesianCommunicator {
 | 
			
		||||
  public:    
 | 
			
		||||
 | 
			
		||||
  // 65536 ranks per node adequate for now
 | 
			
		||||
  // 128MB shared memory for comms; enough for 48^4 local volume comms
 | 
			
		||||
  // Give external control (command line override?) of this
 | 
			
		||||
 | 
			
		||||
  static const int      MAXLOG2RANKSPERNODE = 16;            
 | 
			
		||||
  static const uint64_t MAX_MPI_SHM_BYTES   = 128*1024*1024; 
 | 
			
		||||
 | 
			
		||||
  // Communicator should know nothing of the physics grid, only processor grid.
 | 
			
		||||
  int              _Nprocessors;     // How many in all
 | 
			
		||||
  std::vector<int> _processors;      // Which dimensions get relayed out over processors lanes.
 | 
			
		||||
  int              _processor;       // linear processor rank
 | 
			
		||||
  std::vector<int> _processor_coor;  // linear processor coordinate
 | 
			
		||||
  unsigned long _ndimension;
 | 
			
		||||
 | 
			
		||||
    int              _Nprocessors;     // How many in all
 | 
			
		||||
    std::vector<int> _processors;      // Which dimensions get relayed out over processors lanes.
 | 
			
		||||
    int              _processor;       // linear processor rank
 | 
			
		||||
    std::vector<int> _processor_coor;  // linear processor coordinate
 | 
			
		||||
    unsigned long _ndimension;
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
    MPI_Comm communicator;
 | 
			
		||||
    typedef MPI_Request CommsRequest_t;
 | 
			
		||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
 | 
			
		||||
  MPI_Comm communicator;
 | 
			
		||||
  static MPI_Comm communicator_world;
 | 
			
		||||
  typedef MPI_Request CommsRequest_t;
 | 
			
		||||
#else 
 | 
			
		||||
    typedef int CommsRequest_t;
 | 
			
		||||
  typedef int CommsRequest_t;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    // Constructor
 | 
			
		||||
    CartesianCommunicator(const std::vector<int> &pdimensions_in);
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Helper functionality for SHM Windows common to all other impls
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Longer term; drop this in favour of a master / slave model with 
 | 
			
		||||
  // cartesian communicator on a subset of ranks, slave ranks controlled
 | 
			
		||||
  // by group leader with data xfer via shared memory
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////
 | 
			
		||||
#ifdef  GRID_COMMS_MPI3
 | 
			
		||||
  std::vector<int>  WorldDims;
 | 
			
		||||
  std::vector<int>  GroupDims;
 | 
			
		||||
  std::vector<int>  ShmDims;
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> GroupCoor;
 | 
			
		||||
  std::vector<int> ShmCoor;
 | 
			
		||||
  std::vector<int> WorldCoor;
 | 
			
		||||
  
 | 
			
		||||
  static std::vector<int> GroupRanks; 
 | 
			
		||||
  static std::vector<int> MyGroup;
 | 
			
		||||
  static int ShmSetup;
 | 
			
		||||
  static MPI_Win ShmWindow; 
 | 
			
		||||
  static MPI_Comm ShmComm;
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int>  LexicographicToWorldRank;
 | 
			
		||||
  
 | 
			
		||||
  static std::vector<void *> ShmCommBufs;
 | 
			
		||||
#else 
 | 
			
		||||
  static void ShmInitGeneric(void);
 | 
			
		||||
  static commVector<uint8_t> ShmBufStorageVector;
 | 
			
		||||
#endif 
 | 
			
		||||
  static void * ShmCommBuf;
 | 
			
		||||
  size_t heap_top;
 | 
			
		||||
  size_t heap_bytes;
 | 
			
		||||
  void *ShmBufferSelf(void);
 | 
			
		||||
  void *ShmBuffer(int rank);
 | 
			
		||||
  void *ShmBufferTranslate(int rank,void * local_p);
 | 
			
		||||
  void *ShmBufferMalloc(size_t bytes);
 | 
			
		||||
  void ShmBufferFreeAll(void) ;
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Must call in Grid startup
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  static void Init(int *argc, char ***argv);
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Constructor of any given grid
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  CartesianCommunicator(const std::vector<int> &pdimensions_in);
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Wraps MPI_Cart routines, or implements equivalent on other impls
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  void ShiftedRanks(int dim,int shift,int & source, int & dest);
 | 
			
		||||
  int  RankFromProcessorCoor(std::vector<int> &coor);
 | 
			
		||||
  void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
 | 
			
		||||
  
 | 
			
		||||
  /////////////////////////////////
 | 
			
		||||
  // Grid information and queries
 | 
			
		||||
  /////////////////////////////////
 | 
			
		||||
  static int ShmRank;
 | 
			
		||||
  static int ShmSize;
 | 
			
		||||
  static int GroupSize;
 | 
			
		||||
  static int GroupRank;
 | 
			
		||||
  static int WorldRank;
 | 
			
		||||
  static int WorldSize;
 | 
			
		||||
  static int Slave;
 | 
			
		||||
  
 | 
			
		||||
  int                      IsBoss(void)            ;
 | 
			
		||||
  int                      BossRank(void)          ;
 | 
			
		||||
  int                      ThisRank(void)          ;
 | 
			
		||||
  const std::vector<int> & ThisProcessorCoor(void) ;
 | 
			
		||||
  const std::vector<int> & ProcessorGrid(void)     ;
 | 
			
		||||
  int                      ProcessorCount(void)    ;
 | 
			
		||||
  static int Ranks    (void);
 | 
			
		||||
  static int Nodes    (void);
 | 
			
		||||
  static int Cores    (void);
 | 
			
		||||
  static int NodeRank (void);
 | 
			
		||||
  static int CoreRank (void);
 | 
			
		||||
 | 
			
		||||
    // Wraps MPI_Cart routines
 | 
			
		||||
    void ShiftedRanks(int dim,int shift,int & source, int & dest);
 | 
			
		||||
    int  RankFromProcessorCoor(std::vector<int> &coor);
 | 
			
		||||
    void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // very VERY rarely (Log, serial RNG) we need world without a grid
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  static int  RankWorld(void) ;
 | 
			
		||||
  static void BroadcastWorld(int root,void* data, int bytes);
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  // Reduction
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  void GlobalSum(RealF &);
 | 
			
		||||
  void GlobalSumVector(RealF *,int N);
 | 
			
		||||
  void GlobalSum(RealD &);
 | 
			
		||||
  void GlobalSumVector(RealD *,int N);
 | 
			
		||||
  void GlobalSum(uint32_t &);
 | 
			
		||||
  void GlobalSum(uint64_t &);
 | 
			
		||||
  void GlobalSum(ComplexF &c);
 | 
			
		||||
  void GlobalSumVector(ComplexF *c,int N);
 | 
			
		||||
  void GlobalSum(ComplexD &c);
 | 
			
		||||
  void GlobalSumVector(ComplexD *c,int N);
 | 
			
		||||
  
 | 
			
		||||
  template<class obj> void GlobalSum(obj &o){
 | 
			
		||||
    typedef typename obj::scalar_type scalar_type;
 | 
			
		||||
    int words = sizeof(obj)/sizeof(scalar_type);
 | 
			
		||||
    scalar_type * ptr = (scalar_type *)& o;
 | 
			
		||||
    GlobalSumVector(ptr,words);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  // Face exchange, buffer swap in translational invariant way
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  void SendToRecvFrom(void *xmit,
 | 
			
		||||
		      int xmit_to_rank,
 | 
			
		||||
		      void *recv,
 | 
			
		||||
		      int recv_from_rank,
 | 
			
		||||
		      int bytes);
 | 
			
		||||
  
 | 
			
		||||
  void SendRecvPacket(void *xmit,
 | 
			
		||||
		      void *recv,
 | 
			
		||||
		      int xmit_to_rank,
 | 
			
		||||
		      int recv_from_rank,
 | 
			
		||||
		      int bytes);
 | 
			
		||||
  
 | 
			
		||||
  void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			   void *xmit,
 | 
			
		||||
			   int xmit_to_rank,
 | 
			
		||||
			   void *recv,
 | 
			
		||||
			   int recv_from_rank,
 | 
			
		||||
			   int bytes);
 | 
			
		||||
  
 | 
			
		||||
  void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////
 | 
			
		||||
    // Grid information queries
 | 
			
		||||
    /////////////////////////////////
 | 
			
		||||
    int                      IsBoss(void)            { return _processor==0; };
 | 
			
		||||
    int                      BossRank(void)          { return 0; };
 | 
			
		||||
    int                      ThisRank(void)          { return _processor; };
 | 
			
		||||
    const std::vector<int> & ThisProcessorCoor(void) { return _processor_coor; };
 | 
			
		||||
    const std::vector<int> & ProcessorGrid(void)     { return _processors; };
 | 
			
		||||
    int                      ProcessorCount(void)    { return _Nprocessors; };
 | 
			
		||||
  void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
				  void *xmit,
 | 
			
		||||
				  int xmit_to_rank,
 | 
			
		||||
				  void *recv,
 | 
			
		||||
				  int recv_from_rank,
 | 
			
		||||
				  int bytes);
 | 
			
		||||
  
 | 
			
		||||
  void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
 | 
			
		||||
  void StencilBarrier(void);
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    // Reduction
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    void GlobalSum(RealF &);
 | 
			
		||||
    void GlobalSumVector(RealF *,int N);
 | 
			
		||||
 | 
			
		||||
    void GlobalSum(RealD &);
 | 
			
		||||
    void GlobalSumVector(RealD *,int N);
 | 
			
		||||
 | 
			
		||||
    void GlobalSum(uint32_t &);
 | 
			
		||||
 | 
			
		||||
    void GlobalSum(ComplexF &c)
 | 
			
		||||
    {
 | 
			
		||||
      GlobalSumVector((float *)&c,2);
 | 
			
		||||
    }
 | 
			
		||||
    void GlobalSumVector(ComplexF *c,int N)
 | 
			
		||||
    {
 | 
			
		||||
      GlobalSumVector((float *)c,2*N);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void GlobalSum(ComplexD &c)
 | 
			
		||||
    {
 | 
			
		||||
      GlobalSumVector((double *)&c,2);
 | 
			
		||||
    }
 | 
			
		||||
    void GlobalSumVector(ComplexD *c,int N)
 | 
			
		||||
    {
 | 
			
		||||
      GlobalSumVector((double *)c,2*N);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    template<class obj> void GlobalSum(obj &o){
 | 
			
		||||
      typedef typename obj::scalar_type scalar_type;
 | 
			
		||||
      int words = sizeof(obj)/sizeof(scalar_type);
 | 
			
		||||
      scalar_type * ptr = (scalar_type *)& o;
 | 
			
		||||
      GlobalSumVector(ptr,words);
 | 
			
		||||
    }
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    // Face exchange, buffer swap in translational invariant way
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    void SendToRecvFrom(void *xmit,
 | 
			
		||||
			int xmit_to_rank,
 | 
			
		||||
			void *recv,
 | 
			
		||||
			int recv_from_rank,
 | 
			
		||||
			int bytes);
 | 
			
		||||
 | 
			
		||||
    void RecvFrom(void *recv,
 | 
			
		||||
		  int recv_from_rank,
 | 
			
		||||
		  int bytes);
 | 
			
		||||
    void SendTo(void *xmit,
 | 
			
		||||
		int xmit_to_rank,
 | 
			
		||||
		int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			 void *xmit,
 | 
			
		||||
			 int xmit_to_rank,
 | 
			
		||||
			 void *recv,
 | 
			
		||||
			 int recv_from_rank,
 | 
			
		||||
			 int bytes);
 | 
			
		||||
    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    // Barrier
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    void Barrier(void);
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    // Broadcast a buffer and composite larger
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    void Broadcast(int root,void* data, int bytes);
 | 
			
		||||
    template<class obj> void Broadcast(int root,obj &data)
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  // Barrier
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  void Barrier(void);
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  // Broadcast a buffer and composite larger
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  void Broadcast(int root,void* data, int bytes);
 | 
			
		||||
  
 | 
			
		||||
  template<class obj> void Broadcast(int root,obj &data)
 | 
			
		||||
    {
 | 
			
		||||
      Broadcast(root,(void *)&data,sizeof(data));
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    static void BroadcastWorld(int root,void* data, int bytes);
 | 
			
		||||
 | 
			
		||||
}; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,58 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_mpi.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Should error check all MPI calls.
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Info that is set up once and is independent of the cartesian layout
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
MPI_Comm CartesianCommunicator::communicator_world;
 | 
			
		||||
 | 
			
		||||
// Should error check all MPI calls.
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
			
		||||
  int flag;
 | 
			
		||||
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
 | 
			
		||||
  if ( !flag ) {
 | 
			
		||||
    MPI_Init(argc,argv);
 | 
			
		||||
  }
 | 
			
		||||
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
			
		||||
  MPI_Comm_rank(communicator_world,&WorldRank);
 | 
			
		||||
  MPI_Comm_size(communicator_world,&WorldSize);
 | 
			
		||||
  ShmRank=0;
 | 
			
		||||
  ShmSize=1;
 | 
			
		||||
  GroupRank=WorldRank;
 | 
			
		||||
  GroupSize=WorldSize;
 | 
			
		||||
  Slave    =0;
 | 
			
		||||
  ShmInitGeneric();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
{
 | 
			
		||||
@@ -14,7 +63,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
  _processor_coor.resize(_ndimension);
 | 
			
		||||
  
 | 
			
		||||
  MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
 | 
			
		||||
  MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
 | 
			
		||||
  MPI_Comm_rank(communicator,&_processor);
 | 
			
		||||
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
 | 
			
		||||
 | 
			
		||||
@@ -27,11 +76,14 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
  
 | 
			
		||||
  assert(Size==_Nprocessors);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
@@ -81,21 +133,22 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::RecvFrom(void *recv,
 | 
			
		||||
				     int from,
 | 
			
		||||
				     int bytes) 
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int sender,
 | 
			
		||||
					   int receiver,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Status stat;
 | 
			
		||||
  int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendTo(void *xmit,
 | 
			
		||||
				   int dest,
 | 
			
		||||
				   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int rank = _processor; // used for tag; must know who it comes from
 | 
			
		||||
  int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  assert(sender != receiver);
 | 
			
		||||
  int tag = sender;
 | 
			
		||||
  if ( _processor == sender ) {
 | 
			
		||||
    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
 | 
			
		||||
  }
 | 
			
		||||
  if ( _processor == receiver ) { 
 | 
			
		||||
    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
@@ -123,7 +176,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
 | 
			
		||||
  int nreq=list.size();
 | 
			
		||||
  std::vector<MPI_Status> status(nreq);
 | 
			
		||||
  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
 | 
			
		||||
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -142,14 +194,17 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
			
		||||
		     communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////
 | 
			
		||||
  // Should only be used prior to Grid Init finished.
 | 
			
		||||
  // Check for this?
 | 
			
		||||
  ///////////////////////////////////////////////////////
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int ierr= MPI_Bcast(data,
 | 
			
		||||
		      bytes,
 | 
			
		||||
		      MPI_BYTE,
 | 
			
		||||
		      root,
 | 
			
		||||
		      MPI_COMM_WORLD);
 | 
			
		||||
		      communicator_world);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										574
									
								
								lib/communicator/Communicator_mpi3.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										574
									
								
								lib/communicator/Communicator_mpi3.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,574 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_mpi3.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Info that is set up once and is independent of the cartesian layout
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int CartesianCommunicator::ShmSetup = 0;
 | 
			
		||||
 | 
			
		||||
MPI_Comm CartesianCommunicator::communicator_world;
 | 
			
		||||
MPI_Comm CartesianCommunicator::ShmComm;
 | 
			
		||||
MPI_Win  CartesianCommunicator::ShmWindow;
 | 
			
		||||
 | 
			
		||||
std::vector<int> CartesianCommunicator::GroupRanks;  
 | 
			
		||||
std::vector<int> CartesianCommunicator::MyGroup;
 | 
			
		||||
std::vector<void *> CartesianCommunicator::ShmCommBufs;
 | 
			
		||||
 | 
			
		||||
void *CartesianCommunicator::ShmBufferSelf(void)
 | 
			
		||||
{
 | 
			
		||||
  return ShmCommBufs[ShmRank];
 | 
			
		||||
}
 | 
			
		||||
void *CartesianCommunicator::ShmBuffer(int rank)
 | 
			
		||||
{
 | 
			
		||||
  int gpeer = GroupRanks[rank];
 | 
			
		||||
  if (gpeer == MPI_UNDEFINED){
 | 
			
		||||
    return NULL;
 | 
			
		||||
  } else { 
 | 
			
		||||
    return ShmCommBufs[gpeer];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
 | 
			
		||||
{
 | 
			
		||||
  int gpeer = GroupRanks[rank];
 | 
			
		||||
  if (gpeer == MPI_UNDEFINED){
 | 
			
		||||
    return NULL;
 | 
			
		||||
  } else { 
 | 
			
		||||
    uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
 | 
			
		||||
    uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
 | 
			
		||||
    return (void *) remote;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
			
		||||
  int flag;
 | 
			
		||||
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
 | 
			
		||||
  if ( !flag ) {
 | 
			
		||||
    MPI_Init(argc,argv);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
			
		||||
  MPI_Comm_rank(communicator_world,&WorldRank);
 | 
			
		||||
  MPI_Comm_size(communicator_world,&WorldSize);
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Split into groups that can share memory
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
 | 
			
		||||
  MPI_Comm_rank(ShmComm     ,&ShmRank);
 | 
			
		||||
  MPI_Comm_size(ShmComm     ,&ShmSize);
 | 
			
		||||
  GroupSize = WorldSize/ShmSize;
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // find world ranks in our SHM group (i.e. which ranks are on our node)
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Group WorldGroup, ShmGroup;
 | 
			
		||||
  MPI_Comm_group (communicator_world, &WorldGroup); 
 | 
			
		||||
  MPI_Comm_group (ShmComm, &ShmGroup);
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> world_ranks(WorldSize); 
 | 
			
		||||
  GroupRanks.resize(WorldSize); 
 | 
			
		||||
  MyGroup.resize(ShmSize);
 | 
			
		||||
  for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
 | 
			
		||||
  
 | 
			
		||||
  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]); 
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify who is in my group and nominate the leader
 | 
			
		||||
    ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  int g=0;
 | 
			
		||||
  for(int rank=0;rank<WorldSize;rank++){
 | 
			
		||||
    if(GroupRanks[rank]!=MPI_UNDEFINED){
 | 
			
		||||
      assert(g<ShmSize);
 | 
			
		||||
      MyGroup[g++] = rank;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
 | 
			
		||||
  int myleader = MyGroup[0];
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> leaders_1hot(WorldSize,0);
 | 
			
		||||
  std::vector<int> leaders_group(GroupSize,0);
 | 
			
		||||
  leaders_1hot [ myleader ] = 1;
 | 
			
		||||
    
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // global sum leaders over comm world
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // find the group leaders world rank
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  int group=0;
 | 
			
		||||
  for(int l=0;l<WorldSize;l++){
 | 
			
		||||
    if(leaders_1hot[l]){
 | 
			
		||||
      leaders_group[group++] = l;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify the rank of the group in which I (and my leader) live
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  GroupRank=-1;
 | 
			
		||||
  for(int g=0;g<GroupSize;g++){
 | 
			
		||||
    if (myleader == leaders_group[g]){
 | 
			
		||||
      GroupRank=g;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  assert(GroupRank!=-1);
 | 
			
		||||
  
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // allocate the shared window for our group
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  
 | 
			
		||||
  ShmCommBuf = 0;
 | 
			
		||||
  ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  // KNL hack -- force to numa-domain 1 in flat
 | 
			
		||||
#if 0
 | 
			
		||||
  //#include <numaif.h>
 | 
			
		||||
  for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
 | 
			
		||||
    void *pages = (void *) ( page + ShmCommBuf );
 | 
			
		||||
    int status;
 | 
			
		||||
    int flags=MPOL_MF_MOVE_ALL;
 | 
			
		||||
    int nodes=1; // numa domain == MCDRAM
 | 
			
		||||
    unsigned long count=1;
 | 
			
		||||
    ierr= move_pages(0,count, &pages,&nodes,&status,flags);
 | 
			
		||||
    if (ierr && (page==0)) perror("numa relocate command failed");
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
  MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
 | 
			
		||||
  
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  ShmCommBufs.resize(ShmSize);
 | 
			
		||||
  for(int r=0;r<ShmSize;r++){
 | 
			
		||||
    MPI_Aint sz;
 | 
			
		||||
    int dsp_unit;
 | 
			
		||||
    MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Verbose for now
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  if (WorldRank == 0){
 | 
			
		||||
    std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
 | 
			
		||||
    std::cout<< WorldSize << " Ranks " ;
 | 
			
		||||
    std::cout<< GroupSize << " Nodes " ;
 | 
			
		||||
    std::cout<<  ShmSize  << " with ranks-per-node "<<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    std::cout<<GridLogMessage     <<"Grid MPI-3 configuration: allocated shared memory region of size ";
 | 
			
		||||
    std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
 | 
			
		||||
 | 
			
		||||
    for(int g=0;g<GroupSize;g++){
 | 
			
		||||
      std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<<leaders_group[g]<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage<<" Boss Node Shm Pointers are {";
 | 
			
		||||
    for(int g=0;g<ShmSize;g++){
 | 
			
		||||
      std::cout<<std::hex<<ShmCommBufs[g]<<std::dec;
 | 
			
		||||
      if(g!=ShmSize-1) std::cout<<",";
 | 
			
		||||
      else std::cout<<"}"<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  for(int g=0;g<GroupSize;g++){
 | 
			
		||||
    if ( (ShmRank == 0) && (GroupRank==g) )  std::cout<<GridLogMessage<<"["<<g<<"] Node Group "<<g<<" is ranks {";
 | 
			
		||||
    for(int r=0;r<ShmSize;r++){
 | 
			
		||||
      if ( (ShmRank == 0) && (GroupRank==g) ) {
 | 
			
		||||
	std::cout<<MyGroup[r];
 | 
			
		||||
	if(r<ShmSize-1) std::cout<<",";
 | 
			
		||||
	else std::cout<<"}"<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      MPI_Barrier(communicator_world);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  assert(ShmSetup==0);  ShmSetup=1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Want to implement some magic ... Group sub-cubes into those on same node
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<int> coor = _processor_coor;
 | 
			
		||||
 | 
			
		||||
  assert(std::abs(shift) <_processors[dim]);
 | 
			
		||||
 | 
			
		||||
  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,source,_processors);
 | 
			
		||||
  source = LexicographicToWorldRank[source];
 | 
			
		||||
 | 
			
		||||
  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,dest,_processors);
 | 
			
		||||
  dest = LexicographicToWorldRank[dest];
 | 
			
		||||
}
 | 
			
		||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  int rank;
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,rank,_processors);
 | 
			
		||||
  rank = LexicographicToWorldRank[rank];
 | 
			
		||||
  return rank;
 | 
			
		||||
}
 | 
			
		||||
void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  Lexicographic::CoorFromIndex(coor,rank,_processors);
 | 
			
		||||
  rank = LexicographicToWorldRank[rank];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{ 
  // Builds the processor grid for the MPI-3 shared-memory layout:
  //  - splits the ShmSize ranks of a node across the grid dimensions,
  //  - derives the per-dimension node (group) grid,
  //  - computes this rank's grid coordinate from (GroupRank, ShmRank),
  //  - and assembles the global lexicographic-index -> WorldRank table.
  int ierr;

  communicator=communicator_world;

  _ndimension = processors.size();
  
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
  int log2size = -1;
  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){  
    if ( (0x1<<i) == ShmSize ) {
      log2size = i;
      break;
    }
  }
  assert(log2size != -1);
  
  ////////////////////////////////////////////////////////////////
  // Identify subblock of ranks on node spreading across dims
  // in a maximally symmetrical way
  ////////////////////////////////////////////////////////////////
  int dim = 0;
  
  std::vector<int> WorldDims = processors;

  ShmDims.resize(_ndimension,1);
  GroupDims.resize(_ndimension);
    
  ShmCoor.resize(_ndimension);
  GroupCoor.resize(_ndimension);
  WorldCoor.resize(_ndimension);

  for(int l2=0;l2<log2size;l2++){
    // Find the next dimension that can still be halved. The search is
    // bounded to one full sweep: if no dimension qualifies, the requested
    // grid cannot hold ShmSize ranks per node and the unbounded search
    // would never terminate.
    int tried = 0;
    while ( WorldDims[dim] / ShmDims[dim] <= 1 ) {
      dim=(dim+1)%_ndimension;
      tried++;
      assert(tried < _ndimension);
    }
    ShmDims[dim]*=2;
    dim=(dim+1)%_ndimension;
  }

  ////////////////////////////////////////////////////////////////
  // Establish torus of processes and nodes with sub-blockings
  ////////////////////////////////////////////////////////////////
  for(int d=0;d<_ndimension;d++){
    GroupDims[d] = WorldDims[d]/ShmDims[d];
    // The node sub-block must tile the grid exactly; otherwise the
    // integer division above silently drops processors.
    assert(GroupDims[d]*ShmDims[d] == WorldDims[d]);
  }

  ////////////////////////////////////////////////////////////////
  // Check processor counts match
  ////////////////////////////////////////////////////////////////
  _Nprocessors=1;
  _processors = processors;
  _processor_coor.resize(_ndimension);
  for(int i=0;i<_ndimension;i++){
    _Nprocessors*=_processors[i];
  }
  assert(WorldSize==_Nprocessors);
      
  ////////////////////////////////////////////////////////////////
  // Establish mapping between lexico physics coord and WorldRank
  ////////////////////////////////////////////////////////////////
  LexicographicToWorldRank.resize(WorldSize,0);
  Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
  Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
  for(int d=0;d<_ndimension;d++){
    WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
  }
  _processor_coor = WorldCoor;

  // Each rank fills in only its own table entry ...
  int lexico;
  Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
  LexicographicToWorldRank[lexico]=WorldRank;
  _processor = lexico;

  ///////////////////////////////////////////////////////////////////
  // global sum Lexico to World mapping
  // (... all other entries are zero, so the sum assembles the full table)
  ///////////////////////////////////////////////////////////////////
  ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
  assert(ierr==0);
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
 | 
			
		||||
{
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(double &d)
 | 
			
		||||
{
 | 
			
		||||
  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
 | 
			
		||||
{
 | 
			
		||||
  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
					   int dest,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int from,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<CommsRequest_t> reqs(0);
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int sender,
 | 
			
		||||
					   int receiver,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Status stat;
 | 
			
		||||
  assert(sender != receiver);
 | 
			
		||||
  int tag = sender;
 | 
			
		||||
  if ( _processor == sender ) {
 | 
			
		||||
    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
 | 
			
		||||
  }
 | 
			
		||||
  if ( _processor == receiver ) { 
 | 
			
		||||
    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
#if 0
 | 
			
		||||
  this->StencilBarrier();
 | 
			
		||||
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
  
 | 
			
		||||
  static int sequence;
 | 
			
		||||
 | 
			
		||||
  int ierr;
 | 
			
		||||
  int tag;
 | 
			
		||||
  int check;
 | 
			
		||||
 | 
			
		||||
  assert(dest != _processor);
 | 
			
		||||
  assert(from != _processor);
 | 
			
		||||
  
 | 
			
		||||
  int gdest = GroupRanks[dest];
 | 
			
		||||
  int gfrom = GroupRanks[from];
 | 
			
		||||
  int gme   = GroupRanks[_processor];
 | 
			
		||||
 | 
			
		||||
  sequence++;
 | 
			
		||||
  
 | 
			
		||||
  char *from_ptr = (char *)ShmCommBufs[ShmRank];
 | 
			
		||||
 | 
			
		||||
  int small = (bytes<MAX_MPI_SHM_BYTES);
 | 
			
		||||
 | 
			
		||||
  typedef uint64_t T;
 | 
			
		||||
  int words = bytes/sizeof(T);
 | 
			
		||||
 | 
			
		||||
  assert(((size_t)bytes &(sizeof(T)-1))==0);
 | 
			
		||||
  assert(gme == ShmRank);
 | 
			
		||||
 | 
			
		||||
  if ( small && (gdest !=MPI_UNDEFINED) ) {
 | 
			
		||||
 | 
			
		||||
    char *to_ptr   = (char *)ShmCommBufs[gdest];
 | 
			
		||||
 | 
			
		||||
    assert(gme != gdest);
 | 
			
		||||
 | 
			
		||||
    T *ip = (T *)xmit;
 | 
			
		||||
    T *op = (T *)to_ptr;
 | 
			
		||||
PARALLEL_FOR_LOOP 
 | 
			
		||||
    for(int w=0;w<words;w++) {
 | 
			
		||||
      op[w]=ip[w];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
 | 
			
		||||
    bcopy(&  sequence,&to_ptr[bytes+4],sizeof(sequence));
 | 
			
		||||
  } else { 
 | 
			
		||||
    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(xrq);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  this->StencilBarrier();
 | 
			
		||||
  
 | 
			
		||||
  if (small && (gfrom !=MPI_UNDEFINED) ) {
 | 
			
		||||
    T *ip = (T *)from_ptr;
 | 
			
		||||
    T *op = (T *)recv;
 | 
			
		||||
PARALLEL_FOR_LOOP 
 | 
			
		||||
    for(int w=0;w<words;w++) {
 | 
			
		||||
      op[w]=ip[w];
 | 
			
		||||
    }
 | 
			
		||||
    bcopy(&from_ptr[bytes]  ,&tag  ,sizeof(tag));
 | 
			
		||||
    bcopy(&from_ptr[bytes+4],&check,sizeof(check));
 | 
			
		||||
    assert(check==sequence);
 | 
			
		||||
    assert(tag==from);
 | 
			
		||||
  } else { 
 | 
			
		||||
    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(rrq);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  this->StencilBarrier();
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
  int rank = _processor;
 | 
			
		||||
  int ierr;
 | 
			
		||||
  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
  
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
  list.push_back(xrq);
 | 
			
		||||
  list.push_back(rrq);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						       void *xmit,
 | 
			
		||||
						       int dest,
 | 
			
		||||
						       void *recv,
 | 
			
		||||
						       int from,
 | 
			
		||||
						       int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
 | 
			
		||||
  int ierr;
 | 
			
		||||
 | 
			
		||||
  assert(dest != _processor);
 | 
			
		||||
  assert(from != _processor);
 | 
			
		||||
  
 | 
			
		||||
  int gdest = GroupRanks[dest];
 | 
			
		||||
  int gfrom = GroupRanks[from];
 | 
			
		||||
  int gme   = GroupRanks[_processor];
 | 
			
		||||
 | 
			
		||||
  assert(gme == ShmRank);
 | 
			
		||||
 | 
			
		||||
  if ( gdest == MPI_UNDEFINED ) {
 | 
			
		||||
    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(xrq);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if ( gfrom ==MPI_UNDEFINED) {
 | 
			
		||||
    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(rrq);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  SendToRecvFromComplete(list);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::StencilBarrier(void)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Win_sync (ShmWindow);   
 | 
			
		||||
  MPI_Barrier  (ShmComm);
 | 
			
		||||
  MPI_Win_sync (ShmWindow);   
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  int nreq=list.size();
 | 
			
		||||
  std::vector<MPI_Status> status(nreq);
 | 
			
		||||
  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Barrier(void)
 | 
			
		||||
{
 | 
			
		||||
  int ierr = MPI_Barrier(communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int ierr=MPI_Bcast(data,
 | 
			
		||||
		     bytes,
 | 
			
		||||
		     MPI_BYTE,
 | 
			
		||||
		     root,
 | 
			
		||||
		     communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int ierr= MPI_Bcast(data,
 | 
			
		||||
		      bytes,
 | 
			
		||||
		      MPI_BYTE,
 | 
			
		||||
		      root,
 | 
			
		||||
		      communicator_world);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,6 +1,49 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_none.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Info that is setup once and indept of cartesian layout
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char *** arv)
 | 
			
		||||
{
 | 
			
		||||
  WorldRank = 0;
 | 
			
		||||
  WorldSize = 1;
 | 
			
		||||
  ShmRank=0;
 | 
			
		||||
  ShmSize=1;
 | 
			
		||||
  GroupRank=WorldRank;
 | 
			
		||||
  GroupSize=WorldSize;
 | 
			
		||||
  Slave    =0;
 | 
			
		||||
  ShmInitGeneric();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
{
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
@@ -20,17 +63,14 @@ void CartesianCommunicator::GlobalSum(float &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(float *,int N){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(double &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::RecvFrom(void *recv,
 | 
			
		||||
				     int recv_from_rank,
 | 
			
		||||
				     int bytes) 
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendTo(void *xmit,
 | 
			
		||||
				   int xmit_to_rank,
 | 
			
		||||
				   int bytes)
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int xmit_to_rank,
 | 
			
		||||
					   int recv_from_rank,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
@@ -59,30 +99,16 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Barrier(void)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Barrier(void){}
 | 
			
		||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
 | 
			
		||||
int  CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) {  return 0;}
 | 
			
		||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){  assert(0);}
 | 
			
		||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 | 
			
		||||
{
 | 
			
		||||
  source =0;
 | 
			
		||||
  dest=0;
 | 
			
		||||
}
 | 
			
		||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										338
									
								
								lib/communicator/Communicator_shmem.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										338
									
								
								lib/communicator/Communicator_shmem.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,338 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_shmem.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
#include <mpp/shmem.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Should error check all MPI calls.
 | 
			
		||||
#define SHMEM_VET(addr) 
 | 
			
		||||
 | 
			
		||||
// Debug variant of SHMEM_VET: reports (with a backtrace) when `addr` is
// not accessible via shmem on this processor. The address is printed with
// %p through a void* cast, since passing a pointer to %lx is a
// format/argument mismatch.
#define SHMEM_VET_DEBUG(addr) {				\
  if ( ! shmem_addr_accessible((addr),_processor) ) {\
    std::fprintf(stderr,"%d Inaccessible shmem address %p %s %s\n",_processor,(void *)(addr),__FUNCTION__,#addr); \
    BACKTRACEFILE();		   \
  }\
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Info that is setup once and indept of cartesian layout
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// Pair of sequence counters kept per connection (one entry per PE in
// XConnections and RConnections); both are zeroed in Init.
typedef struct HandShake_t {
  uint64_t seq_local;   // counter advanced by this PE
  uint64_t seq_remote;  // presumably written by the peer PE -- TODO confirm
} HandShake;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static Vector< HandShake > XConnections;
 | 
			
		||||
static Vector< HandShake > RConnections;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
  // Starts OpenSHMEM, zeroes one handshake record per PE for both the
  // transmit and receive tables, and records the world geometry. Each PE
  // is treated as its own node (ShmSize 1).
  shmem_init();

  const int npes = shmem_n_pes();

  XConnections.resize(npes);
  RConnections.resize(npes);
  for(int pe=0; pe<npes; pe++){
    XConnections[pe].seq_local = 0;
    XConnections[pe].seq_remote= 0;
    RConnections[pe].seq_local = 0;
    RConnections[pe].seq_remote= 0;
  }

  WorldSize = npes;
  WorldRank = shmem_my_pe();

  ShmRank   = 0;
  ShmSize   = 1;

  GroupRank = WorldRank;
  GroupSize = WorldSize;

  Slave     = 0;

  shmem_barrier_all();
  ShmInitGeneric();
}
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  // Records the processor grid, derives this PE's coordinate from its PE
  // number (lexicographic ordering), and checks that the grid accounts for
  // every PE. The unused local vector `periodic` has been removed.
  _ndimension = processors.size();

  _Nprocessors=1;
  _processors = processors;
  _processor_coor.resize(_ndimension);

  _processor = shmem_my_pe();
  
  Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors);

  for(int i=0;i<_ndimension;i++){
    _Nprocessors*=_processors[i];
  }

  int Size = shmem_n_pes(); 

  assert(Size==_Nprocessors);
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
			
		||||
  static long long source ;
 | 
			
		||||
  static long long dest   ;
 | 
			
		||||
  static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long      psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  //  int nreduce=1;
 | 
			
		||||
  //  int pestart=0;
 | 
			
		||||
  //  int logStride=0;
 | 
			
		||||
 | 
			
		||||
  source = u;
 | 
			
		||||
  dest   = 0;
 | 
			
		||||
  shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  shmem_barrier_all(); // necessary?
 | 
			
		||||
  u = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
			
		||||
  static long long source ;
 | 
			
		||||
  static long long dest   ;
 | 
			
		||||
  static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long      psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  //  int nreduce=1;
 | 
			
		||||
  //  int pestart=0;
 | 
			
		||||
  //  int logStride=0;
 | 
			
		||||
 | 
			
		||||
  source = u;
 | 
			
		||||
  dest   = 0;
 | 
			
		||||
  shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  shmem_barrier_all(); // necessary?
 | 
			
		||||
  u = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
			
		||||
  static float source ;
 | 
			
		||||
  static float dest   ;
 | 
			
		||||
  static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  source = f;
 | 
			
		||||
  dest   =0.0;
 | 
			
		||||
  shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  f = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
 | 
			
		||||
{
 | 
			
		||||
  static float source ;
 | 
			
		||||
  static float dest   = 0 ;
 | 
			
		||||
  static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  if ( shmem_addr_accessible(f,_processor)  ){
 | 
			
		||||
    shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    dest   =0.0;
 | 
			
		||||
    source = f[i];
 | 
			
		||||
    shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    f[i] = dest;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(double &d)
 | 
			
		||||
{
 | 
			
		||||
  static double source;
 | 
			
		||||
  static double dest  ;
 | 
			
		||||
  static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  source = d;
 | 
			
		||||
  dest   = 0;
 | 
			
		||||
  shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  d = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
 | 
			
		||||
{
 | 
			
		||||
  static double source ;
 | 
			
		||||
  static double dest   ;
 | 
			
		||||
  static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  if ( shmem_addr_accessible(d,_processor)  ){
 | 
			
		||||
    shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    source = d[i];
 | 
			
		||||
    dest   =0.0;
 | 
			
		||||
    shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    d[i] = dest;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<int> coor = _processor_coor;
 | 
			
		||||
 | 
			
		||||
  assert(std::abs(shift) <_processors[dim]);
 | 
			
		||||
 | 
			
		||||
  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,source,_processors);
 | 
			
		||||
 | 
			
		||||
  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,dest,_processors);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  int rank;
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,rank,_processors);
 | 
			
		||||
  return rank;
 | 
			
		||||
}
 | 
			
		||||
void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  Lexicographic::CoorFromIndex(coor,rank,_processors);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
					   int dest,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int from,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SHMEM_VET(xmit);
 | 
			
		||||
  SHMEM_VET(recv);
 | 
			
		||||
  std::vector<CommsRequest_t> reqs(0);
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int sender,
 | 
			
		||||
					   int receiver,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  static uint64_t seq;
 | 
			
		||||
 | 
			
		||||
  assert(recv!=xmit);
 | 
			
		||||
  volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender];
 | 
			
		||||
  volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver];
 | 
			
		||||
 | 
			
		||||
  if ( _processor == sender ) {
 | 
			
		||||
 | 
			
		||||
    // Check he has posted a receive
 | 
			
		||||
    while(SendSeq->seq_remote == SendSeq->seq_local);
 | 
			
		||||
 | 
			
		||||
    // Advance our send count
 | 
			
		||||
    seq = ++(SendSeq->seq_local);
 | 
			
		||||
    
 | 
			
		||||
    // Send this packet 
 | 
			
		||||
    SHMEM_VET(recv);
 | 
			
		||||
    shmem_putmem(recv,xmit,bytes,receiver);
 | 
			
		||||
    shmem_fence();
 | 
			
		||||
 | 
			
		||||
    //Notify him we're done
 | 
			
		||||
    shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
 | 
			
		||||
    shmem_fence();
 | 
			
		||||
  }
 | 
			
		||||
  if ( _processor == receiver ) {
 | 
			
		||||
 | 
			
		||||
    // Post a receive
 | 
			
		||||
    seq = ++(RecvSeq->seq_local);
 | 
			
		||||
    shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
 | 
			
		||||
 | 
			
		||||
    // Now wait until he has advanced our reception counter
 | 
			
		||||
    while(RecvSeq->seq_remote != RecvSeq->seq_local);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SHMEM_VET(xmit);
 | 
			
		||||
  SHMEM_VET(recv);
 | 
			
		||||
  //  shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
 | 
			
		||||
  shmem_putmem(recv,xmit,bytes,dest);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  //  shmem_quiet();      // I'm done
 | 
			
		||||
  shmem_barrier_all();// He's done too
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::Barrier(void)
 | 
			
		||||
{
 | 
			
		||||
  shmem_barrier_all();
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
  static uint32_t word;
 | 
			
		||||
  uint32_t *array = (uint32_t *) data;
 | 
			
		||||
  assert( (bytes % 4)==0);
 | 
			
		||||
  int words = bytes/4;
 | 
			
		||||
 | 
			
		||||
  if ( shmem_addr_accessible(data,_processor)  ){
 | 
			
		||||
    shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int w=0;w<words;w++){
 | 
			
		||||
    word = array[w];
 | 
			
		||||
    shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
 | 
			
		||||
    if ( shmem_my_pe() != root ) {
 | 
			
		||||
      array[w] = word;
 | 
			
		||||
    }
 | 
			
		||||
    shmem_barrier_all();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
  static uint32_t word;
 | 
			
		||||
  uint32_t *array = (uint32_t *) data;
 | 
			
		||||
  assert( (bytes % 4)==0);
 | 
			
		||||
  int words = bytes/4;
 | 
			
		||||
 | 
			
		||||
  for(int w=0;w<words;w++){
 | 
			
		||||
    word = array[w];
 | 
			
		||||
    shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
 | 
			
		||||
    if ( shmem_my_pe() != root ) {
 | 
			
		||||
      array[w]= word;
 | 
			
		||||
    }
 | 
			
		||||
    shmem_barrier_all();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cshift/Cshift_common.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_COMMON_H_
 | 
			
		||||
#define _GRID_CSHIFT_COMMON_H_
 | 
			
		||||
 | 
			
		||||
@@ -8,7 +36,7 @@ class SimpleCompressor {
 | 
			
		||||
public:
 | 
			
		||||
  void Point(int) {};
 | 
			
		||||
 | 
			
		||||
  vobj operator() (const vobj &arg,int dimension,int plane,int osite,GridBase *grid) {
 | 
			
		||||
  vobj operator() (const vobj &arg) {
 | 
			
		||||
    return arg;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@@ -17,7 +45,7 @@ public:
 | 
			
		||||
// Gather for when there is no need to SIMD split with compression
 | 
			
		||||
///////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class vobj,class cobj,class compressor> void 
 | 
			
		||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
 | 
			
		||||
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -30,26 +58,32 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
			
		||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
  int stride=rhs._grid->_slice_stride[dimension];
 | 
			
		||||
  if ( cbmask == 0x3 ) { 
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
	int o  = n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	int bo = n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
	buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	int o  = n*stride;
 | 
			
		||||
	int bo = n*e2;
 | 
			
		||||
	buffer[off+bo+b]=compress(rhs._odata[so+o+b]);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  } else { 
 | 
			
		||||
     int bo=0;
 | 
			
		||||
     std::vector<std::pair<int,int> > table;
 | 
			
		||||
     for(int n=0;n<e1;n++){
 | 
			
		||||
       for(int b=0;b<e2;b++){
 | 
			
		||||
	 int o  = n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	 int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
 | 
			
		||||
	 int o  = n*stride;
 | 
			
		||||
	 int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
 | 
			
		||||
	 if ( ocb &cbmask ) {
 | 
			
		||||
	   buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	   table.push_back(std::pair<int,int> (bo++,o+b));
 | 
			
		||||
	 }
 | 
			
		||||
       }
 | 
			
		||||
     }
 | 
			
		||||
PARALLEL_FOR_LOOP     
 | 
			
		||||
     for(int i=0;i<table.size();i++){
 | 
			
		||||
       buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
 | 
			
		||||
     }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -70,16 +104,17 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
 | 
			
		||||
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
			
		||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
  
 | 
			
		||||
  int n1=rhs._grid->_slice_stride[dimension];
 | 
			
		||||
  int n2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
  if ( cbmask ==0x3){
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 | 
			
		||||
	int o=n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	int offset = b+n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
	int o      =   n*n1;
 | 
			
		||||
	int offset = b+n*n2;
 | 
			
		||||
	cobj temp =compress(rhs._odata[so+o+b]);
 | 
			
		||||
 | 
			
		||||
	cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	extract<cobj>(temp,pointers,offset);
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
@@ -87,6 +122,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
  } else { 
 | 
			
		||||
 | 
			
		||||
    assert(0); //Fixme think this is buggy
 | 
			
		||||
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
	int o=n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
@@ -94,7 +130,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
	int offset = b+n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
	if ( ocb & cbmask ) {
 | 
			
		||||
	  cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	  cobj temp =compress(rhs._odata[so+o+b]);
 | 
			
		||||
	  extract<cobj>(temp,pointers,offset);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
@@ -105,7 +141,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Gather for when there is no need to SIMD split
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer,             int dimension,int plane,int cbmask)
 | 
			
		||||
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  SimpleCompressor<vobj> dontcompress;
 | 
			
		||||
  Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
 | 
			
		||||
@@ -123,7 +159,7 @@ template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vec
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Scatter for when there is no need to SIMD split
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
 | 
			
		||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -216,13 +252,13 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
 | 
			
		||||
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
 | 
			
		||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
  int stride = rhs._grid->_slice_stride[dimension];
 | 
			
		||||
  if(cbmask == 0x3 ){
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 
 | 
			
		||||
        int o =n*rhs._grid->_slice_stride[dimension]+b;
 | 
			
		||||
        int o =n*stride+b;
 | 
			
		||||
  	//lhs._odata[lo+o]=rhs._odata[ro+o];
 | 
			
		||||
	vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
 | 
			
		||||
      }
 | 
			
		||||
@@ -232,7 +268,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 
 | 
			
		||||
        int o =n*rhs._grid->_slice_stride[dimension]+b;
 | 
			
		||||
        int o =n*stride+b;
 | 
			
		||||
        int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
 | 
			
		||||
        if ( ocb&cbmask ) {
 | 
			
		||||
  	//lhs._odata[lo+o]=rhs._odata[ro+o];
 | 
			
		||||
@@ -258,11 +294,12 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
 | 
			
		||||
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
			
		||||
  int e2=rhs._grid->_slice_block [dimension];
 | 
			
		||||
  int stride = rhs._grid->_slice_stride[dimension];
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<e1;n++){
 | 
			
		||||
  for(int b=0;b<e2;b++){
 | 
			
		||||
 | 
			
		||||
      int o  =n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int o  =n*stride;
 | 
			
		||||
      int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
 | 
			
		||||
      if ( ocb&cbmask ) {
 | 
			
		||||
	permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
 | 
			
		||||
@@ -296,6 +333,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
			
		||||
  int rd = grid->_rdimensions[dimension];
 | 
			
		||||
  int ld = grid->_ldimensions[dimension];
 | 
			
		||||
  int gd = grid->_gdimensions[dimension];
 | 
			
		||||
  int ly = grid->_simd_layout[dimension];
 | 
			
		||||
 | 
			
		||||
  // Map to always positive shift modulo global full dimension.
 | 
			
		||||
  shift = (shift+fd)%fd;
 | 
			
		||||
@@ -304,6 +342,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
			
		||||
  // the permute type
 | 
			
		||||
  int permute_dim =grid->PermuteDim(dimension);
 | 
			
		||||
  int permute_type=grid->PermuteType(dimension);
 | 
			
		||||
  int permute_type_dist;
 | 
			
		||||
 | 
			
		||||
  for(int x=0;x<rd;x++){       
 | 
			
		||||
 | 
			
		||||
@@ -315,15 +354,31 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
			
		||||
    int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 | 
			
		||||
    int sx     = (x+sshift)%rd;
 | 
			
		||||
 | 
			
		||||
    // FIXME : This must change where we have a 
 | 
			
		||||
    // Rotate slice.
 | 
			
		||||
    
 | 
			
		||||
    // Document how this works ; why didn't I do this when I first wrote it...
 | 
			
		||||
    // wrap is whether sshift > rd.
 | 
			
		||||
    //  num is sshift mod rd.
 | 
			
		||||
    // 
 | 
			
		||||
    int permute_slice=0;
 | 
			
		||||
    if(permute_dim){
 | 
			
		||||
      int wrap = sshift/rd;
 | 
			
		||||
      int  num = sshift%rd;
 | 
			
		||||
 | 
			
		||||
      if ( x< rd-num ) permute_slice=wrap;
 | 
			
		||||
      else permute_slice = 1-wrap;
 | 
			
		||||
      else permute_slice = (wrap+1)%ly;
 | 
			
		||||
 | 
			
		||||
      if ( (ly>2) && (permute_slice) ) {
 | 
			
		||||
	assert(permute_type & RotateBit);
 | 
			
		||||
	permute_type_dist = permute_type|permute_slice;
 | 
			
		||||
      } else {
 | 
			
		||||
	permute_type_dist = permute_type;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type);
 | 
			
		||||
    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
 | 
			
		||||
    else                 Copy_plane(ret,rhs,dimension,x,sx,cbmask); 
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cshift/Cshift_mpi.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_MPI_H_
 | 
			
		||||
#define _GRID_CSHIFT_MPI_H_
 | 
			
		||||
 | 
			
		||||
@@ -91,8 +119,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
 | 
			
		||||
  assert(shift<fd);
 | 
			
		||||
  
 | 
			
		||||
  int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
 | 
			
		||||
  std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
 | 
			
		||||
  std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
 | 
			
		||||
  commVector<vobj> send_buf(buffer_size);
 | 
			
		||||
  commVector<vobj> recv_buf(buffer_size);
 | 
			
		||||
 | 
			
		||||
  int cb= (cbmask==0x2)? Odd : Even;
 | 
			
		||||
  int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 | 
			
		||||
@@ -163,11 +191,12 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
 | 
			
		||||
  int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
 | 
			
		||||
  int words = sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
  std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
 | 
			
		||||
  std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
 | 
			
		||||
  std::vector<commVector<scalar_object> >   send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
 | 
			
		||||
  std::vector<commVector<scalar_object> >   recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
 | 
			
		||||
 | 
			
		||||
  int bytes = buffer_size*sizeof(scalar_object);
 | 
			
		||||
 | 
			
		||||
  std::vector<scalar_object *>  pointers(Nsimd);  // 
 | 
			
		||||
  std::vector<scalar_object *>  pointers(Nsimd); // 
 | 
			
		||||
  std::vector<scalar_object *> rpointers(Nsimd); // received pointers
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cshift/Cshift_none.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_NONE_H_
 | 
			
		||||
#define _GRID_CSHIFT_NONE_H_
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										412
									
								
								lib/fftw/fftw3.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										412
									
								
								lib/fftw/fftw3.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,412 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 | 
			
		||||
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 | 
			
		||||
 *
 | 
			
		||||
 * The following statement of license applies *only* to this header file,
 | 
			
		||||
 * and *not* to the other files distributed with FFTW or derived therefrom:
 | 
			
		||||
 * 
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *
 | 
			
		||||
 * 1. Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *
 | 
			
		||||
 * 2. Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 | 
			
		||||
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 | 
			
		||||
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 | 
			
		||||
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
			
		||||
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 | 
			
		||||
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | 
			
		||||
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 | 
			
		||||
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 | 
			
		||||
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 | 
			
		||||
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/***************************** NOTE TO USERS *********************************
 | 
			
		||||
 *
 | 
			
		||||
 *                 THIS IS A HEADER FILE, NOT A MANUAL
 | 
			
		||||
 *
 | 
			
		||||
 *    If you want to know how to use FFTW, please read the manual,
 | 
			
		||||
 *    online at http://www.fftw.org/doc/ and also included with FFTW.
 | 
			
		||||
 *    For a quick start, see the manual's tutorial section.
 | 
			
		||||
 *
 | 
			
		||||
 *   (Reading header files to learn how to use a library is a habit
 | 
			
		||||
 *    stemming from code lacking a proper manual.  Arguably, it's a
 | 
			
		||||
 *    *bad* habit in most cases, because header files can contain
 | 
			
		||||
 *    interfaces that are not part of the public, stable API.)
 | 
			
		||||
 *
 | 
			
		||||
 ****************************************************************************/
 | 
			
		||||
 | 
			
		||||
#ifndef FFTW3_H
 | 
			
		||||
#define FFTW3_H
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
extern "C"
 | 
			
		||||
{
 | 
			
		||||
#endif /* __cplusplus */
 | 
			
		||||
 | 
			
		||||
/* If <complex.h> is included, use the C99 complex type.  Otherwise
 | 
			
		||||
   define a type bit-compatible with C99 complex */
 | 
			
		||||
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
 | 
			
		||||
#  define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C
 | 
			
		||||
#else
 | 
			
		||||
#  define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2]
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define FFTW_CONCAT(prefix, name) prefix ## name
 | 
			
		||||
#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name)
 | 
			
		||||
#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name)
 | 
			
		||||
#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name)
 | 
			
		||||
#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name)
 | 
			
		||||
 | 
			
		||||
/* IMPORTANT: for Windows compilers, you should add a line
 | 
			
		||||
        #define FFTW_DLL
 | 
			
		||||
   here and in kernel/ifftw.h if you are compiling/using FFTW as a
 | 
			
		||||
   DLL, in order to do the proper importing/exporting, or
 | 
			
		||||
   alternatively compile with -DFFTW_DLL or the equivalent
 | 
			
		||||
   command-line flag.  This is not necessary under MinGW/Cygwin, where
 | 
			
		||||
   libtool does the imports/exports automatically. */
 | 
			
		||||
#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__))
 | 
			
		||||
   /* annoying Windows syntax for shared-library declarations */
 | 
			
		||||
#  if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */
 | 
			
		||||
#    define FFTW_EXTERN extern __declspec(dllexport) 
 | 
			
		||||
#  else /* user is calling FFTW; import symbol */
 | 
			
		||||
#    define FFTW_EXTERN extern __declspec(dllimport) 
 | 
			
		||||
#  endif
 | 
			
		||||
#else
 | 
			
		||||
#  define FFTW_EXTERN extern
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* r2r transform-kind selector.  Each enumerator has an explicit value;
   every precision's X(r2r_kind) is a typedef of this enum. */
enum fftw_r2r_kind_do_not_use_me {
     FFTW_R2HC    = 0,
     FFTW_HC2R    = 1,
     FFTW_DHT     = 2,
     FFTW_REDFT00 = 3,
     FFTW_REDFT01 = 4,
     FFTW_REDFT10 = 5,
     FFTW_REDFT11 = 6,
     FFTW_RODFT00 = 7,
     FFTW_RODFT01 = 8,
     FFTW_RODFT10 = 9,
     FFTW_RODFT11 = 10
};
 | 
			
		||||
 | 
			
		||||
/* One dimension described with int fields; every precision's X(iodim) is a
   typedef of this struct. */
struct fftw_iodim_do_not_use_me {
     int n;    /* dimension size */
     int is;   /* input stride */
     int os;   /* output stride */
};
 | 
			
		||||
 | 
			
		||||
#include <stddef.h> /* for ptrdiff_t */
 | 
			
		||||
/* Same layout as the int-based iodim, but with ptrdiff_t fields; every
   precision's X(iodim64) is a typedef of this struct. */
struct fftw_iodim64_do_not_use_me {
     ptrdiff_t n;    /* dimension size */
     ptrdiff_t is;   /* input stride */
     ptrdiff_t os;   /* output stride */
};
 | 
			
		||||
 | 
			
		||||
/* Callback types; every precision's X(write_char_func) / X(read_char_func)
   is a typedef of these.  The writer receives one char plus a caller-supplied
   void * and returns nothing; the reader receives a void * and returns int. */
typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *);
typedef int  (*fftw_read_char_func_do_not_use_me)(void *);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
  huge second-order macro that defines prototypes for all API
 | 
			
		||||
  functions.  We expand this macro for each supported precision
 | 
			
		||||
 
 | 
			
		||||
  X: name-mangling macro
 | 
			
		||||
  R: real data type
 | 
			
		||||
  C: complex data type
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#define FFTW_DEFINE_API(X, R, C)					   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_DEFINE_COMPLEX(R, C);						   \
 | 
			
		||||
									   \
 | 
			
		||||
typedef struct X(plan_s) *X(plan);					   \
 | 
			
		||||
									   \
 | 
			
		||||
typedef struct fftw_iodim_do_not_use_me X(iodim);			   \
 | 
			
		||||
typedef struct fftw_iodim64_do_not_use_me X(iodim64);			   \
 | 
			
		||||
									   \
 | 
			
		||||
typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind);			   \
 | 
			
		||||
									   \
 | 
			
		||||
typedef fftw_write_char_func_do_not_use_me X(write_char_func);		   \
 | 
			
		||||
typedef fftw_read_char_func_do_not_use_me X(read_char_func);		   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(execute)(const X(plan) p);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n,			   \
 | 
			
		||||
		    C *in, C *out, int sign, unsigned flags);		   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign,	   \
 | 
			
		||||
		       unsigned flags);					   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1,			   \
 | 
			
		||||
		       C *in, C *out, int sign, unsigned flags);	   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2,		   \
 | 
			
		||||
		       C *in, C *out, int sign, unsigned flags);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n,		   \
 | 
			
		||||
                         int howmany,					   \
 | 
			
		||||
                         C *in, const int *inembed,			   \
 | 
			
		||||
                         int istride, int idist,			   \
 | 
			
		||||
                         C *out, const int *onembed,			   \
 | 
			
		||||
                         int ostride, int odist,			   \
 | 
			
		||||
                         int sign, unsigned flags);			   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims,	   \
 | 
			
		||||
			 int howmany_rank,				   \
 | 
			
		||||
			 const X(iodim) *howmany_dims,			   \
 | 
			
		||||
			 C *in, C *out,					   \
 | 
			
		||||
			 int sign, unsigned flags);			   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \
 | 
			
		||||
			 int howmany_rank,				   \
 | 
			
		||||
			 const X(iodim) *howmany_dims,			   \
 | 
			
		||||
			 R *ri, R *ii, R *ro, R *io,			   \
 | 
			
		||||
			 unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank,			   \
 | 
			
		||||
                         const X(iodim64) *dims,			   \
 | 
			
		||||
			 int howmany_rank,				   \
 | 
			
		||||
			 const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
			 C *in, C *out,					   \
 | 
			
		||||
			 int sign, unsigned flags);			   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank,			   \
 | 
			
		||||
                         const X(iodim64) *dims,			   \
 | 
			
		||||
			 int howmany_rank,				   \
 | 
			
		||||
			 const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
			 R *ri, R *ii, R *ro, R *io,			   \
 | 
			
		||||
			 unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out);	   \
 | 
			
		||||
FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii,	   \
 | 
			
		||||
                                      R *ro, R *io);			   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n,	   \
 | 
			
		||||
                             int howmany,				   \
 | 
			
		||||
                             R *in, const int *inembed,			   \
 | 
			
		||||
                             int istride, int idist,			   \
 | 
			
		||||
                             C *out, const int *onembed,		   \
 | 
			
		||||
                             int ostride, int odist,			   \
 | 
			
		||||
                             unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n,		   \
 | 
			
		||||
                        R *in, C *out, unsigned flags);			   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1,			   \
 | 
			
		||||
			   R *in, C *out, unsigned flags);		   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1,			   \
 | 
			
		||||
			   int n2,					   \
 | 
			
		||||
			   R *in, C *out, unsigned flags);		   \
 | 
			
		||||
									   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n,	   \
 | 
			
		||||
			     int howmany,				   \
 | 
			
		||||
			     C *in, const int *inembed,			   \
 | 
			
		||||
			     int istride, int idist,			   \
 | 
			
		||||
			     R *out, const int *onembed,		   \
 | 
			
		||||
			     int ostride, int odist,			   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n,		   \
 | 
			
		||||
                        C *in, R *out, unsigned flags);			   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1,			   \
 | 
			
		||||
			   C *in, R *out, unsigned flags);		   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1,			   \
 | 
			
		||||
			   int n2,					   \
 | 
			
		||||
			   C *in, R *out, unsigned flags);		   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims,   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim) *howmany_dims,		   \
 | 
			
		||||
			     R *in, C *out,				   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims,   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim) *howmany_dims,		   \
 | 
			
		||||
			     C *in, R *out,				   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)(				   \
 | 
			
		||||
                             int rank, const X(iodim) *dims,		   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim) *howmany_dims,		   \
 | 
			
		||||
			     R *in, R *ro, R *io,			   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)(				   \
 | 
			
		||||
                             int rank, const X(iodim) *dims,		   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim) *howmany_dims,		   \
 | 
			
		||||
			     R *ri, R *ii, R *out,			   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank,			   \
 | 
			
		||||
                             const X(iodim64) *dims,			   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
			     R *in, C *out,				   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank,			   \
 | 
			
		||||
                             const X(iodim64) *dims,			   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
			     C *in, R *out,				   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)(			   \
 | 
			
		||||
                             int rank, const X(iodim64) *dims,		   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
			     R *in, R *ro, R *io,			   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)(			   \
 | 
			
		||||
                             int rank, const X(iodim64) *dims,		   \
 | 
			
		||||
			     int howmany_rank,				   \
 | 
			
		||||
			     const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
			     R *ri, R *ii, R *out,			   \
 | 
			
		||||
			     unsigned flags);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out);	   \
 | 
			
		||||
FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p,		   \
 | 
			
		||||
                                          R *in, R *ro, R *io);		   \
 | 
			
		||||
FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p,		   \
 | 
			
		||||
                                          R *ri, R *ii, R *out);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n,		   \
 | 
			
		||||
                         int howmany,					   \
 | 
			
		||||
                         R *in, const int *inembed,			   \
 | 
			
		||||
                         int istride, int idist,			   \
 | 
			
		||||
                         R *out, const int *onembed,			   \
 | 
			
		||||
                         int ostride, int odist,			   \
 | 
			
		||||
                         const X(r2r_kind) *kind, unsigned flags);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out,	   \
 | 
			
		||||
                    const X(r2r_kind) *kind, unsigned flags);		   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out,		   \
 | 
			
		||||
                       X(r2r_kind) kind, unsigned flags);		   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out,	   \
 | 
			
		||||
                       X(r2r_kind) kind0, X(r2r_kind) kind1,		   \
 | 
			
		||||
                       unsigned flags);					   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2,		   \
 | 
			
		||||
                       R *in, R *out, X(r2r_kind) kind0,		   \
 | 
			
		||||
                       X(r2r_kind) kind1, X(r2r_kind) kind2,		   \
 | 
			
		||||
                       unsigned flags);					   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims,	   \
 | 
			
		||||
                         int howmany_rank,				   \
 | 
			
		||||
                         const X(iodim) *howmany_dims,			   \
 | 
			
		||||
                         R *in, R *out,					   \
 | 
			
		||||
                         const X(r2r_kind) *kind, unsigned flags);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims,   \
 | 
			
		||||
                         int howmany_rank,				   \
 | 
			
		||||
                         const X(iodim64) *howmany_dims,		   \
 | 
			
		||||
                         R *in, R *out,					   \
 | 
			
		||||
                         const X(r2r_kind) *kind, unsigned flags);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out);	   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(destroy_plan)(X(plan) p);				   \
 | 
			
		||||
FFTW_EXTERN void X(forget_wisdom)(void);				   \
 | 
			
		||||
FFTW_EXTERN void X(cleanup)(void);					   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(set_timelimit)(double t);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(plan_with_nthreads)(int nthreads);			   \
 | 
			
		||||
FFTW_EXTERN int X(init_threads)(void);					   \
 | 
			
		||||
FFTW_EXTERN void X(cleanup_threads)(void);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename);	   \
 | 
			
		||||
FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file);		   \
 | 
			
		||||
FFTW_EXTERN char *X(export_wisdom_to_string)(void);			   \
 | 
			
		||||
FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char,   	   \
 | 
			
		||||
                                  void *data);				   \
 | 
			
		||||
FFTW_EXTERN int X(import_system_wisdom)(void);				   \
 | 
			
		||||
FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename);	   \
 | 
			
		||||
FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file);		   \
 | 
			
		||||
FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string);	   \
 | 
			
		||||
FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file);	   \
 | 
			
		||||
FFTW_EXTERN void X(print_plan)(const X(plan) p);			   \
 | 
			
		||||
FFTW_EXTERN char *X(sprint_plan)(const X(plan) p);			   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void *X(malloc)(size_t n);					   \
 | 
			
		||||
FFTW_EXTERN R *X(alloc_real)(size_t n);					   \
 | 
			
		||||
FFTW_EXTERN C *X(alloc_complex)(size_t n);				   \
 | 
			
		||||
FFTW_EXTERN void X(free)(void *p);					   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN void X(flops)(const X(plan) p,				   \
 | 
			
		||||
                          double *add, double *mul, double *fmas);	   \
 | 
			
		||||
FFTW_EXTERN double X(estimate_cost)(const X(plan) p);			   \
 | 
			
		||||
FFTW_EXTERN double X(cost)(const X(plan) p);				   \
 | 
			
		||||
									   \
 | 
			
		||||
FFTW_EXTERN int X(alignment_of)(R *p);                                     \
 | 
			
		||||
FFTW_EXTERN const char X(version)[];                                       \
 | 
			
		||||
FFTW_EXTERN const char X(cc)[];						   \
 | 
			
		||||
FFTW_EXTERN const char X(codelet_optim)[];
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* end of FFTW_DEFINE_API macro */
 | 
			
		||||
 | 
			
		||||
FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex)
 | 
			
		||||
FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex)
 | 
			
		||||
FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex)
 | 
			
		||||
 | 
			
		||||
/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64
 | 
			
		||||
   for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */
 | 
			
		||||
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \
 | 
			
		||||
 && !(defined(__ICC) || defined(__INTEL_COMPILER)) \
 | 
			
		||||
 && (defined(__i386__) || defined(__x86_64__) || defined(__ia64__))
 | 
			
		||||
#  if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
 | 
			
		||||
/* note: __float128 is a typedef, which is not supported with the _Complex
 | 
			
		||||
         keyword in gcc, so instead we use this ugly __attribute__ version.
 | 
			
		||||
         However, we can't simply pass the __attribute__ version to
 | 
			
		||||
         FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer
 | 
			
		||||
         types.  Hence redefining FFTW_DEFINE_COMPLEX.  Ugh. */
 | 
			
		||||
#    undef FFTW_DEFINE_COMPLEX
 | 
			
		||||
#    define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C
 | 
			
		||||
#  endif
 | 
			
		||||
FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define FFTW_FORWARD (-1)
 | 
			
		||||
#define FFTW_BACKWARD (+1)
 | 
			
		||||
 | 
			
		||||
#define FFTW_NO_TIMELIMIT (-1.0)
 | 
			
		||||
 | 
			
		||||
/* documented flags */
 | 
			
		||||
#define FFTW_MEASURE (0U)
 | 
			
		||||
#define FFTW_DESTROY_INPUT (1U << 0)
 | 
			
		||||
#define FFTW_UNALIGNED (1U << 1)
 | 
			
		||||
#define FFTW_CONSERVE_MEMORY (1U << 2)
 | 
			
		||||
#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */
 | 
			
		||||
#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */
 | 
			
		||||
#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */
 | 
			
		||||
#define FFTW_ESTIMATE (1U << 6)
 | 
			
		||||
#define FFTW_WISDOM_ONLY (1U << 21)
 | 
			
		||||
 | 
			
		||||
/* undocumented beyond-guru flags */
 | 
			
		||||
#define FFTW_ESTIMATE_PATIENT (1U << 7)
 | 
			
		||||
#define FFTW_BELIEVE_PCOST (1U << 8)
 | 
			
		||||
#define FFTW_NO_DFT_R2HC (1U << 9)
 | 
			
		||||
#define FFTW_NO_NONTHREADED (1U << 10)
 | 
			
		||||
#define FFTW_NO_BUFFERING (1U << 11)
 | 
			
		||||
#define FFTW_NO_INDIRECT_OP (1U << 12)
 | 
			
		||||
#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */
 | 
			
		||||
#define FFTW_NO_RANK_SPLITS (1U << 14)
 | 
			
		||||
#define FFTW_NO_VRANK_SPLITS (1U << 15)
 | 
			
		||||
#define FFTW_NO_VRECURSE (1U << 16)
 | 
			
		||||
#define FFTW_NO_SIMD (1U << 17)
 | 
			
		||||
#define FFTW_NO_SLOW (1U << 18)
 | 
			
		||||
#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19)
 | 
			
		||||
#define FFTW_ALLOW_PRUNING (1U << 20)
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}  /* extern "C" */
 | 
			
		||||
#endif /* __cplusplus */
 | 
			
		||||
 | 
			
		||||
#endif /* FFTW3_H */
 | 
			
		||||
@@ -1,44 +1,74 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/lattice/Lattice_ET.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_ET_H
 | 
			
		||||
#define GRID_LATTICE_ET_H
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <tuple>
 | 
			
		||||
#include <typeinfo>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  // Predicated where support
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  template<class iobj,class vobj,class robj>
 | 
			
		||||
    inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) {
 | 
			
		||||
////////////////////////////////////////////////////
 | 
			
		||||
// Predicated where support
 | 
			
		||||
////////////////////////////////////////////////////
 | 
			
		||||
template <class iobj, class vobj, class robj>
 | 
			
		||||
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
 | 
			
		||||
                            const robj &iffalse) {
 | 
			
		||||
  typename std::remove_const<vobj>::type ret;
 | 
			
		||||
 | 
			
		||||
    typename std::remove_const<vobj>::type ret;
 | 
			
		||||
  typedef typename vobj::scalar_object scalar_object;
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
  typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
    typedef typename vobj::scalar_object scalar_object;
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
    typedef typename vobj::vector_type vector_type;
 | 
			
		||||
  const int Nsimd = vobj::vector_type::Nsimd();
 | 
			
		||||
  const int words = sizeof(vobj) / sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
    const int Nsimd = vobj::vector_type::Nsimd();
 | 
			
		||||
    const int words = sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
  std::vector<Integer> mask(Nsimd);
 | 
			
		||||
  std::vector<scalar_object> truevals(Nsimd);
 | 
			
		||||
  std::vector<scalar_object> falsevals(Nsimd);
 | 
			
		||||
 | 
			
		||||
    std::vector<Integer> mask(Nsimd);
 | 
			
		||||
    std::vector<scalar_object> truevals (Nsimd);
 | 
			
		||||
    std::vector<scalar_object> falsevals(Nsimd);
 | 
			
		||||
  extract(iftrue, truevals);
 | 
			
		||||
  extract(iffalse, falsevals);
 | 
			
		||||
  extract<vInteger, Integer>(TensorRemove(predicate), mask);
 | 
			
		||||
 | 
			
		||||
    extract(iftrue   ,truevals);
 | 
			
		||||
    extract(iffalse  ,falsevals);
 | 
			
		||||
    extract<vInteger,Integer>(TensorRemove(predicate),mask);
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Nsimd;s++){
 | 
			
		||||
      if (mask[s]) falsevals[s]=truevals[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    merge(ret,falsevals);
 | 
			
		||||
    return ret;
 | 
			
		||||
  for (int s = 0; s < Nsimd; s++) {
 | 
			
		||||
    if (mask[s]) falsevals[s] = truevals[s];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  merge(ret, falsevals);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// recursive evaluation of expressions; Could
 | 
			
		||||
// switch to generic approach with variadics, a la
 | 
			
		||||
@@ -46,303 +76,351 @@ namespace Grid {
 | 
			
		||||
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// leaf eval of lattice ; should enable if protect using traits
 | 
			
		||||
 | 
			
		||||
//leaf eval of lattice ; should enable if protect using traits
 | 
			
		||||
template <typename T>
 | 
			
		||||
using is_lattice = std::is_base_of<LatticeBase, T>;
 | 
			
		||||
 | 
			
		||||
template <typename T> using is_lattice      = std::is_base_of<LatticeBase,T >;
 | 
			
		||||
template <typename T>
 | 
			
		||||
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
 | 
			
		||||
 | 
			
		||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
 | 
			
		||||
 | 
			
		||||
//Specialization of getVectorType for lattices
 | 
			
		||||
template<typename T>
 | 
			
		||||
struct getVectorType<Lattice<T> >{
 | 
			
		||||
  typedef typename Lattice<T>::vector_object type;
 | 
			
		||||
};
 | 
			
		||||
 
 | 
			
		||||
template<class sobj>
 | 
			
		||||
inline sobj eval(const unsigned int ss, const sobj &arg)
 | 
			
		||||
{
 | 
			
		||||
  return arg;
 | 
			
		||||
}
 | 
			
		||||
template<class lobj>
 | 
			
		||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg)
 | 
			
		||||
{
 | 
			
		||||
    return arg._odata[ss];
 | 
			
		||||
template <class lobj>
 | 
			
		||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {
 | 
			
		||||
  return arg._odata[ss];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// handle nodes in syntax tree
 | 
			
		||||
template <typename Op, typename T1>
 | 
			
		||||
auto inline eval(const unsigned int ss, const LatticeUnaryExpression<Op,T1 > &expr) // eval one operand
 | 
			
		||||
  -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second))))
 | 
			
		||||
{
 | 
			
		||||
  return expr.first.func(eval(ss,std::get<0>(expr.second)));
 | 
			
		||||
auto inline eval(
 | 
			
		||||
    const unsigned int ss,
 | 
			
		||||
    const LatticeUnaryExpression<Op, T1> &expr)  // eval one operand
 | 
			
		||||
    -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
 | 
			
		||||
  return expr.first.func(eval(ss, std::get<0>(expr.second)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1, typename T2>
 | 
			
		||||
auto inline eval(const unsigned int ss, const LatticeBinaryExpression<Op,T1,T2> &expr) // eval two operands
 | 
			
		||||
  -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))))
 | 
			
		||||
{
 | 
			
		||||
  return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)));
 | 
			
		||||
auto inline eval(
 | 
			
		||||
    const unsigned int ss,
 | 
			
		||||
    const LatticeBinaryExpression<Op, T1, T2> &expr)  // eval two operands
 | 
			
		||||
    -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
 | 
			
		||||
                                eval(ss, std::get<1>(expr.second)))) {
 | 
			
		||||
  return expr.first.func(eval(ss, std::get<0>(expr.second)),
 | 
			
		||||
                         eval(ss, std::get<1>(expr.second)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1, typename T2, typename T3>
 | 
			
		||||
auto inline eval(const unsigned int ss, const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) // eval three operands
 | 
			
		||||
  -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second))))
 | 
			
		||||
{
 | 
			
		||||
  return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) );
 | 
			
		||||
auto inline eval(const unsigned int ss,
 | 
			
		||||
                 const LatticeTrinaryExpression<Op, T1, T2, T3>
 | 
			
		||||
                     &expr)  // eval three operands
 | 
			
		||||
    -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
 | 
			
		||||
                                eval(ss, std::get<1>(expr.second)),
 | 
			
		||||
                                eval(ss, std::get<2>(expr.second)))) {
 | 
			
		||||
  return expr.first.func(eval(ss, std::get<0>(expr.second)),
 | 
			
		||||
                         eval(ss, std::get<1>(expr.second)),
 | 
			
		||||
                         eval(ss, std::get<2>(expr.second)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion
 | 
			
		||||
// Obtain the grid from an expression, ensuring conformable. This must follow a
 | 
			
		||||
// tree recursion
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
 | 
			
		||||
inline void GridFromExpression(GridBase * &grid,const T1& lat)   // Lattice leaf
 | 
			
		||||
{
 | 
			
		||||
  if ( grid ) {
 | 
			
		||||
    conformable(grid,lat._grid);
 | 
			
		||||
  } 
 | 
			
		||||
  grid=lat._grid;
 | 
			
		||||
}
 | 
			
		||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
 | 
			
		||||
inline void GridFromExpression(GridBase * &grid,const T1& notlat)   // non-lattice leaf
 | 
			
		||||
template <class T1,
 | 
			
		||||
          typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
 | 
			
		||||
inline void GridFromExpression(GridBase *&grid, const T1 &lat)  // Lattice leaf
 | 
			
		||||
{
 | 
			
		||||
  if (grid) {
 | 
			
		||||
    conformable(grid, lat._grid);
 | 
			
		||||
  }
 | 
			
		||||
  grid = lat._grid;
 | 
			
		||||
}
 | 
			
		||||
template <class T1,
 | 
			
		||||
          typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
 | 
			
		||||
inline void GridFromExpression(GridBase *&grid,
 | 
			
		||||
                               const T1 &notlat)  // non-lattice leaf
 | 
			
		||||
{}
 | 
			
		||||
template <typename Op, typename T1>
 | 
			
		||||
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
 | 
			
		||||
{
 | 
			
		||||
  GridFromExpression(grid,std::get<0>(expr.second));// recurse 
 | 
			
		||||
inline void GridFromExpression(GridBase *&grid,
 | 
			
		||||
                               const LatticeUnaryExpression<Op, T1> &expr) {
 | 
			
		||||
  GridFromExpression(grid, std::get<0>(expr.second));  // recurse
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1, typename T2>
 | 
			
		||||
inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression<Op,T1,T2> &expr) 
 | 
			
		||||
{
 | 
			
		||||
  GridFromExpression(grid,std::get<0>(expr.second));// recurse
 | 
			
		||||
  GridFromExpression(grid,std::get<1>(expr.second));
 | 
			
		||||
inline void GridFromExpression(
 | 
			
		||||
    GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
 | 
			
		||||
  GridFromExpression(grid, std::get<0>(expr.second));  // recurse
 | 
			
		||||
  GridFromExpression(grid, std::get<1>(expr.second));
 | 
			
		||||
}
 | 
			
		||||
template <typename Op, typename T1, typename T2, typename T3>
 | 
			
		||||
inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) 
 | 
			
		||||
{
 | 
			
		||||
  GridFromExpression(grid,std::get<0>(expr.second));// recurse
 | 
			
		||||
  GridFromExpression(grid,std::get<1>(expr.second));
 | 
			
		||||
  GridFromExpression(grid,std::get<2>(expr.second));
 | 
			
		||||
inline void GridFromExpression(
 | 
			
		||||
    GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
 | 
			
		||||
  GridFromExpression(grid, std::get<0>(expr.second));  // recurse
 | 
			
		||||
  GridFromExpression(grid, std::get<1>(expr.second));
 | 
			
		||||
  GridFromExpression(grid, std::get<2>(expr.second));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
 | 
			
		||||
// Obtain the CB from an expression, ensuring conformable. This must follow a
 | 
			
		||||
// tree recursion
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
 | 
			
		||||
inline void CBFromExpression(int &cb,const T1& lat)   // Lattice leaf
 | 
			
		||||
template <class T1,
 | 
			
		||||
          typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
 | 
			
		||||
inline void CBFromExpression(int &cb, const T1 &lat)  // Lattice leaf
 | 
			
		||||
{
 | 
			
		||||
  if ( (cb==Odd) || (cb==Even) ) {
 | 
			
		||||
    assert(cb==lat.checkerboard);
 | 
			
		||||
  } 
 | 
			
		||||
  cb=lat.checkerboard;
 | 
			
		||||
  if ((cb == Odd) || (cb == Even)) {
 | 
			
		||||
    assert(cb == lat.checkerboard);
 | 
			
		||||
  }
 | 
			
		||||
  cb = lat.checkerboard;
 | 
			
		||||
  //  std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
 | 
			
		||||
}
 | 
			
		||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
 | 
			
		||||
inline void CBFromExpression(int &cb,const T1& notlat)   // non-lattice leaf
 | 
			
		||||
template <class T1,
 | 
			
		||||
          typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
 | 
			
		||||
inline void CBFromExpression(int &cb, const T1 &notlat)  // non-lattice leaf
 | 
			
		||||
{
 | 
			
		||||
  //  std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
 | 
			
		||||
}
 | 
			
		||||
template <typename Op, typename T1>
 | 
			
		||||
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
 | 
			
		||||
{
 | 
			
		||||
  CBFromExpression(cb,std::get<0>(expr.second));// recurse 
 | 
			
		||||
inline void CBFromExpression(int &cb,
 | 
			
		||||
                             const LatticeUnaryExpression<Op, T1> &expr) {
 | 
			
		||||
  CBFromExpression(cb, std::get<0>(expr.second));  // recurse
 | 
			
		||||
  //  std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1, typename T2>
 | 
			
		||||
inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr) 
 | 
			
		||||
{
 | 
			
		||||
  CBFromExpression(cb,std::get<0>(expr.second));// recurse
 | 
			
		||||
  CBFromExpression(cb,std::get<1>(expr.second));
 | 
			
		||||
inline void CBFromExpression(int &cb,
 | 
			
		||||
                             const LatticeBinaryExpression<Op, T1, T2> &expr) {
 | 
			
		||||
  CBFromExpression(cb, std::get<0>(expr.second));  // recurse
 | 
			
		||||
  CBFromExpression(cb, std::get<1>(expr.second));
 | 
			
		||||
  //  std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
 | 
			
		||||
}
 | 
			
		||||
template <typename Op, typename T1, typename T2, typename T3>
 | 
			
		||||
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) 
 | 
			
		||||
{
 | 
			
		||||
  CBFromExpression(cb,std::get<0>(expr.second));// recurse
 | 
			
		||||
  CBFromExpression(cb,std::get<1>(expr.second));
 | 
			
		||||
  CBFromExpression(cb,std::get<2>(expr.second));
 | 
			
		||||
inline void CBFromExpression(
 | 
			
		||||
    int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
 | 
			
		||||
  CBFromExpression(cb, std::get<0>(expr.second));  // recurse
 | 
			
		||||
  CBFromExpression(cb, std::get<1>(expr.second));
 | 
			
		||||
  CBFromExpression(cb, std::get<2>(expr.second));
 | 
			
		||||
  //  std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Unary operators and funcs
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
#define GridUnopClass(name,ret)\
 | 
			
		||||
template <class arg> struct name\
 | 
			
		||||
{\
 | 
			
		||||
  static auto inline func(const arg a)-> decltype(ret) { return ret; } \
 | 
			
		||||
};
 | 
			
		||||
#define GridUnopClass(name, ret)                                          \
 | 
			
		||||
  template <class arg>                                                    \
 | 
			
		||||
  struct name {                                                           \
 | 
			
		||||
    static auto inline func(const arg a) -> decltype(ret) { return ret; } \
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
GridUnopClass(UnarySub,-a);
 | 
			
		||||
GridUnopClass(UnaryNot,Not(a));
 | 
			
		||||
GridUnopClass(UnaryAdj,adj(a));
 | 
			
		||||
GridUnopClass(UnaryConj,conjugate(a));
 | 
			
		||||
GridUnopClass(UnaryTrace,trace(a));
 | 
			
		||||
GridUnopClass(UnaryTranspose,transpose(a));
 | 
			
		||||
GridUnopClass(UnaryTa,Ta(a));
 | 
			
		||||
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
 | 
			
		||||
GridUnopClass(UnaryReal,real(a));
 | 
			
		||||
GridUnopClass(UnaryImag,imag(a));
 | 
			
		||||
GridUnopClass(UnaryToReal,toReal(a));
 | 
			
		||||
GridUnopClass(UnaryToComplex,toComplex(a));
 | 
			
		||||
GridUnopClass(UnaryAbs,abs(a));
 | 
			
		||||
GridUnopClass(UnarySqrt,sqrt(a));
 | 
			
		||||
GridUnopClass(UnaryRsqrt,rsqrt(a));
 | 
			
		||||
GridUnopClass(UnarySin,sin(a));
 | 
			
		||||
GridUnopClass(UnaryCos,cos(a));
 | 
			
		||||
GridUnopClass(UnaryLog,log(a));
 | 
			
		||||
GridUnopClass(UnaryExp,exp(a));
 | 
			
		||||
GridUnopClass(UnarySub, -a);
 | 
			
		||||
GridUnopClass(UnaryNot, Not(a));
 | 
			
		||||
GridUnopClass(UnaryAdj, adj(a));
 | 
			
		||||
GridUnopClass(UnaryConj, conjugate(a));
 | 
			
		||||
GridUnopClass(UnaryTrace, trace(a));
 | 
			
		||||
GridUnopClass(UnaryTranspose, transpose(a));
 | 
			
		||||
GridUnopClass(UnaryTa, Ta(a));
 | 
			
		||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
 | 
			
		||||
GridUnopClass(UnaryReal, real(a));
 | 
			
		||||
GridUnopClass(UnaryImag, imag(a));
 | 
			
		||||
GridUnopClass(UnaryToReal, toReal(a));
 | 
			
		||||
GridUnopClass(UnaryToComplex, toComplex(a));
 | 
			
		||||
GridUnopClass(UnaryTimesI, timesI(a));
 | 
			
		||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
 | 
			
		||||
GridUnopClass(UnaryAbs, abs(a));
 | 
			
		||||
GridUnopClass(UnarySqrt, sqrt(a));
 | 
			
		||||
GridUnopClass(UnaryRsqrt, rsqrt(a));
 | 
			
		||||
GridUnopClass(UnarySin, sin(a));
 | 
			
		||||
GridUnopClass(UnaryCos, cos(a));
 | 
			
		||||
GridUnopClass(UnaryAsin, asin(a));
 | 
			
		||||
GridUnopClass(UnaryAcos, acos(a));
 | 
			
		||||
GridUnopClass(UnaryLog, log(a));
 | 
			
		||||
GridUnopClass(UnaryExp, exp(a));
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Binary operators
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
#define GridBinOpClass(name,combination)\
 | 
			
		||||
template <class left,class right>\
 | 
			
		||||
struct name\
 | 
			
		||||
{\
 | 
			
		||||
  static auto inline func(const left &lhs,const right &rhs)-> decltype(combination) const \
 | 
			
		||||
    {\
 | 
			
		||||
      return combination;\
 | 
			
		||||
    }\
 | 
			
		||||
}
 | 
			
		||||
GridBinOpClass(BinaryAdd,lhs+rhs);
 | 
			
		||||
GridBinOpClass(BinarySub,lhs-rhs);
 | 
			
		||||
GridBinOpClass(BinaryMul,lhs*rhs);
 | 
			
		||||
#define GridBinOpClass(name, combination)                      \
 | 
			
		||||
  template <class left, class right>                           \
 | 
			
		||||
  struct name {                                                \
 | 
			
		||||
    static auto inline func(const left &lhs, const right &rhs) \
 | 
			
		||||
        -> decltype(combination) const {                       \
 | 
			
		||||
      return combination;                                      \
 | 
			
		||||
    }                                                          \
 | 
			
		||||
  }
 | 
			
		||||
GridBinOpClass(BinaryAdd, lhs + rhs);
 | 
			
		||||
GridBinOpClass(BinarySub, lhs - rhs);
 | 
			
		||||
GridBinOpClass(BinaryMul, lhs *rhs);
 | 
			
		||||
 | 
			
		||||
GridBinOpClass(BinaryAnd   ,lhs&rhs);
 | 
			
		||||
GridBinOpClass(BinaryOr    ,lhs|rhs);
 | 
			
		||||
GridBinOpClass(BinaryAndAnd,lhs&&rhs);
 | 
			
		||||
GridBinOpClass(BinaryOrOr  ,lhs||rhs);
 | 
			
		||||
GridBinOpClass(BinaryAnd, lhs &rhs);
 | 
			
		||||
GridBinOpClass(BinaryOr, lhs | rhs);
 | 
			
		||||
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
 | 
			
		||||
GridBinOpClass(BinaryOrOr, lhs || rhs);
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////
 | 
			
		||||
// Trinary conditional op
 | 
			
		||||
////////////////////////////////////////////////////
 | 
			
		||||
#define GridTrinOpClass(name,combination)\
 | 
			
		||||
template <class predicate,class left, class right>	\
 | 
			
		||||
struct name\
 | 
			
		||||
{\
 | 
			
		||||
  static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \
 | 
			
		||||
    {\
 | 
			
		||||
      return combination;\
 | 
			
		||||
    }\
 | 
			
		||||
}
 | 
			
		||||
#define GridTrinOpClass(name, combination)                                     \
 | 
			
		||||
  template <class predicate, class left, class right>                          \
 | 
			
		||||
  struct name {                                                                \
 | 
			
		||||
    static auto inline func(const predicate &pred, const left &lhs,            \
 | 
			
		||||
                            const right &rhs) -> decltype(combination) const { \
 | 
			
		||||
      return combination;                                                      \
 | 
			
		||||
    }                                                                          \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
GridTrinOpClass(TrinaryWhere,(predicatedWhere<predicate, \
 | 
			
		||||
			       typename std::remove_reference<left>::type, \
 | 
			
		||||
			       typename std::remove_reference<right>::type> (pred,lhs,rhs)));
 | 
			
		||||
GridTrinOpClass(
 | 
			
		||||
    TrinaryWhere,
 | 
			
		||||
    (predicatedWhere<predicate, typename std::remove_reference<left>::type,
 | 
			
		||||
                     typename std::remove_reference<right>::type>(pred, lhs,
 | 
			
		||||
                                                                  rhs)));
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Operator syntactical glue
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
#define GRID_UNOP(name)   name<decltype(eval(0, arg))>
 | 
			
		||||
#define GRID_BINOP(name)  name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
 | 
			
		||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
 | 
			
		||||
 | 
			
		||||
#define GRID_DEF_UNOP(op, name)\
 | 
			
		||||
template <typename T1,\
 | 
			
		||||
  typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr> inline auto op(const T1 &arg) \
 | 
			
		||||
  -> decltype(LatticeUnaryExpression<GRID_UNOP(name),const T1&>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \
 | 
			
		||||
{ return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); }
 | 
			
		||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
 | 
			
		||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
 | 
			
		||||
#define GRID_TRINOP(name) \
 | 
			
		||||
  name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
 | 
			
		||||
 | 
			
		||||
#define GRID_BINOP_LEFT(op, name)\
 | 
			
		||||
template <typename T1,typename T2,\
 | 
			
		||||
          typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr>\
 | 
			
		||||
inline auto op(const T1 &lhs,const T2&rhs) \
 | 
			
		||||
  -> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
 | 
			
		||||
											    std::forward_as_tuple(lhs, rhs)))) \
 | 
			
		||||
{\
 | 
			
		||||
 return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
 | 
			
		||||
									  std::forward_as_tuple(lhs, rhs))); \
 | 
			
		||||
}
 | 
			
		||||
#define GRID_DEF_UNOP(op, name)                                             \
 | 
			
		||||
  template <typename T1,                                                    \
 | 
			
		||||
            typename std::enable_if<is_lattice<T1>::value ||                \
 | 
			
		||||
                                        is_lattice_expr<T1>::value,         \
 | 
			
		||||
                                    T1>::type * = nullptr>                  \
 | 
			
		||||
  inline auto op(const T1 &arg)                                             \
 | 
			
		||||
      ->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(       \
 | 
			
		||||
          std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
 | 
			
		||||
    return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(             \
 | 
			
		||||
        std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)));     \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define GRID_BINOP_RIGHT(op, name)\
 | 
			
		||||
 template <typename T1,typename T2,\
 | 
			
		||||
           typename std::enable_if<!is_lattice<T1>::value && !is_lattice_expr<T1>::value, T1>::type* = nullptr,\
 | 
			
		||||
           typename std::enable_if< is_lattice<T2>::value ||  is_lattice_expr<T2>::value, T2>::type* = nullptr> \
 | 
			
		||||
inline auto op(const T1 &lhs,const T2&rhs)			\
 | 
			
		||||
  -> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
 | 
			
		||||
											    std::forward_as_tuple(lhs, rhs)))) \
 | 
			
		||||
{\
 | 
			
		||||
 return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
 | 
			
		||||
								          std::forward_as_tuple(lhs, rhs))); \
 | 
			
		||||
}
 | 
			
		||||
#define GRID_BINOP_LEFT(op, name)                                             \
 | 
			
		||||
  template <typename T1, typename T2,                                         \
 | 
			
		||||
            typename std::enable_if<is_lattice<T1>::value ||                  \
 | 
			
		||||
                                        is_lattice_expr<T1>::value,           \
 | 
			
		||||
                                    T1>::type * = nullptr>                    \
 | 
			
		||||
  inline auto op(const T1 &lhs, const T2 &rhs)                                \
 | 
			
		||||
      ->decltype(                                                             \
 | 
			
		||||
          LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(  \
 | 
			
		||||
              std::make_pair(GRID_BINOP(name)(),                              \
 | 
			
		||||
                             std::forward_as_tuple(lhs, rhs)))) {             \
 | 
			
		||||
    return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
 | 
			
		||||
        std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define GRID_DEF_BINOP(op, name)\
 | 
			
		||||
 GRID_BINOP_LEFT(op,name);\
 | 
			
		||||
 GRID_BINOP_RIGHT(op,name);
 | 
			
		||||
#define GRID_BINOP_RIGHT(op, name)                                            \
 | 
			
		||||
  template <typename T1, typename T2,                                         \
 | 
			
		||||
            typename std::enable_if<!is_lattice<T1>::value &&                 \
 | 
			
		||||
                                        !is_lattice_expr<T1>::value,          \
 | 
			
		||||
                                    T1>::type * = nullptr,                    \
 | 
			
		||||
            typename std::enable_if<is_lattice<T2>::value ||                  \
 | 
			
		||||
                                        is_lattice_expr<T2>::value,           \
 | 
			
		||||
                                    T2>::type * = nullptr>                    \
 | 
			
		||||
  inline auto op(const T1 &lhs, const T2 &rhs)                                \
 | 
			
		||||
      ->decltype(                                                             \
 | 
			
		||||
          LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(  \
 | 
			
		||||
              std::make_pair(GRID_BINOP(name)(),                              \
 | 
			
		||||
                             std::forward_as_tuple(lhs, rhs)))) {             \
 | 
			
		||||
    return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
 | 
			
		||||
        std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define GRID_DEF_BINOP(op, name) \
 | 
			
		||||
  GRID_BINOP_LEFT(op, name);     \
 | 
			
		||||
  GRID_BINOP_RIGHT(op, name);
 | 
			
		||||
 | 
			
		||||
#define GRID_DEF_TRINOP(op, name)\
 | 
			
		||||
template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \
 | 
			
		||||
  -> decltype(LatticeTrinaryExpression<GRID_TRINOP(name),const T1&,const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(),\
 | 
			
		||||
										   std::forward_as_tuple(pred,lhs,rhs)))) \
 | 
			
		||||
{\
 | 
			
		||||
  return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(), \
 | 
			
		||||
										 std::forward_as_tuple(pred,lhs, rhs))); \
 | 
			
		||||
}
 | 
			
		||||
#define GRID_DEF_TRINOP(op, name)                                              \
 | 
			
		||||
  template <typename T1, typename T2, typename T3>                             \
 | 
			
		||||
  inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs)                 \
 | 
			
		||||
      ->decltype(                                                              \
 | 
			
		||||
          LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,  \
 | 
			
		||||
                                   const T3 &>(std::make_pair(                 \
 | 
			
		||||
              GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) {  \
 | 
			
		||||
    return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
 | 
			
		||||
                                    const T3 &>(std::make_pair(                \
 | 
			
		||||
        GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)));          \
 | 
			
		||||
  }
 | 
			
		||||
////////////////////////
 | 
			
		||||
//Operator definitions
 | 
			
		||||
// Operator definitions
 | 
			
		||||
////////////////////////
 | 
			
		||||
 | 
			
		||||
GRID_DEF_UNOP(operator -,UnarySub);
 | 
			
		||||
GRID_DEF_UNOP(Not,UnaryNot);
 | 
			
		||||
GRID_DEF_UNOP(operator !,UnaryNot);
 | 
			
		||||
GRID_DEF_UNOP(adj,UnaryAdj);
 | 
			
		||||
GRID_DEF_UNOP(conjugate,UnaryConj);
 | 
			
		||||
GRID_DEF_UNOP(trace,UnaryTrace);
 | 
			
		||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
 | 
			
		||||
GRID_DEF_UNOP(Ta,UnaryTa);
 | 
			
		||||
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
 | 
			
		||||
GRID_DEF_UNOP(real,UnaryReal);
 | 
			
		||||
GRID_DEF_UNOP(imag,UnaryImag);
 | 
			
		||||
GRID_DEF_UNOP(toReal,UnaryToReal);
 | 
			
		||||
GRID_DEF_UNOP(toComplex,UnaryToComplex);
 | 
			
		||||
GRID_DEF_UNOP(abs  ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
 | 
			
		||||
GRID_DEF_UNOP(sqrt ,UnarySqrt);
 | 
			
		||||
GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
 | 
			
		||||
GRID_DEF_UNOP(sin  ,UnarySin);
 | 
			
		||||
GRID_DEF_UNOP(cos  ,UnaryCos);
 | 
			
		||||
GRID_DEF_UNOP(log  ,UnaryLog);
 | 
			
		||||
GRID_DEF_UNOP(exp  ,UnaryExp);
 | 
			
		||||
GRID_DEF_UNOP(operator-, UnarySub);
 | 
			
		||||
GRID_DEF_UNOP(Not, UnaryNot);
 | 
			
		||||
GRID_DEF_UNOP(operator!, UnaryNot);
 | 
			
		||||
GRID_DEF_UNOP(adj, UnaryAdj);
 | 
			
		||||
GRID_DEF_UNOP(conjugate, UnaryConj);
 | 
			
		||||
GRID_DEF_UNOP(trace, UnaryTrace);
 | 
			
		||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
 | 
			
		||||
GRID_DEF_UNOP(Ta, UnaryTa);
 | 
			
		||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
 | 
			
		||||
GRID_DEF_UNOP(real, UnaryReal);
 | 
			
		||||
GRID_DEF_UNOP(imag, UnaryImag);
 | 
			
		||||
GRID_DEF_UNOP(toReal, UnaryToReal);
 | 
			
		||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
 | 
			
		||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
 | 
			
		||||
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
 | 
			
		||||
GRID_DEF_UNOP(abs, UnaryAbs);  // abs overloaded in cmath C++98; DON'T do the
 | 
			
		||||
                               // abs-fabs-dabs-labs thing
 | 
			
		||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
 | 
			
		||||
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
 | 
			
		||||
GRID_DEF_UNOP(sin, UnarySin);
 | 
			
		||||
GRID_DEF_UNOP(cos, UnaryCos);
 | 
			
		||||
GRID_DEF_UNOP(asin, UnaryAsin);
 | 
			
		||||
GRID_DEF_UNOP(acos, UnaryAcos);
 | 
			
		||||
GRID_DEF_UNOP(log, UnaryLog);
 | 
			
		||||
GRID_DEF_UNOP(exp, UnaryExp);
 | 
			
		||||
 | 
			
		||||
GRID_DEF_BINOP(operator+,BinaryAdd);
 | 
			
		||||
GRID_DEF_BINOP(operator-,BinarySub);
 | 
			
		||||
GRID_DEF_BINOP(operator*,BinaryMul);
 | 
			
		||||
GRID_DEF_BINOP(operator+, BinaryAdd);
 | 
			
		||||
GRID_DEF_BINOP(operator-, BinarySub);
 | 
			
		||||
GRID_DEF_BINOP(operator*, BinaryMul);
 | 
			
		||||
 | 
			
		||||
GRID_DEF_BINOP(operator&,BinaryAnd);
 | 
			
		||||
GRID_DEF_BINOP(operator|,BinaryOr);
 | 
			
		||||
GRID_DEF_BINOP(operator&&,BinaryAndAnd);
 | 
			
		||||
GRID_DEF_BINOP(operator||,BinaryOrOr);
 | 
			
		||||
GRID_DEF_BINOP(operator&, BinaryAnd);
 | 
			
		||||
GRID_DEF_BINOP(operator|, BinaryOr);
 | 
			
		||||
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
 | 
			
		||||
GRID_DEF_BINOP(operator||, BinaryOrOr);
 | 
			
		||||
 | 
			
		||||
GRID_DEF_TRINOP(where,TrinaryWhere);
 | 
			
		||||
GRID_DEF_TRINOP(where, TrinaryWhere);
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
// Closure convenience to force expression to evaluate
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
template<class Op,class T1>
 | 
			
		||||
  auto closure(const LatticeUnaryExpression<Op,T1> & expr)
 | 
			
		||||
  -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))>
 | 
			
		||||
{
 | 
			
		||||
  Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> ret(expr);
 | 
			
		||||
template <class Op, class T1>
 | 
			
		||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
 | 
			
		||||
    -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
 | 
			
		||||
  Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
 | 
			
		||||
      expr);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
template<class Op,class T1, class T2>
 | 
			
		||||
  auto closure(const LatticeBinaryExpression<Op,T1,T2> & expr)
 | 
			
		||||
  -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
			
		||||
				      eval(0,std::get<1>(expr.second))))>
 | 
			
		||||
{
 | 
			
		||||
  Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
			
		||||
				   eval(0,std::get<1>(expr.second))))> ret(expr);
 | 
			
		||||
template <class Op, class T1, class T2>
 | 
			
		||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
 | 
			
		||||
    -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
 | 
			
		||||
                                        eval(0, std::get<1>(expr.second))))> {
 | 
			
		||||
  Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
 | 
			
		||||
                                   eval(0, std::get<1>(expr.second))))>
 | 
			
		||||
      ret(expr);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
template<class Op,class T1, class T2, class T3>
 | 
			
		||||
  auto closure(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
 | 
			
		||||
  -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
			
		||||
				      eval(0,std::get<1>(expr.second)),
 | 
			
		||||
				      eval(0,std::get<2>(expr.second))))>
 | 
			
		||||
{
 | 
			
		||||
  Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
			
		||||
				   eval(0,std::get<1>(expr.second)),
 | 
			
		||||
				   eval(0,std::get<2>(expr.second))))> ret(expr);
 | 
			
		||||
template <class Op, class T1, class T2, class T3>
 | 
			
		||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
 | 
			
		||||
    -> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
 | 
			
		||||
                                        eval(0, std::get<1>(expr.second)),
 | 
			
		||||
                                        eval(0, std::get<2>(expr.second))))> {
 | 
			
		||||
  Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
 | 
			
		||||
                                   eval(0, std::get<1>(expr.second)),
 | 
			
		||||
                                   eval(0, std::get<2>(expr.second))))>
 | 
			
		||||
      ret(expr);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -353,12 +431,11 @@ template<class Op,class T1, class T2, class T3>
 | 
			
		||||
#undef GRID_DEF_UNOP
 | 
			
		||||
#undef GRID_DEF_BINOP
 | 
			
		||||
#undef GRID_DEF_TRINOP
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 	      
 | 
			
		||||
        
 | 
			
		||||
 int main(int argc,char **argv){
 | 
			
		||||
   
 | 
			
		||||
   Lattice<double> v1(16);
 | 
			
		||||
@@ -368,7 +445,7 @@ using namespace Grid;
 | 
			
		||||
   BinaryAdd<double,double> tmp;
 | 
			
		||||
   LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &> 
 | 
			
		||||
     expr(std::make_pair(tmp,
 | 
			
		||||
	  std::forward_as_tuple(v1,v2)));
 | 
			
		||||
    std::forward_as_tuple(v1,v2)));
 | 
			
		||||
   tmp.func(eval(0,v1),eval(0,v2));
 | 
			
		||||
 | 
			
		||||
   auto var = v1+v2;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_arith.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_ARITH_H
 | 
			
		||||
#define GRID_LATTICE_ARITH_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,33 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/lattice/Lattice_base.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_BASE_H
 | 
			
		||||
#define GRID_LATTICE_BASE_H
 | 
			
		||||
 | 
			
		||||
@@ -26,12 +56,15 @@ extern int GridCshiftPermuteMap[4][16];
 | 
			
		||||
// Basic expressions used in Expression Template
 | 
			
		||||
////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
class LatticeBase {};
 | 
			
		||||
class LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    virtual ~LatticeBase(void) = default;
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
class LatticeExpressionBase {};
 | 
			
		||||
 | 
			
		||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;               // Aligned allocator??
 | 
			
		||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
 | 
			
		||||
 | 
			
		||||
template <typename Op, typename T1>                           
 | 
			
		||||
class LatticeUnaryExpression  : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
 | 
			
		||||
 public:
 | 
			
		||||
@@ -59,8 +92,6 @@ template<class vobj>
 | 
			
		||||
class Lattice : public LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int checkerboard;
 | 
			
		||||
    Vector<vobj> _odata;
 | 
			
		||||
    
 | 
			
		||||
@@ -68,12 +99,12 @@ public:
 | 
			
		||||
    int begin(void) { return 0;};
 | 
			
		||||
    int end(void)   { return _odata.size(); }
 | 
			
		||||
    vobj & operator[](int i) { return _odata[i]; };
 | 
			
		||||
    const vobj & operator[](int i) const { return _odata[i]; };
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
    typedef typename vobj::vector_type vector_type;
 | 
			
		||||
    typedef vobj vector_object;
 | 
			
		||||
 
 | 
			
		||||
   
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Expression Template closure support
 | 
			
		||||
@@ -149,8 +180,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  }
 | 
			
		||||
  //GridFromExpression is tricky to do
 | 
			
		||||
  template<class Op,class T1>
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr):    _grid(nullptr){
 | 
			
		||||
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -171,7 +202,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2>
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr):    _grid(nullptr){
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -192,7 +224,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2, class T3>
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr):    _grid(nullptr){
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -212,14 +245,29 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    // Constructor requires "grid" passed.
 | 
			
		||||
    // what about a default grid?
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
 | 
			
		||||
      //        _odata.reserve(_grid->oSites());
 | 
			
		||||
      //        _odata.resize(_grid->oSites());
 | 
			
		||||
    Lattice(GridBase *grid) : _odata(grid->oSites()) {
 | 
			
		||||
        _grid = grid;
 | 
			
		||||
    //        _odata.reserve(_grid->oSites());
 | 
			
		||||
    //        _odata.resize(_grid->oSites());
 | 
			
		||||
    //      std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
 | 
			
		||||
        assert((((uint64_t)&_odata[0])&0xF) ==0);
 | 
			
		||||
        checkerboard=0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Lattice(const Lattice& r){ // copy constructor
 | 
			
		||||
    	_grid = r._grid;
 | 
			
		||||
    	checkerboard = r.checkerboard;
 | 
			
		||||
    	_odata.resize(_grid->oSites());// essential
 | 
			
		||||
  		PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
            _odata[ss]=r._odata[ss];
 | 
			
		||||
        }  	
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    virtual ~Lattice(void) = default;
 | 
			
		||||
    
 | 
			
		||||
    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
@@ -230,7 +278,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
 | 
			
		||||
      this->checkerboard = r.checkerboard;
 | 
			
		||||
      conformable(*this,r);
 | 
			
		||||
      std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
            this->_odata[ss]=r._odata[ss];
 | 
			
		||||
@@ -287,27 +335,27 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <lattice/Lattice_conformable.h>
 | 
			
		||||
#include "Lattice_conformable.h"
 | 
			
		||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
 | 
			
		||||
#ifdef  GRID_LATTICE_EXPRESSION_TEMPLATES
 | 
			
		||||
#include <lattice/Lattice_ET.h>
 | 
			
		||||
#include "Lattice_ET.h"
 | 
			
		||||
#else 
 | 
			
		||||
#include <lattice/Lattice_overload.h>
 | 
			
		||||
#include "Lattice_overload.h"
 | 
			
		||||
#endif
 | 
			
		||||
#include <lattice/Lattice_arith.h>
 | 
			
		||||
#include <lattice/Lattice_trace.h>
 | 
			
		||||
#include <lattice/Lattice_transpose.h>
 | 
			
		||||
#include <lattice/Lattice_local.h>
 | 
			
		||||
#include <lattice/Lattice_reduction.h>
 | 
			
		||||
#include <lattice/Lattice_peekpoke.h>
 | 
			
		||||
#include <lattice/Lattice_reality.h>
 | 
			
		||||
#include <lattice/Lattice_comparison_utils.h>
 | 
			
		||||
#include <lattice/Lattice_comparison.h>
 | 
			
		||||
#include <lattice/Lattice_coordinate.h>
 | 
			
		||||
#include <lattice/Lattice_where.h>
 | 
			
		||||
#include <lattice/Lattice_rng.h>
 | 
			
		||||
#include <lattice/Lattice_unary.h>
 | 
			
		||||
#include <lattice/Lattice_transfer.h>
 | 
			
		||||
#include "Lattice_arith.h"
 | 
			
		||||
#include "Lattice_trace.h"
 | 
			
		||||
#include "Lattice_transpose.h"
 | 
			
		||||
#include "Lattice_local.h"
 | 
			
		||||
#include "Lattice_reduction.h"
 | 
			
		||||
#include "Lattice_peekpoke.h"
 | 
			
		||||
#include "Lattice_reality.h"
 | 
			
		||||
#include "Lattice_comparison_utils.h"
 | 
			
		||||
#include "Lattice_comparison.h"
 | 
			
		||||
#include "Lattice_coordinate.h"
 | 
			
		||||
#include "Lattice_where.h"
 | 
			
		||||
#include "Lattice_rng.h"
 | 
			
		||||
#include "Lattice_unary.h"
 | 
			
		||||
#include "Lattice_transfer.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_comparison.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_COMPARISON_H
 | 
			
		||||
#define GRID_LATTICE_COMPARISON_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_comparison_utils.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_COMPARISON_H
 | 
			
		||||
#define GRID_COMPARISON_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_conformable.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_CONFORMABLE_H
 | 
			
		||||
#define GRID_LATTICE_CONFORMABLE_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user