mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-02 21:14:32 +00:00 
			
		
		
		
	Compare commits
	
		
			499 Commits
		
	
	
		
			feature/sc
			...
			feature/ha
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 8e0d2f3402 | |||
| 
						 | 
					f3f24b3017 | ||
| 
						 | 
					68c13045d6 | ||
| 
						 | 
					e9b6f58fdc | ||
| 
						 | 
					839605c45c | ||
| 
						 | 
					2205b1e63e | ||
| 
						 | 
					6f421c7a6f | ||
| 
						 | 
					b62b9ac214 | ||
| 
						 | 
					8c3a599148 | ||
| 
						 | 
					4a47b11876 | ||
| 8f514ae550 | |||
| febe41cc1d | |||
| 62173395b8 | |||
| 6b559d68aa | |||
| 1982cc58dd | |||
| 2e2e5ce596 | |||
| 2d3916418e | |||
| 21304e2139 | |||
| 7b850eb48b | |||
| a3ace57e01 | |||
| b1c3cbe35e | |||
| 2b4e253473 | |||
| 0ba3d469c7 | |||
| 
						 | 
					109c74bed8 | ||
| 3023287fd9 | |||
| b3d6805638 | |||
| 291bc2a1f0 | |||
| 2f368c33fc | |||
| 9592115341 | |||
| 
						 | 
					24c07694bc | ||
| 
						 | 
					f0229025e2 | ||
| 
						 | 
					6de9a45a09 | ||
| 
						 | 
					03c3d495a2 | ||
| 
						 | 
					49f25e08e8 | ||
| efc0c65056 | |||
| 936eaac8e1 | |||
| fe6a372f75 | |||
| 148fc052bd | |||
| c073341a10 | |||
| 78299daaac | |||
| 866449c804 | |||
| d69a52079f | |||
| 9f4f8a14a3 | |||
| f6593dc881 | |||
| 
						 | 
					b46d31d4b6 | ||
| 58567fc650 | |||
| 
						 | 
					7c57cac670 | ||
| d0b21bf1ff | |||
| a1825d1f59 | |||
| 5a3e83ff7b | |||
| 52569d98d8 | |||
| b351103c29 | |||
| 118cca4681 | |||
| 44de727cd2 | |||
| 888ebc3cf9 | |||
| 6c031a1b81 | |||
| 02aa4bd762 | |||
| 9aafa8ee60 | |||
| 430b98b354 | |||
| 84189867ef | |||
| 4ab8cfbe2a | |||
| aadd9f4468 | |||
| 8fbb27ce13 | |||
| 21bba95909 | |||
| 6448fe7121 | |||
| 2458a11d1d | |||
| d0ca7c3fe6 | |||
| 57f899d79c | |||
| e881a0c157 | |||
| f411657118 | |||
| 
						 | 
					7458c6174b | ||
| 
						 | 
					21b269d0f9 | ||
| 
						 | 
					083af92ac2 | ||
| 
						 | 
					2c162577b5 | ||
| 
						 | 
					b1c4e96382 | ||
| 
						 | 
					a55c6f34f3 | ||
| 
						 | 
					beed527ea3 | ||
| eaa633cf69 | |||
| c632455129 | |||
| c012899ed5 | |||
| 
						 | 
					8bab544c2f | ||
| 
						 | 
					76fc06a5dc | ||
| 4af6c7e7aa | |||
| f60fbcfc4d | |||
| 464c81706e | |||
| 408130b808 | |||
| 375edd1370 | |||
| 6d912f6c67 | |||
| 6d1d28955e | |||
| 920b471761 | |||
| 63c21767ba | |||
| 7b6b712565 | |||
| 35abd05ee9 | |||
| dd36e60f6a | |||
| cb6c548e21 | |||
| 02c4ccf621 | |||
| fd24588212 | |||
| b800bb3ecb | |||
| f8abd0978b | |||
| 12c7c493bf | |||
| 
						 | 
					c7c9072313 | ||
| 2bf3be5fae | |||
| 3a40e4fc69 | |||
| 2e69e03f6f | |||
| a09f9bb528 | |||
| f0e341d726 | |||
| 6f09df0daf | |||
| 26cee605b8 | |||
| b3fa18c229 | |||
| 2940c9bcfd | |||
| 0bb532f72b | |||
| fada2aa0f7 | |||
| c193e4e675 | |||
| 3ee682f676 | |||
| d85ec3bac2 | |||
| b52d8eb1e3 | |||
| ee630d2e8b | |||
| 2f0af79869 | |||
| 1b7fb79ec0 | |||
| 2db1a4628c | |||
| 6aa047d842 | |||
| 8779c32ae1 | |||
| c527dc3358 | |||
| 6b42577b6b | |||
| fb3596f968 | |||
| f3a0158213 | |||
| 0250aa9347 | |||
| 3df6743396 | |||
| fb7d021b9d | |||
| 5f206df775 | |||
| 7727e81113 | |||
| c4115544a5 | |||
| 08c47328ba | |||
| 09001aedca | |||
| 2c67304716 | |||
| dc6d8686de | |||
| cc2780bea3 | |||
| 6e5a2b7922 | |||
| f4878d3a13 | |||
| 89d2fac92e | |||
| f2d3e41cf2 | |||
| 3c27bb36d4 | |||
| 603d59f389 | |||
| 07a0ef3f95 | |||
| 503259f9c9 | |||
| 5be6a51044 | |||
| ac69f042b1 | |||
| 133d5c2e34 | |||
| 2a94244890 | |||
| a15a2dfd29 | |||
| 093bb02633 | |||
| 99a85116f8 | |||
| 
						 | 
					27cdb79063 | ||
| f4cbfd63ff | |||
| 2b794b6aa7 | |||
| d0244a059f | |||
| dcdd891d7d | |||
| 6d2df9de79 | |||
| 41d4e37bae | |||
| ee5c0cc9b6 | |||
| 0a4020eb4d | |||
| b2de26589b | |||
| 0677adb4dd | |||
| 231cc95be6 | |||
| 639f9cab82 | |||
| 4eac4e575e | |||
| 3f0f92cda6 | |||
| d2650e89bd | |||
| 2962123cba | |||
| 830168ec37 | |||
| 584c921ca0 | |||
| 81347b4d16 | |||
| 2cfa0b0e6b | |||
| 
						 | 
					fa5dee76b1 | ||
| 
						 | 
					8d1679c6b8 | ||
| 
						 | 
					3791a38f7c | ||
| 
						 | 
					142f7b0c86 | ||
| 
						 | 
					891ad66eab | ||
| 
						 | 
					60c43151c5 | ||
| 
						 | 
					e036800261 | ||
| 
						 | 
					62900def36 | ||
| 
						 | 
					e3a309a73f | ||
| 
						 | 
					ad6c1c0c4e | ||
| 
						 | 
					00b92a91b5 | ||
| 
						 | 
					65533741f7 | ||
| 
						 | 
					dc0259fbda | ||
| 
						 | 
					131a6785d4 | ||
| 
						 | 
					44f4f5c8e2 | ||
| 
						 | 
					2679df034f | ||
| bf71162b97 | |||
| 299e828d83 | |||
| ef5452cddf | |||
| 80de748737 | |||
| 
						 | 
					71e1006ba8 | ||
| 00f31ae83f | |||
| cce339deaf | |||
| 
						 | 
					24128ff109 | ||
| 
						 | 
					34e9d3f0ca | ||
| 
						 | 
					c995788259 | ||
| 
						 | 
					94c7198001 | ||
| 
						 | 
					04d86fe9f3 | ||
| 
						 | 
					b78074b6a0 | ||
| 
						 | 
					7dfd3cdae8 | ||
| 
						 | 
					cecee1ef2c | ||
| 
						 | 
					355d4b58be | ||
| 
						 | 
					2c54a536f3 | ||
| 
						 | 
					d868a45120 | ||
| 
						 | 
					9deae8c962 | ||
| 
						 | 
					db86cdd7bd | ||
| 
						 | 
					ec9939c1ba | ||
| 
						 | 
					f74617c124 | ||
| 
						 | 
					8c6a3921ed | ||
| a8a15dd9d0 | |||
| 3ce68a751a | |||
| 
						 | 
					daa0977d01 | ||
| 
						 | 
					a2929f4384 | ||
| 
						 | 
					7fe3974c0a | ||
| 
						 | 
					f7e86f81a0 | ||
| 
						 | 
					fecec803d9 | ||
| 
						 | 
					8fe9a13cdd | ||
| d2c42e6f42 | |||
| 049cc518f4 | |||
| 2e1c66897f | |||
| adcef36189 | |||
| 
						 | 
					2f121c41c9 | ||
| e0ed7e300f | |||
| 485207901b | |||
| c760f0a4c3 | |||
| c84eeedec3 | |||
| 
						 | 
					1ac3526f33 | ||
| 
						 | 
					0de090ee74 | ||
| 91405de3f7 | |||
| 
						 | 
					8fccda301a | ||
| 
						 | 
					7a0abfac89 | ||
| 
						 | 
					ae37fda699 | ||
| 
						 | 
					b5fc5e2030 | ||
| 8db0ef9736 | |||
| 
						 | 
					95d4b46446 | ||
| 
						 | 
					5dfd216a34 | ||
| 
						 | 
					c2e8d0aa88 | ||
| 
						 | 
					0fe5aeffbb | ||
| 
						 | 
					7fbc469046 | ||
| 
						 | 
					bf96a4bdbf | ||
| 
						 | 
					84685c9bc3 | ||
| 
						 | 
					a8d4156997 | ||
| 
						 | 
					c18074869b | ||
| 
						 | 
					f4c6d39238 | ||
| 200d35b38a | |||
| eb52e84d09 | |||
| 72abc34764 | |||
| e3164d4c7b | |||
| 
						 | 
					f5db386c55 | ||
| 
						 | 
					294ee70a7a | ||
| 
						 | 
					013ea4e8d1 | ||
| 
						 | 
					7fbbb31a50 | ||
| 
						 | 
					0e127b1fc7 | ||
| 
						 | 
					68c028b0a6 | ||
| 255d4992e1 | |||
| a0d399e5ce | |||
| fd3b2e945a | |||
| b999984501 | |||
| 
						 | 
					7836cc2d74 | ||
| a61e0df54b | |||
| 9d835afa35 | |||
| 5e3be47117 | |||
| 48de706dd5 | |||
| f871fb0c6d | |||
| 93771f3099 | |||
| 8cb205725b | |||
| 9ad580d82f | |||
| 899f961d0d | |||
| 54d789204f | |||
| 25828746f3 | |||
| f362c00739 | |||
| 2017e4e3b4 | |||
| 27a4d4c951 | |||
| 2f92721249 | |||
| 3252059daf | |||
| 
						 | 
					6eed167f0c | ||
| 661381e881 | |||
| 
						 | 
					9ada378e38 | ||
| 
						 | 
					9d9692d439 | ||
| 0659ae4014 | |||
| dd6b796a01 | |||
| 
						 | 
					52a856b4a8 | ||
| 
						 | 
					04190ee7f3 | ||
| 
						 | 
					587bfcc0f4 | ||
| 
						 | 
					2700992ef5 | ||
| 
						 | 
					4f4181c54a | ||
| 
						 | 
					b35169f1dd | ||
| 
						 | 
					441ad7498d | ||
| ca639c195f | |||
| edc28dcfbf | |||
| 
						 | 
					edcf9b9293 | ||
| 49b8501fd4 | |||
| d47484717e | |||
| 
						 | 
					96272f3841 | ||
| 
						 | 
					5c936d88a0 | ||
| 
						 | 
					1c64ee926e | ||
| 
						 | 
					2cbb72a81c | ||
| 
						 | 
					31d83ee046 | ||
| 
						 | 
					a9e8758a01 | ||
| 
						 | 
					3e125c5b61 | ||
| 
						 | 
					eac6ec4b5e | ||
| 
						 | 
					213f8db6a2 | ||
| cc6eb51e3e | |||
| 
						 | 
					507009089b | ||
| b234784c8e | |||
| 6ea2a8b7ca | |||
| c1d0359aaa | |||
| 047ee4ad0b | |||
| a13106da0c | |||
| 75113e6523 | |||
| 325c73d051 | |||
| b25a59e95e | |||
| 7c4533797f | |||
| af84fd65bb | |||
| 
						 | 
					1a2613086a | ||
| 
						 | 
					4f110c09a5 | ||
| 6764362237 | |||
| 2fa2b0e0b1 | |||
| b61292f735 | |||
| ce7720e221 | |||
| 853a5528dc | |||
| 169f405c9c | |||
| c6125b01ce | |||
| b0b5b34bff | |||
| 1c9722357d | |||
| 334da7f452 | |||
| 4669ecd4ba | |||
| 4573b34cac | |||
| 17f57e85d1 | |||
| 17f27b1ebd | |||
| a16bbecb8a | |||
| 7c9b0dd842 | |||
| 6b7228b3e6 | |||
| f117552334 | |||
| a21a160029 | |||
| 6b8ffbe735 | |||
| 81050535a5 | |||
| 7dcf5c90e3 | |||
| 9ce00f26f9 | |||
| 85c253ed4a | |||
| ccfc0a5a89 | |||
| d3f857b1c9 | |||
| fb62035aa0 | |||
| 0260bc7705 | |||
| 68e6a58f12 | |||
| 640515e3d8 | |||
| 97c579f637 | |||
| a4d8512fb8 | |||
| 5ec903044d | |||
| 8a0cf0194f | |||
| 1c680d4b7a | |||
| e9323460c7 | |||
| 
						 | 
					58c2f60b69 | ||
| 
						 | 
					bfa3a7b3b0 | ||
| 
						 | 
					f212b0a963 | ||
| 
						 | 
					62702dbcb8 | ||
| 41d6cab033 | |||
| 5a31e747c9 | |||
| cbc73a3fd1 | |||
| d516938707 | |||
| 72344d1418 | |||
| 7ecf6ab38b | |||
| 2d4d70d3ec | |||
| 78f8d47528 | |||
| b85f987b0b | |||
| f57afe2079 | |||
| 
						 | 
					8462bbfe63 | ||
| 229977c955 | |||
| e485a07133 | |||
| 70ec2faa98 | |||
| 2f849ee252 | |||
| bb6ed44339 | |||
| 
						 | 
					80302e95a8 | ||
| 9942723189 | |||
| 
						 | 
					b938202081 | ||
| e79ef469ac | |||
| 
						 | 
					c793947209 | ||
| 3e9ee053a1 | |||
| dda6c69d5b | |||
| cd51b9af99 | |||
| f32555dcc5 | |||
| e93c883470 | |||
| fcac5c0772 | |||
| 90f4000935 | |||
| 480708b9a0 | |||
| c4baf876d4 | |||
| 2f4dac3531 | |||
| 3ec6890850 | |||
| 018801d973 | |||
| 1d83521daa | |||
| fc5670c6a4 | |||
| d9c435e282 | |||
| 614a0e8277 | |||
| 
						 | 
					aaf39222c3 | ||
| 550142bd6a | |||
| c0a929aef7 | |||
| 37fe944224 | |||
| 
						 | 
					315a42843f | ||
| 83a101db83 | |||
| c4274e1660 | |||
| ba6db55cb0 | |||
| e5ea84d531 | |||
| 15767a1491 | |||
| 4d2a32ae7a | |||
| 5b937e3644 | |||
| e418b044f7 | |||
| b8b05f143f | |||
| 6ec42b4b82 | |||
| abb7d4d2f5 | |||
| 16ebbfff29 | |||
| 4828226095 | |||
| 8a049f27b8 | |||
| 43578a3eb4 | |||
| fdbd42e542 | |||
| e7e4cee4f3 | |||
| 
						 | 
					ec3954ff5f | ||
| 
						 | 
					0f468e2179 | ||
| 
						 | 
					8e61286741 | ||
| 
						 | 
					69e4ecc1d2 | ||
| 
						 | 
					5f483df16b | ||
| 
						 | 
					4680a977c3 | ||
| 
						 | 
					de42456171 | ||
| 
						 | 
					d55212c998 | ||
| 
						 | 
					c6e1f64573 | ||
| 
						 | 
					724cf02d4a | ||
| 
						 | 
					49a0ae73eb | ||
| 
						 | 
					315f1146cd | ||
| 
						 | 
					9f202782c5 | ||
| 
						 | 
					594a262dcc | ||
| 
						 | 
					7f8ca54285 | ||
| 
						 | 
					c5b23c367e | ||
| 
						 | 
					b6fe03eb26 | ||
| 
						 | 
					f37ed4958b | ||
| 
						 | 
					5f85473d6b | ||
| 
						 | 
					ac3b0ebc58 | ||
| 
						 | 
					4e0cf0cc28 | ||
| 
						 | 
					cdf550845f | ||
| 
						 | 
					3db7a5387b | ||
| 
						 | 
					90dffc73c8 | ||
| a1151fc734 | |||
| 
						 | 
					ab3baeb38f | ||
| 
						 | 
					389731d373 | ||
| 
						 | 
					97b9c6f03d | ||
| 
						 | 
					63982819c6 | ||
| 
						 | 
					6fec507bef | ||
| 
						 | 
					219b3bd34f | ||
| 
						 | 
					24162c9ead | ||
| 
						 | 
					935cd1e173 | ||
| 
						 | 
					55e39df30f | ||
| 
						 | 
					581be32ed2 | ||
| 
						 | 
					6bc136b1d0 | ||
| 
						 | 
					0c668bf46a | ||
| 
						 | 
					840814c776 | ||
| 
						 | 
					95af55128e | ||
| 
						 | 
					9f2a57e334 | ||
| 
						 | 
					c645d33db5 | ||
| 
						 | 
					e0f1349524 | ||
| 
						 | 
					79b761f923 | ||
| 
						 | 
					0d4e31ca58 | ||
| 
						 | 
					b07a354a33 | ||
| 
						 | 
					c433939795 | ||
| 
						 | 
					b6a4c31b48 | ||
| 
						 | 
					98b1439ff9 | ||
| 
						 | 
					564738b1ff | ||
| 
						 | 
					a80e43dbcf | ||
| 
						 | 
					b99622d9fb | ||
| 937c77ead2 | |||
| 95e5a2ade3 | |||
| 
						 | 
					91676d1dda | ||
| 
						 | 
					ac3611bb19 | ||
| 
						 | 
					cc4afb978d | ||
| 
						 | 
					20e92a7009 | ||
| 
						 | 
					42f0afcbfa | ||
| 
						 | 
					20ac13fdf3 | ||
| 
						 | 
					e38612e6fa | ||
| 
						 | 
					c2b2b71c5d | ||
| 
						 | 
					009f48a904 | ||
| 
						 | 
					5cfc0180aa | ||
| 
						 | 
					914f180fa3 | ||
| 
						 | 
					6cb563a40c | ||
| 
						 | 
					db3837be22 | ||
| 
						 | 
					2f0dd83016 | ||
| 
						 | 
					3ac27e5596 | ||
| 
						 | 
					bd466a55a8 | ||
| 
						 | 
					c8e6f58e24 | ||
| 
						 | 
					888988ad37 | ||
| 
						 | 
					e4a105a30b | ||
| 
						 | 
					26ebe41fef | ||
| 1e496fee74 | |||
| 
						 | 
					9f755e0379 | ||
| 
						 | 
					4512dbdf58 | ||
| 
						 | 
					483fd3cfa1 | ||
| 
						 | 
					85516e9c7c | ||
| 
						 | 
					0c006fbfaa | ||
| 
						 | 
					54c10a42cc | ||
| 
						 | 
					ef0fe2bcc1 | 
							
								
								
									
										26
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										26
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -83,6 +83,7 @@ ltmain.sh
 | 
			
		||||
.Trashes
 | 
			
		||||
ehthumbs.db
 | 
			
		||||
Thumbs.db
 | 
			
		||||
.dirstamp
 | 
			
		||||
 | 
			
		||||
# build directory #
 | 
			
		||||
###################
 | 
			
		||||
@@ -97,11 +98,8 @@ build.sh
 | 
			
		||||
 | 
			
		||||
# Eigen source #
 | 
			
		||||
################
 | 
			
		||||
lib/Eigen/*
 | 
			
		||||
 | 
			
		||||
# FFTW source #
 | 
			
		||||
################
 | 
			
		||||
lib/fftw/*
 | 
			
		||||
Grid/Eigen
 | 
			
		||||
Eigen/*
 | 
			
		||||
 | 
			
		||||
# libtool macros #
 | 
			
		||||
##################
 | 
			
		||||
@@ -112,21 +110,7 @@ m4/libtool.m4
 | 
			
		||||
################
 | 
			
		||||
gh-pages/
 | 
			
		||||
 | 
			
		||||
# Buck files #
 | 
			
		||||
##############
 | 
			
		||||
.buck*
 | 
			
		||||
buck-out
 | 
			
		||||
BUCK
 | 
			
		||||
make-bin-BUCK.sh
 | 
			
		||||
 | 
			
		||||
# generated sources #
 | 
			
		||||
#####################
 | 
			
		||||
lib/qcd/spin/gamma-gen/*.h
 | 
			
		||||
lib/qcd/spin/gamma-gen/*.cc
 | 
			
		||||
lib/version.h
 | 
			
		||||
 | 
			
		||||
# vs code editor files #
 | 
			
		||||
########################
 | 
			
		||||
.vscode/
 | 
			
		||||
.vscode/settings.json
 | 
			
		||||
settings.json
 | 
			
		||||
Grid/qcd/spin/gamma-gen/*.h
 | 
			
		||||
Grid/qcd/spin/gamma-gen/*.cc
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										28
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								.travis.yml
									
									
									
									
									
								
							@@ -9,6 +9,11 @@ matrix:
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode8.3
 | 
			
		||||
      compiler: clang
 | 
			
		||||
      env: PREC=single
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode8.3
 | 
			
		||||
      compiler: clang
 | 
			
		||||
      env: PREC=double
 | 
			
		||||
      
 | 
			
		||||
before_install:
 | 
			
		||||
    - export GRIDDIR=`pwd`
 | 
			
		||||
@@ -16,9 +21,11 @@ before_install:
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
 | 
			
		||||
    
 | 
			
		||||
install:
 | 
			
		||||
    - export CWD=`pwd`
 | 
			
		||||
    - echo $CWD
 | 
			
		||||
    - export CC=$CC$VERSION
 | 
			
		||||
    - export CXX=$CXX$VERSION
 | 
			
		||||
    - echo $PATH
 | 
			
		||||
@@ -31,17 +38,24 @@ install:
 | 
			
		||||
    - which $CXX
 | 
			
		||||
    - $CXX --version
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
 | 
			
		||||
    
 | 
			
		||||
script:
 | 
			
		||||
    - ./bootstrap.sh
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - mkdir lime
 | 
			
		||||
    - cd lime
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
 | 
			
		||||
    - tar xf lime-1.3.2.tar.gz
 | 
			
		||||
    - cd lime-1.3.2
 | 
			
		||||
    - ./configure --prefix=$CWD/build/lime/install
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - make install
 | 
			
		||||
    - cd $CWD/build
 | 
			
		||||
    - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
 | 
			
		||||
    - make -j4 
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
 | 
			
		||||
    - make check
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -48,6 +48,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/serialisation/Serialisation.h>
 | 
			
		||||
#include <Grid/threads/Threads.h>
 | 
			
		||||
#include <Grid/util/Util.h>
 | 
			
		||||
#include <Grid/util/Sha.h>
 | 
			
		||||
#include <Grid/communicator/Communicator.h> 
 | 
			
		||||
#include <Grid/cartesian/Cartesian.h>    
 | 
			
		||||
#include <Grid/tensors/Tensors.h>      
 | 
			
		||||
@@ -1,4 +1,9 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
 | 
			
		||||
#ifdef USE_MKL
 | 
			
		||||
#define EIGEN_USE_MKL_ALL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined __GNUC__
 | 
			
		||||
#pragma GCC diagnostic push
 | 
			
		||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 | 
			
		||||
@@ -21,6 +21,32 @@ if BUILD_HDF5
 | 
			
		||||
  extra_headers+=serialisation/Hdf5Type.h
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
all: version-cache
 | 
			
		||||
 | 
			
		||||
version-cache:
 | 
			
		||||
	@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
 | 
			
		||||
		a="uncommited changes";\
 | 
			
		||||
	else\
 | 
			
		||||
		a="clean";\
 | 
			
		||||
	fi;\
 | 
			
		||||
	echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
 | 
			
		||||
	if [ -e version-cache ]; then\
 | 
			
		||||
		d=`diff vertmp version-cache`;\
 | 
			
		||||
		if [ "$${d}" != "" ]; then\
 | 
			
		||||
			mv vertmp version-cache;\
 | 
			
		||||
			rm -f Version.h;\
 | 
			
		||||
		fi;\
 | 
			
		||||
	else\
 | 
			
		||||
		mv vertmp version-cache;\
 | 
			
		||||
		rm -f Version.h;\
 | 
			
		||||
	fi;\
 | 
			
		||||
	rm -f vertmp
 | 
			
		||||
 | 
			
		||||
Version.h:
 | 
			
		||||
	cp version-cache Version.h
 | 
			
		||||
 | 
			
		||||
.PHONY: version-cache
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Libraries
 | 
			
		||||
#
 | 
			
		||||
@@ -30,8 +56,8 @@ include Eigen.inc
 | 
			
		||||
lib_LIBRARIES = libGrid.a
 | 
			
		||||
 | 
			
		||||
CCFILES += $(extra_sources)
 | 
			
		||||
HFILES  += $(extra_headers)
 | 
			
		||||
HFILES  += $(extra_headers) Config.h Version.h
 | 
			
		||||
 | 
			
		||||
libGrid_a_SOURCES              = $(CCFILES)
 | 
			
		||||
libGrid_adir                   = $(pkgincludedir)
 | 
			
		||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
 | 
			
		||||
libGrid_adir                   = $(includedir)/Grid
 | 
			
		||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
 | 
			
		||||
@@ -51,7 +51,7 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      virtual void Op     (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
      virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
 | 
			
		||||
      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
 | 
			
		||||
      virtual void HermOp(const Field &in, Field &out)=0;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
@@ -309,36 +309,59 @@ namespace Grid {
 | 
			
		||||
      class SchurStaggeredOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
    protected:
 | 
			
		||||
      Matrix &_Mat;
 | 
			
		||||
      Field tmp;
 | 
			
		||||
      RealD mass;
 | 
			
		||||
      double tMpc;
 | 
			
		||||
      double tIP;
 | 
			
		||||
      double tMeo;
 | 
			
		||||
      double taxpby_norm;
 | 
			
		||||
      uint64_t ncall;
 | 
			
		||||
    public:
 | 
			
		||||
      SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
      void Report(void)
 | 
			
		||||
      {
 | 
			
		||||
	std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << " HermOpAndNorm.IP  "<< tIP /ncall<<" usec "<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << " Mpc.MeoMoe        "<< tMeo/ncall<<" usec "<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << " Mpc.axpby_norm    "<< taxpby_norm/ncall<<" usec "<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid()) 
 | 
			
		||||
      { 
 | 
			
		||||
	assert( _Mat.isTrivialEE() );
 | 
			
		||||
	mass = _Mat.Mass();
 | 
			
		||||
	tMpc=0;
 | 
			
		||||
	tIP =0;
 | 
			
		||||
        tMeo=0;
 | 
			
		||||
        taxpby_norm=0;
 | 
			
		||||
	ncall=0;
 | 
			
		||||
      }
 | 
			
		||||
      virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
 | 
			
		||||
	GridLogIterative.TimingMode(1);
 | 
			
		||||
	std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
 | 
			
		||||
	ncall++;
 | 
			
		||||
	tMpc-=usecond();
 | 
			
		||||
	n2 = Mpc(in,out);
 | 
			
		||||
	std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
 | 
			
		||||
	tMpc+=usecond();
 | 
			
		||||
	tIP-=usecond();
 | 
			
		||||
	ComplexD dot= innerProduct(in,out);
 | 
			
		||||
	std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
 | 
			
		||||
	tIP+=usecond();
 | 
			
		||||
	n1 = real(dot);
 | 
			
		||||
      }
 | 
			
		||||
      virtual void HermOp(const Field &in, Field &out){
 | 
			
		||||
	std::cout << GridLogIterative << " HermOp "<<std::endl;
 | 
			
		||||
	Mpc(in,out);
 | 
			
		||||
	ncall++;
 | 
			
		||||
	tMpc-=usecond();
 | 
			
		||||
	_Mat.Meooe(in,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
	tMpc+=usecond();
 | 
			
		||||
	taxpby_norm-=usecond();
 | 
			
		||||
	axpby(out,-1.0,mass*mass,tmp,in);
 | 
			
		||||
	taxpby_norm+=usecond();
 | 
			
		||||
      }
 | 
			
		||||
      virtual  RealD Mpc      (const Field &in, Field &out) {
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
	Field tmp2(in._grid);
 | 
			
		||||
 | 
			
		||||
	std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
 | 
			
		||||
	_Mat.Mooee(in,out);
 | 
			
		||||
	_Mat.Mooee(out,tmp);
 | 
			
		||||
	std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
 | 
			
		||||
 | 
			
		||||
	tMeo-=usecond();
 | 
			
		||||
	_Mat.Meooe(in,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp2);
 | 
			
		||||
	std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
 | 
			
		||||
 | 
			
		||||
	RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
 | 
			
		||||
	std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
	tMeo+=usecond();
 | 
			
		||||
	taxpby_norm-=usecond();
 | 
			
		||||
	RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
 | 
			
		||||
	taxpby_norm+=usecond();
 | 
			
		||||
	return nn;
 | 
			
		||||
      }
 | 
			
		||||
      virtual  RealD MpcDag   (const Field &in, Field &out){
 | 
			
		||||
@@ -357,6 +380,12 @@ namespace Grid {
 | 
			
		||||
    template<class Field> class OperatorFunction {
 | 
			
		||||
    public:
 | 
			
		||||
      virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
 | 
			
		||||
      virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
 | 
			
		||||
	assert(in.size()==out.size());
 | 
			
		||||
	for(int k=0;k<in.size();k++){
 | 
			
		||||
	  (*this)(Linop,in[k],out[k]);
 | 
			
		||||
	}
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Field> class LinearFunction {
 | 
			
		||||
@@ -398,7 +427,7 @@ namespace Grid {
 | 
			
		||||
  // Hermitian operator Linear function and operator function
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class Field>
 | 
			
		||||
      class HermOpOperatorFunction : public OperatorFunction<Field> {
 | 
			
		||||
    class HermOpOperatorFunction : public OperatorFunction<Field> {
 | 
			
		||||
      void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
 | 
			
		||||
	Linop.HermOp(in,out);
 | 
			
		||||
      };
 | 
			
		||||
@@ -55,6 +55,14 @@ namespace Grid {
 | 
			
		||||
    template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
 | 
			
		||||
    public:
 | 
			
		||||
      virtual GridBase *RedBlackGrid(void)=0;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      // Query the even even properties to make algorithmic decisions
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      virtual RealD  Mass(void)        { return 0.0; };
 | 
			
		||||
      virtual int    ConstEE(void)     { return 0; }; // Disable assumptions unless overridden
 | 
			
		||||
      virtual int    isTrivialEE(void) { return 0; }; // by a derived class that knows better
 | 
			
		||||
 | 
			
		||||
      // half checkerboard operaions
 | 
			
		||||
      virtual  void Meooe    (const Field &in, Field &out)=0;
 | 
			
		||||
      virtual  void Mooee    (const Field &in, Field &out)=0;
 | 
			
		||||
@@ -33,7 +33,7 @@ directory
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
 | 
			
		||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Block conjugate gradient. Dimension zero should be the block direction
 | 
			
		||||
@@ -42,7 +42,6 @@ template <class Field>
 | 
			
		||||
class BlockConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  typedef typename Field::scalar_type scomplex;
 | 
			
		||||
 | 
			
		||||
  int blockDim ;
 | 
			
		||||
@@ -54,21 +53,15 @@ class BlockConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
  RealD Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  Integer PrintInterval; //GridLogMessages or Iterative
 | 
			
		||||
  
 | 
			
		||||
  BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
			
		||||
    : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
 | 
			
		||||
    : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
 | 
			
		||||
  {};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Thin QR factorisation (google it)
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
 | 
			
		||||
		 Eigen::MatrixXcd &C,
 | 
			
		||||
		 Eigen::MatrixXcd &Cinv,
 | 
			
		||||
		 Field & Q,
 | 
			
		||||
		 const Field & R)
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //Dimensions
 | 
			
		||||
  // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock
 | 
			
		||||
@@ -85,22 +78,20 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
 | 
			
		||||
  // Cdag C = Rdag R ; passes.
 | 
			
		||||
  // QdagQ  = 1      ; passes
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
 | 
			
		||||
		 Eigen::MatrixXcd &C,
 | 
			
		||||
		 Eigen::MatrixXcd &Cinv,
 | 
			
		||||
		 Field & Q,
 | 
			
		||||
		 const Field & R)
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  sliceInnerProductMatrix(m_rr,R,R,Orthog);
 | 
			
		||||
 | 
			
		||||
  // Force manifest hermitian to avoid rounding related
 | 
			
		||||
  m_rr = 0.5*(m_rr+m_rr.adjoint());
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  std::cout << " Calling Cholesky  ldlt on m_rr "  << m_rr <<std::endl;
 | 
			
		||||
  Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL(); 
 | 
			
		||||
  std::cout << " Called Cholesky  ldlt on m_rr "  << L_ldlt <<std::endl;
 | 
			
		||||
  auto  D_ldlt = m_rr.ldlt().vectorD(); 
 | 
			
		||||
  std::cout << " Called Cholesky  ldlt on m_rr "  << D_ldlt <<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  //  std::cout << " Calling Cholesky  llt on m_rr "  <<std::endl;
 | 
			
		||||
  Eigen::MatrixXcd L    = m_rr.llt().matrixL(); 
 | 
			
		||||
  //  std::cout << " Called Cholesky  llt on m_rr "  << L <<std::endl;
 | 
			
		||||
 | 
			
		||||
  C    = L.adjoint();
 | 
			
		||||
  Cinv = C.inverse();
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -112,6 +103,25 @@ void ThinQRfact (Eigen::MatrixXcd &m_rr,
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  sliceMulMatrix(Q,Cinv,R,Orthog);
 | 
			
		||||
}
 | 
			
		||||
// see comments above
 | 
			
		||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
 | 
			
		||||
		 Eigen::MatrixXcd &C,
 | 
			
		||||
		 Eigen::MatrixXcd &Cinv,
 | 
			
		||||
		 std::vector<Field> & Q,
 | 
			
		||||
		 const std::vector<Field> & R)
 | 
			
		||||
{
 | 
			
		||||
  InnerProductMatrix(m_rr,R,R);
 | 
			
		||||
 | 
			
		||||
  m_rr = 0.5*(m_rr+m_rr.adjoint());
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd L    = m_rr.llt().matrixL(); 
 | 
			
		||||
 | 
			
		||||
  C    = L.adjoint();
 | 
			
		||||
  Cinv = C.inverse();
 | 
			
		||||
 | 
			
		||||
  MulMatrix(Q,Cinv,R);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Call one of several implementations
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -119,14 +129,20 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
 | 
			
		||||
{
 | 
			
		||||
  if ( CGtype == BlockCGrQ ) {
 | 
			
		||||
    BlockCGrQsolve(Linop,Src,Psi);
 | 
			
		||||
  } else if (CGtype == BlockCG ) {
 | 
			
		||||
    BlockCGsolve(Linop,Src,Psi);
 | 
			
		||||
  } else if (CGtype == CGmultiRHS ) {
 | 
			
		||||
    CGmultiRHSsolve(Linop,Src,Psi);
 | 
			
		||||
  } else {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi) 
 | 
			
		||||
{
 | 
			
		||||
  if ( CGtype == BlockCGrQVec ) {
 | 
			
		||||
    BlockCGrQsolveVec(Linop,Src,Psi);
 | 
			
		||||
  } else {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// BlockCGrQ implementation:
 | 
			
		||||
@@ -139,7 +155,8 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  Nblock = B._grid->_fdimensions[Orthog];
 | 
			
		||||
 | 
			
		||||
/* FAKE */
 | 
			
		||||
  Nblock=8;
 | 
			
		||||
  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
 | 
			
		||||
 | 
			
		||||
  X.checkerboard = B.checkerboard;
 | 
			
		||||
@@ -202,15 +219,10 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 | 
			
		||||
  std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
 | 
			
		||||
 | 
			
		||||
  //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
 | 
			
		||||
 | 
			
		||||
  Linop.HermOp(X, AD);
 | 
			
		||||
  tmp = B - AD;  
 | 
			
		||||
  //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
 | 
			
		||||
 | 
			
		||||
  ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
 | 
			
		||||
  //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
 | 
			
		||||
  //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
 | 
			
		||||
  //std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
 | 
			
		||||
  //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
 | 
			
		||||
  D=Q;
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
 | 
			
		||||
@@ -232,14 +244,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 | 
			
		||||
    MatrixTimer.Start();
 | 
			
		||||
    Linop.HermOp(D, Z);      
 | 
			
		||||
    MatrixTimer.Stop();
 | 
			
		||||
    //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    //4. M  = [D^dag Z]^{-1}
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_M       = m_DZ.inverse();
 | 
			
		||||
    //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    //5. X  = X + D MC
 | 
			
		||||
    m_tmp     = m_M * m_C;
 | 
			
		||||
@@ -257,6 +267,7 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 | 
			
		||||
    
 | 
			
		||||
    //7. D  = Q + D S^dag
 | 
			
		||||
    m_tmp = m_S.adjoint();
 | 
			
		||||
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
@@ -317,152 +328,6 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) 
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  Nblock = Src._grid->_fdimensions[Orthog];
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
 | 
			
		||||
 | 
			
		||||
  Psi.checkerboard = Src.checkerboard;
 | 
			
		||||
  conformable(Psi, Src);
 | 
			
		||||
 | 
			
		||||
  Field P(Src);
 | 
			
		||||
  Field AP(Src);
 | 
			
		||||
  Field R(Src);
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  // Initial residual computation & set up
 | 
			
		||||
  std::vector<RealD> residuals(Nblock);
 | 
			
		||||
  std::vector<RealD> ssq(Nblock);
 | 
			
		||||
 | 
			
		||||
  sliceNorm(ssq,Src,Orthog);
 | 
			
		||||
  RealD sssum=0;
 | 
			
		||||
  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,Src,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,Psi,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  // Initial search dir is guess
 | 
			
		||||
  Linop.HermOp(Psi, AP);
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  /************************************************************************
 | 
			
		||||
   * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
 | 
			
		||||
   ************************************************************************
 | 
			
		||||
   * O'Leary : R = B - A X
 | 
			
		||||
   * O'Leary : P = M R ; preconditioner M = 1
 | 
			
		||||
   * O'Leary : alpha = PAP^{-1} RMR
 | 
			
		||||
   * O'Leary : beta  = RMR^{-1}_old RMR_new
 | 
			
		||||
   * O'Leary : X=X+Palpha
 | 
			
		||||
   * O'Leary : R_new=R_old-AP alpha
 | 
			
		||||
   * O'Leary : P=MR_new+P beta
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  R = Src - AP;  
 | 
			
		||||
  P = R;
 | 
			
		||||
  sliceInnerProductMatrix(m_rr,R,R,Orthog);
 | 
			
		||||
 | 
			
		||||
  GridStopWatch sliceInnerTimer;
 | 
			
		||||
  GridStopWatch sliceMaddTimer;
 | 
			
		||||
  GridStopWatch MatrixTimer;
 | 
			
		||||
  GridStopWatch SolverTimer;
 | 
			
		||||
  SolverTimer.Start();
 | 
			
		||||
 | 
			
		||||
  int k;
 | 
			
		||||
  for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
 | 
			
		||||
    RealD rrsum=0;
 | 
			
		||||
    for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
 | 
			
		||||
	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    MatrixTimer.Start();
 | 
			
		||||
    Linop.HermOp(P, AP);
 | 
			
		||||
    MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Alpha
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_pAp_inv = m_pAp.inverse();
 | 
			
		||||
    m_alpha   = m_pAp_inv * m_rr ;
 | 
			
		||||
 | 
			
		||||
    // Psi, R update
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi
 | 
			
		||||
    sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Beta
 | 
			
		||||
    m_rr_inv = m_rr.inverse();
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductMatrix(m_rr,R,R,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_beta = m_rr_inv *m_rr;
 | 
			
		||||
 | 
			
		||||
    // Search update
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(AP,m_beta,P,R,Orthog);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
    P= AP;
 | 
			
		||||
 | 
			
		||||
    /*********************
 | 
			
		||||
     * convergence monitor
 | 
			
		||||
     *********************
 | 
			
		||||
     */
 | 
			
		||||
    RealD max_resid=0;
 | 
			
		||||
    RealD rr;
 | 
			
		||||
    for(int b=0;b<Nblock;b++){
 | 
			
		||||
      rr = real(m_rr(b,b))/ssq[b];
 | 
			
		||||
      if ( rr > max_resid ) max_resid = rr;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if ( max_resid < Tolerance*Tolerance ) { 
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
 | 
			
		||||
      for(int b=0;b<Nblock;b++){
 | 
			
		||||
	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
 | 
			
		||||
		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(Psi, AP);
 | 
			
		||||
      AP = AP-Src;
 | 
			
		||||
      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
 | 
			
		||||
	    
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
 | 
			
		||||
 | 
			
		||||
  if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
 | 
			
		||||
// Use this for spread out across nodes
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -600,6 +465,233 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
 | 
			
		||||
  for(int b=0;b<Nblock;b++){
 | 
			
		||||
  for(int bp=0;bp<Nblock;bp++) {
 | 
			
		||||
    m(b,bp) = innerProduct(X[b],Y[bp]);  
 | 
			
		||||
  }}
 | 
			
		||||
}
 | 
			
		||||
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
 | 
			
		||||
  // Should make this cache friendly with site outermost, parallel_for
 | 
			
		||||
  // Deal with case AP aliases with either Y or X
 | 
			
		||||
  std::vector<Field> tmp(Nblock,X[0]);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){
 | 
			
		||||
    tmp[b]   = Y[b];
 | 
			
		||||
    for(int bp=0;bp<Nblock;bp++) {
 | 
			
		||||
      tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp]; 
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  for(int b=0;b<Nblock;b++){
 | 
			
		||||
    AP[b] = tmp[b];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
 | 
			
		||||
  // Should make this cache friendly with site outermost, parallel_for
 | 
			
		||||
  for(int b=0;b<Nblock;b++){
 | 
			
		||||
    AP[b] = zero;
 | 
			
		||||
    for(int bp=0;bp<Nblock;bp++) {
 | 
			
		||||
      AP[b] += (m(bp,b))*X[bp]; 
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
double normv(const std::vector<Field> &P){
 | 
			
		||||
  double nn = 0.0;
 | 
			
		||||
  for(int b=0;b<Nblock;b++) {
 | 
			
		||||
    nn+=norm2(P[b]);
 | 
			
		||||
  }
 | 
			
		||||
  return nn;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// BlockCGrQvec implementation:
 | 
			
		||||
//--------------------------
 | 
			
		||||
// X is guess/Solution
 | 
			
		||||
// B is RHS
 | 
			
		||||
// Solve A X_i = B_i    ;        i refers to Nblock index
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X) 
 | 
			
		||||
{
 | 
			
		||||
  Nblock = B.size();
 | 
			
		||||
  assert(Nblock == X.size());
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
 | 
			
		||||
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ 
 | 
			
		||||
    X[b].checkerboard = B[b].checkerboard;
 | 
			
		||||
    conformable(X[b], B[b]);
 | 
			
		||||
    conformable(X[b], X[0]); 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Field Fake(B[0]);
 | 
			
		||||
 | 
			
		||||
  std::vector<Field> tmp(Nblock,Fake);
 | 
			
		||||
  std::vector<Field>   Q(Nblock,Fake);
 | 
			
		||||
  std::vector<Field>   D(Nblock,Fake);
 | 
			
		||||
  std::vector<Field>   Z(Nblock,Fake);
 | 
			
		||||
  std::vector<Field>  AD(Nblock,Fake);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  // Initial residual computation & set up
 | 
			
		||||
  std::vector<RealD> residuals(Nblock);
 | 
			
		||||
  std::vector<RealD> ssq(Nblock);
 | 
			
		||||
 | 
			
		||||
  RealD sssum=0;
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
 | 
			
		||||
  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
 | 
			
		||||
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  /************************************************************************
 | 
			
		||||
   * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
 | 
			
		||||
   ************************************************************************
 | 
			
		||||
   * Dimensions:
 | 
			
		||||
   *
 | 
			
		||||
   *   X,B==(Nferm x Nblock)
 | 
			
		||||
   *   A==(Nferm x Nferm)
 | 
			
		||||
   *  
 | 
			
		||||
   * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
 | 
			
		||||
   * 
 | 
			
		||||
   * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
 | 
			
		||||
   * for k: 
 | 
			
		||||
   *   Z  = AD
 | 
			
		||||
   *   M  = [D^dag Z]^{-1}
 | 
			
		||||
   *   X  = X + D MC
 | 
			
		||||
   *   QS = Q - ZM
 | 
			
		||||
   *   D  = Q + D S^dag
 | 
			
		||||
   *   C  = S C
 | 
			
		||||
   */
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  // Initial block: initial search dir is guess
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
 | 
			
		||||
 | 
			
		||||
  //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
 | 
			
		||||
  for(int b=0;b<Nblock;b++) {
 | 
			
		||||
    Linop.HermOp(X[b], AD[b]);
 | 
			
		||||
    tmp[b] = B[b] - AD[b];  
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
 | 
			
		||||
 | 
			
		||||
  for(int b=0;b<Nblock;b++) D[b]=Q[b];
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  // Timers
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  GridStopWatch sliceInnerTimer;
 | 
			
		||||
  GridStopWatch sliceMaddTimer;
 | 
			
		||||
  GridStopWatch QRTimer;
 | 
			
		||||
  GridStopWatch MatrixTimer;
 | 
			
		||||
  GridStopWatch SolverTimer;
 | 
			
		||||
  SolverTimer.Start();
 | 
			
		||||
 | 
			
		||||
  int k;
 | 
			
		||||
  for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
 | 
			
		||||
    //3. Z  = AD
 | 
			
		||||
    MatrixTimer.Start();
 | 
			
		||||
    for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);      
 | 
			
		||||
    MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //4. M  = [D^dag Z]^{-1}
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    InnerProductMatrix(m_DZ,D,Z);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_M       = m_DZ.inverse();
 | 
			
		||||
    
 | 
			
		||||
    //5. X  = X + D MC
 | 
			
		||||
    m_tmp     = m_M * m_C;
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    MaddMatrix(X,m_tmp, D,X);     
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //6. QS = Q - ZM
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    MaddMatrix(tmp,m_M,Z,Q,-1.0);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
    QRTimer.Start();
 | 
			
		||||
    ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
 | 
			
		||||
    QRTimer.Stop();
 | 
			
		||||
    
 | 
			
		||||
    //7. D  = Q + D S^dag
 | 
			
		||||
    m_tmp = m_S.adjoint();
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    MaddMatrix(D,m_tmp,D,Q);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //8. C  = S C
 | 
			
		||||
    m_C = m_S*m_C;
 | 
			
		||||
    
 | 
			
		||||
    /*********************
 | 
			
		||||
     * convergence monitor
 | 
			
		||||
     *********************
 | 
			
		||||
     */
 | 
			
		||||
    m_rr = m_C.adjoint() * m_C;
 | 
			
		||||
 | 
			
		||||
    RealD max_resid=0;
 | 
			
		||||
    RealD rrsum=0;
 | 
			
		||||
    RealD rr;
 | 
			
		||||
 | 
			
		||||
    for(int b=0;b<Nblock;b++) {
 | 
			
		||||
      rrsum+=real(m_rr(b,b));
 | 
			
		||||
      rr = real(m_rr(b,b))/ssq[b];
 | 
			
		||||
      if ( rr > max_resid ) max_resid = rr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    if ( max_resid < Tolerance*Tolerance ) { 
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
 | 
			
		||||
 | 
			
		||||
      for(int b=0;b<Nblock;b++){
 | 
			
		||||
	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
 | 
			
		||||
      for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
 | 
			
		||||
      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl;
 | 
			
		||||
	    
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
 | 
			
		||||
 | 
			
		||||
  if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -54,6 +54,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
 | 
			
		||||
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    psi.checkerboard = src.checkerboard;
 | 
			
		||||
    conformable(psi, src);
 | 
			
		||||
 | 
			
		||||
@@ -69,7 +70,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, b);
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    r = src - mmp;
 | 
			
		||||
    p = r;
 | 
			
		||||
@@ -96,38 +96,44 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
 | 
			
		||||
 | 
			
		||||
    GridStopWatch LinalgTimer;
 | 
			
		||||
    GridStopWatch InnerTimer;
 | 
			
		||||
    GridStopWatch AxpyNormTimer;
 | 
			
		||||
    GridStopWatch LinearCombTimer;
 | 
			
		||||
    GridStopWatch MatrixTimer;
 | 
			
		||||
    GridStopWatch SolverTimer;
 | 
			
		||||
 | 
			
		||||
    SolverTimer.Start();
 | 
			
		||||
    int k;
 | 
			
		||||
    for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
    for (k = 1; k <= MaxIterations*1000; k++) {
 | 
			
		||||
      c = cp;
 | 
			
		||||
 | 
			
		||||
      MatrixTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(p, mmp, d, qq);
 | 
			
		||||
      Linop.HermOp(p, mmp);
 | 
			
		||||
      MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      LinalgTimer.Start();
 | 
			
		||||
      //  RealD    qqck = norm2(mmp);
 | 
			
		||||
      //  ComplexD dck  = innerProduct(p,mmp);
 | 
			
		||||
 | 
			
		||||
      InnerTimer.Start();
 | 
			
		||||
      ComplexD dc  = innerProduct(p,mmp);
 | 
			
		||||
      InnerTimer.Stop();
 | 
			
		||||
      d = dc.real();
 | 
			
		||||
      a = c / d;
 | 
			
		||||
      b_pred = a * (a * qq - d) / c;
 | 
			
		||||
 | 
			
		||||
      AxpyNormTimer.Start();
 | 
			
		||||
      cp = axpy_norm(r, -a, mmp, r);
 | 
			
		||||
      AxpyNormTimer.Stop();
 | 
			
		||||
      b = cp / c;
 | 
			
		||||
 | 
			
		||||
      // Fuse these loops ; should be really easy
 | 
			
		||||
      psi = a * p + psi;
 | 
			
		||||
      p = p * b + r;
 | 
			
		||||
 | 
			
		||||
      LinearCombTimer.Start();
 | 
			
		||||
      parallel_for(int ss=0;ss<src._grid->oSites();ss++){
 | 
			
		||||
	vstream(psi[ss], a      *  p[ss] + psi[ss]);
 | 
			
		||||
	vstream(p  [ss], b      *  p[ss] + r[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      LinearCombTimer.Stop();
 | 
			
		||||
      LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
 | 
			
		||||
                << " residual " << cp << " target " << rsq << std::endl;
 | 
			
		||||
      std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl;
 | 
			
		||||
      std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl;
 | 
			
		||||
                << " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
      // Stopping condition
 | 
			
		||||
      if (cp <= rsq) {
 | 
			
		||||
@@ -148,6 +154,9 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tInner      " << InnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
 | 
			
		||||
 | 
			
		||||
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
			
		||||
 | 
			
		||||
@@ -43,6 +43,7 @@ namespace Grid {
 | 
			
		||||
public:                                                
 | 
			
		||||
    RealD   Tolerance;
 | 
			
		||||
    Integer MaxIterations;
 | 
			
		||||
    Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
    int verbose;
 | 
			
		||||
    MultiShiftFunction shifts;
 | 
			
		||||
 | 
			
		||||
@@ -163,7 +164,16 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
  for(int s=0;s<nshift;s++) {
 | 
			
		||||
    axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  // Timers
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  GridStopWatch AXPYTimer;
 | 
			
		||||
  GridStopWatch ShiftTimer;
 | 
			
		||||
  GridStopWatch QRTimer;
 | 
			
		||||
  GridStopWatch MatrixTimer;
 | 
			
		||||
  GridStopWatch SolverTimer;
 | 
			
		||||
  SolverTimer.Start();
 | 
			
		||||
  
 | 
			
		||||
  // Iteration loop
 | 
			
		||||
  int k;
 | 
			
		||||
@@ -171,7 +181,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
  for (k=1;k<=MaxIterations;k++){
 | 
			
		||||
    
 | 
			
		||||
    a = c /cp;
 | 
			
		||||
    AXPYTimer.Start();
 | 
			
		||||
    axpy(p,a,p,r);
 | 
			
		||||
    AXPYTimer.Stop();
 | 
			
		||||
    
 | 
			
		||||
    // Note to self - direction ps is iterated seperately
 | 
			
		||||
    // for each shift. Does not appear to have any scope
 | 
			
		||||
@@ -180,6 +192,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
    // However SAME r is used. Could load "r" and update
 | 
			
		||||
    // ALL ps[s]. 2/3 Bandwidth saving
 | 
			
		||||
    // New Kernel: Load r, vector of coeffs, vector of pointers ps
 | 
			
		||||
    AXPYTimer.Start();
 | 
			
		||||
    for(int s=0;s<nshift;s++){
 | 
			
		||||
      if ( ! converged[s] ) { 
 | 
			
		||||
	if (s==0){
 | 
			
		||||
@@ -190,22 +203,34 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    AXPYTimer.Stop();
 | 
			
		||||
    
 | 
			
		||||
    cp=c;
 | 
			
		||||
    MatrixTimer.Start();  
 | 
			
		||||
    //Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
 | 
			
		||||
    // The below is faster on KNL
 | 
			
		||||
    Linop.HermOp(p,mmp); 
 | 
			
		||||
    d=real(innerProduct(p,mmp));
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(p,mmp,d,qq);
 | 
			
		||||
    MatrixTimer.Stop();  
 | 
			
		||||
 | 
			
		||||
    AXPYTimer.Start();
 | 
			
		||||
    axpy(mmp,mass[0],p,mmp);
 | 
			
		||||
    AXPYTimer.Stop();
 | 
			
		||||
    RealD rn = norm2(p);
 | 
			
		||||
    d += rn*mass[0];
 | 
			
		||||
    
 | 
			
		||||
    bp=b;
 | 
			
		||||
    b=-cp/d;
 | 
			
		||||
    
 | 
			
		||||
    AXPYTimer.Start();
 | 
			
		||||
    c=axpy_norm(r,b,mmp,r);
 | 
			
		||||
    AXPYTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Toggle the recurrence history
 | 
			
		||||
    bs[0] = b;
 | 
			
		||||
    iz = 1-iz;
 | 
			
		||||
    ShiftTimer.Start();
 | 
			
		||||
    for(int s=1;s<nshift;s++){
 | 
			
		||||
      if((!converged[s])){
 | 
			
		||||
	RealD z0 = z[s][1-iz];
 | 
			
		||||
@@ -215,6 +240,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
	bs[s] = b*z[s][iz]/z0; // NB sign  rel to Mike
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    ShiftTimer.Stop();
 | 
			
		||||
    
 | 
			
		||||
    for(int s=0;s<nshift;s++){
 | 
			
		||||
      int ss = s;
 | 
			
		||||
@@ -257,6 +283,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
    
 | 
			
		||||
    if ( all_converged ){
 | 
			
		||||
 | 
			
		||||
    SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
@@ -269,8 +298,19 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
	RealD cn = norm2(src);
 | 
			
		||||
	std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tAXPY    " << AXPYTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMarix    " << MatrixTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tShift    " << ShiftTimer.Elapsed()     <<std::endl;
 | 
			
		||||
 | 
			
		||||
      IterationsToComplete = k;	
 | 
			
		||||
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
   
 | 
			
		||||
  }
 | 
			
		||||
  // ugly hack
 | 
			
		||||
  std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
 | 
			
		||||
@@ -30,22 +30,23 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
struct ZeroGuesser {
 | 
			
		||||
template<class Field>
 | 
			
		||||
class ZeroGuesser: public LinearFunction<Field> {
 | 
			
		||||
public:
 | 
			
		||||
  template<class Field> 
 | 
			
		||||
  void operator()(const Field &src,Field &guess) { guess = Zero(); };
 | 
			
		||||
  virtual void operator()(const Field &src, Field &guess) { guess = zero; };
 | 
			
		||||
};
 | 
			
		||||
struct SourceGuesser {
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class SourceGuesser: public LinearFunction<Field> {
 | 
			
		||||
public:
 | 
			
		||||
  template<class Field> 
 | 
			
		||||
  void operator()(const Field &src,Field &guess) { guess = src; };
 | 
			
		||||
  virtual void operator()(const Field &src, Field &guess) { guess = src; };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////
 | 
			
		||||
// Fine grid deflation
 | 
			
		||||
////////////////////////////////
 | 
			
		||||
template<class Field>
 | 
			
		||||
struct DeflatedGuesser {
 | 
			
		||||
class DeflatedGuesser: public LinearFunction<Field> {
 | 
			
		||||
private:
 | 
			
		||||
  const std::vector<Field> &evec;
 | 
			
		||||
  const std::vector<RealD> &eval;
 | 
			
		||||
@@ -54,7 +55,7 @@ public:
 | 
			
		||||
 | 
			
		||||
  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
 | 
			
		||||
 | 
			
		||||
  void operator()(const Field &src,Field &guess) { 
 | 
			
		||||
  virtual void operator()(const Field &src,Field &guess) {
 | 
			
		||||
    guess = zero;
 | 
			
		||||
    assert(evec.size()==eval.size());
 | 
			
		||||
    auto N = evec.size();
 | 
			
		||||
@@ -62,11 +63,12 @@ public:
 | 
			
		||||
      const Field& tmp = evec[i];
 | 
			
		||||
      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
 | 
			
		||||
    }
 | 
			
		||||
    guess.checkerboard = src.checkerboard;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class FineField, class CoarseField>
 | 
			
		||||
class LocalCoherenceDeflatedGuesser {
 | 
			
		||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
 | 
			
		||||
private:
 | 
			
		||||
  const std::vector<FineField>   &subspace;
 | 
			
		||||
  const std::vector<CoarseField> &evec_coarse;
 | 
			
		||||
@@ -92,6 +94,7 @@ public:
 | 
			
		||||
      axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
 | 
			
		||||
    }
 | 
			
		||||
    blockPromote(guess_coarse,guess,subspace);
 | 
			
		||||
    guess.checkerboard = src.checkerboard;
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -57,8 +57,9 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
 | 
			
		||||
      
 | 
			
		||||
  parallel_region
 | 
			
		||||
  {
 | 
			
		||||
    std::vector < vobj > B(Nm); // Thread private
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
    std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
 | 
			
		||||
       
 | 
			
		||||
    parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
 | 
			
		||||
      for(int j=j0; j<j1; ++j) B[j]=0.;
 | 
			
		||||
      
 | 
			
		||||
@@ -285,9 +285,11 @@ public:
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Orthogonalise(void ) {
 | 
			
		||||
    CoarseScalar InnerProd(_CoarseGrid); 
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
 | 
			
		||||
    CoarseScalar InnerProd(_CoarseGrid);
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<typename T>  static RealD normalise(T& v) 
 | 
			
		||||
@@ -333,7 +335,7 @@ public:
 | 
			
		||||
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    Chebyshev<FineField>                          ChebySmooth(cheby_smooth);
 | 
			
		||||
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_subspace);
 | 
			
		||||
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
 | 
			
		||||
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
 | 
			
		||||
 | 
			
		||||
    for(int k=0;k<evec_coarse.size();k++){
 | 
			
		||||
@@ -374,14 +376,14 @@ public:
 | 
			
		||||
		  RealD MaxIt, RealD betastp, int MinRes)
 | 
			
		||||
  {
 | 
			
		||||
    Chebyshev<FineField>                          Cheby(cheby_op);
 | 
			
		||||
    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,_subspace);
 | 
			
		||||
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_subspace);
 | 
			
		||||
    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,subspace);
 | 
			
		||||
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField>                                           ChebySmooth(cheby_smooth);
 | 
			
		||||
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_subspace,relax);
 | 
			
		||||
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
 | 
			
		||||
 | 
			
		||||
    evals_coarse.resize(Nm);
 | 
			
		||||
    evec_coarse.resize(Nm,_CoarseGrid);
 | 
			
		||||
							
								
								
									
										473
									
								
								Grid/algorithms/iterative/SchurRedBlack.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										473
									
								
								Grid/algorithms/iterative/SchurRedBlack.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,473 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
#define GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
   * Red black Schur decomposition
 | 
			
		||||
   *
 | 
			
		||||
   *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo)
 | 
			
		||||
   *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         )
 | 
			
		||||
   *                =         L                     D                     U
 | 
			
		||||
   *
 | 
			
		||||
   * L^-1 = (1              0 )
 | 
			
		||||
   *        (-MoeMee^{-1}   1 )   
 | 
			
		||||
   * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} )
 | 
			
		||||
   *           ( 0       1                    )
 | 
			
		||||
   * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} )
 | 
			
		||||
   *           ( 0       1                    )
 | 
			
		||||
   *
 | 
			
		||||
   * U^-1 = (1   -Mee^{-1} Meo)
 | 
			
		||||
   *        (0    1           )
 | 
			
		||||
   * U^{dag} = ( 1                 0)
 | 
			
		||||
   *           (Meo^dag Mee^{-dag} 1)
 | 
			
		||||
   * U^{-dag} = (  1                 0)
 | 
			
		||||
   *            (-Meo^dag Mee^{-dag} 1)
 | 
			
		||||
   ***********************
 | 
			
		||||
   *     M psi = eta
 | 
			
		||||
   ***********************
 | 
			
		||||
   *Odd
 | 
			
		||||
   * i)                 D_oo psi_o =  L^{-1}  eta_o
 | 
			
		||||
   *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *
 | 
			
		||||
   * Wilson:
 | 
			
		||||
   *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o
 | 
			
		||||
   * Stag:
 | 
			
		||||
   *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *
 | 
			
		||||
   * L^-1 eta_o= (1              0 ) (e
 | 
			
		||||
   *             (-MoeMee^{-1}   1 )   
 | 
			
		||||
   *
 | 
			
		||||
   *Even
 | 
			
		||||
   * ii)  Mee psi_e + Meo psi_o = src_e
 | 
			
		||||
   *
 | 
			
		||||
   *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
   *
 | 
			
		||||
   * 
 | 
			
		||||
   * TODO: Other options:
 | 
			
		||||
   * 
 | 
			
		||||
   * a) change checkerboards for Schur e<->o
 | 
			
		||||
   *
 | 
			
		||||
   * Left precon by Moo^-1
 | 
			
		||||
   * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 =  (D_oo)^dag M_oo^-dag Moo^-1 L^{-1}  eta_o
 | 
			
		||||
   *                              eta_o'     = (D_oo)^dag  M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *
 | 
			
		||||
   * Right precon by Moo^-1
 | 
			
		||||
   * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o
 | 
			
		||||
   *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *                              psi_o = M_oo^-1 phi_o
 | 
			
		||||
   * TODO: Deflation 
 | 
			
		||||
   */
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Use base class to share code
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Take a matrix and form a Red Black solver calling a Herm solver
 | 
			
		||||
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class Field> class SchurRedBlackBase {
 | 
			
		||||
  protected:
 | 
			
		||||
    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
 | 
			
		||||
    OperatorFunction<Field> & _HermitianRBSolver;
 | 
			
		||||
    int CBfactorise;
 | 
			
		||||
    bool subGuess;
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  :
 | 
			
		||||
    _HermitianRBSolver(HermitianRBSolver) 
 | 
			
		||||
    { 
 | 
			
		||||
      CBfactorise = 0;
 | 
			
		||||
      subtractGuess(initSubGuess);
 | 
			
		||||
    };
 | 
			
		||||
    void subtractGuess(const bool initSubGuess)
 | 
			
		||||
    {
 | 
			
		||||
      subGuess = initSubGuess;
 | 
			
		||||
    }
 | 
			
		||||
    bool isSubtractGuess(void)
 | 
			
		||||
    {
 | 
			
		||||
      return subGuess;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Shared code
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out){
 | 
			
		||||
      ZeroGuesser<Field> guess;
 | 
			
		||||
      (*this)(_Matrix,in,out,guess);
 | 
			
		||||
    }
 | 
			
		||||
    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out) 
 | 
			
		||||
    {
 | 
			
		||||
      ZeroGuesser<Field> guess;
 | 
			
		||||
      (*this)(_Matrix,in,out,guess);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Guesser>
 | 
			
		||||
    void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess) 
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
      int nblock = in.size();
 | 
			
		||||
 | 
			
		||||
      std::vector<Field> src_o(nblock,grid);
 | 
			
		||||
      std::vector<Field> sol_o(nblock,grid);
 | 
			
		||||
      
 | 
			
		||||
      std::vector<Field> guess_save;
 | 
			
		||||
 | 
			
		||||
      Field resid(fgrid);
 | 
			
		||||
      Field tmp(grid);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // Prepare RedBlack source
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      for(int b=0;b<nblock;b++){
 | 
			
		||||
	RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
 | 
			
		||||
      }
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // Make the guesses
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      if ( subGuess ) guess_save.resize(nblock,grid);
 | 
			
		||||
 | 
			
		||||
      for(int b=0;b<nblock;b++){
 | 
			
		||||
	guess(src_o[b],sol_o[b]); 
 | 
			
		||||
 | 
			
		||||
	if ( subGuess ) { 
 | 
			
		||||
	  guess_save[b] = sol_o[b];
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      // Call the block solver
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
 | 
			
		||||
      RedBlackSolve(_Matrix,src_o,sol_o);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // A2A boolean behavioural control & reconstruct other checkerboard
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      for(int b=0;b<nblock;b++) {
 | 
			
		||||
 | 
			
		||||
	if (subGuess)   sol_o[b] = sol_o[b] - guess_save[b];
 | 
			
		||||
 | 
			
		||||
	///////// Needs even source //////////////
 | 
			
		||||
	pickCheckerboard(Even,tmp,in[b]);
 | 
			
		||||
	RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
 | 
			
		||||
 | 
			
		||||
	/////////////////////////////////////////////////
 | 
			
		||||
	// Check unprec residual if possible
 | 
			
		||||
	/////////////////////////////////////////////////
 | 
			
		||||
	if ( ! subGuess ) {
 | 
			
		||||
	  _Matrix.M(out[b],resid); 
 | 
			
		||||
	  resid = resid-in[b];
 | 
			
		||||
	  RealD ns = norm2(in[b]);
 | 
			
		||||
	  RealD nr = norm2(resid);
 | 
			
		||||
	
 | 
			
		||||
	  std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
 | 
			
		||||
	} else {
 | 
			
		||||
	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    template<class Guesser>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
 | 
			
		||||
 | 
			
		||||
      // FIXME CGdiagonalMee not implemented virtual function
 | 
			
		||||
      // FIXME use CBfactorise to control schur decomp
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      Field resid(fgrid);
 | 
			
		||||
      Field src_o(grid);
 | 
			
		||||
      Field src_e(grid);
 | 
			
		||||
      Field sol_o(grid);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // RedBlack source
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      RedBlackSource(_Matrix,in,src_e,src_o);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      // Construct the guess
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      guess(src_o,sol_o);
 | 
			
		||||
 | 
			
		||||
      Field  guess_save(grid);
 | 
			
		||||
      guess_save = sol_o;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      // Call the red-black solver
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      RedBlackSolve(_Matrix,src_o,sol_o);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // Fionn A2A boolean behavioural control
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      if (subGuess)      sol_o= sol_o-guess_save;
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // RedBlack solution needs the even source
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      RedBlackSolution(_Matrix,sol_o,src_e,out);
 | 
			
		||||
 | 
			
		||||
      // Verify the unprec residual
 | 
			
		||||
      if ( ! subGuess ) {
 | 
			
		||||
        _Matrix.M(out,resid); 
 | 
			
		||||
        resid = resid-in;
 | 
			
		||||
        RealD ns = norm2(in);
 | 
			
		||||
        RealD nr = norm2(resid);
 | 
			
		||||
 | 
			
		||||
        std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
 | 
			
		||||
      } else {
 | 
			
		||||
        std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }     
 | 
			
		||||
    
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Override in derived. Not virtual as template methods
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0;
 | 
			
		||||
    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0;
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0;
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)=0;
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
 | 
			
		||||
 | 
			
		||||
    SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) 
 | 
			
		||||
      :    SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) 
 | 
			
		||||
    {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    // Override RedBlack specialisation
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,src);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,src);
 | 
			
		||||
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
 | 
			
		||||
    }
 | 
			
		||||
    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field   sol_e(grid);
 | 
			
		||||
      Field   src_e(grid);
 | 
			
		||||
 | 
			
		||||
      src_e = src_e_c; // Const correctness
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      setCheckerboard(sol,sol_e); assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(sol,sol_o); assert(  sol_o.checkerboard ==Odd );
 | 
			
		||||
    }
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
 | 
			
		||||
    {
 | 
			
		||||
      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
 | 
			
		||||
    };
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)
 | 
			
		||||
    {
 | 
			
		||||
      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Site diagonal has Mooee on it.
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
 | 
			
		||||
 | 
			
		||||
    SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  
 | 
			
		||||
      : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    // Override RedBlack specialisation
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,src);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,src);
 | 
			
		||||
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      // get the right MpcDag
 | 
			
		||||
      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  sol_e(grid);
 | 
			
		||||
      Field  src_e_i(grid);
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o,tmp);          assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      src_e_i = src_e-tmp;               assert(  src_e_i.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(src_e_i,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      setCheckerboard(sol,sol_e); assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(sol,sol_o); assert(  sol_o.checkerboard ==Odd );
 | 
			
		||||
    }
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
 | 
			
		||||
    {
 | 
			
		||||
      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
 | 
			
		||||
    };
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)
 | 
			
		||||
    {
 | 
			
		||||
      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Site diagonal is identity, right preconditioned by Mee^inv
 | 
			
		||||
  // ( 1 - Meo Moo^inv Moe Mee^inv  ) phi =( 1 - Meo Moo^inv Moe Mee^inv  ) Mee psi =  = eta  = eta
 | 
			
		||||
  //=> psi = MeeInv phi
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Wrap the usual normal equations Schur trick
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
  SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  
 | 
			
		||||
    : SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
 | 
			
		||||
 | 
			
		||||
    virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,src);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,src);
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      // get the right MpcDag
 | 
			
		||||
      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      Field   sol_o_i(grid);
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field   sol_e(grid);
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      // MooeeInv due to pecond
 | 
			
		||||
      ////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(sol_o,tmp);
 | 
			
		||||
      sol_o_i = tmp;
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o_i,tmp);    assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      tmp = src_e-tmp;               assert(  src_e.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(tmp,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      setCheckerboard(sol,sol_e);    assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(sol,sol_o_i);  assert(  sol_o_i.checkerboard ==Odd );
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)
 | 
			
		||||
    {
 | 
			
		||||
      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
 | 
			
		||||
    };
 | 
			
		||||
    virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o)
 | 
			
		||||
    {
 | 
			
		||||
      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
      this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); 
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -50,8 +50,6 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
 | 
			
		||||
      assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Grid_quiesce_nodes();
 | 
			
		||||
 | 
			
		||||
  // Never clean up as done once.
 | 
			
		||||
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
			
		||||
 | 
			
		||||
@@ -124,10 +122,8 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
 | 
			
		||||
  // split the communicator
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //  int Nparent = parent._processors ; 
 | 
			
		||||
  //  std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
 | 
			
		||||
  int Nparent;
 | 
			
		||||
  MPI_Comm_size(parent.communicator,&Nparent);
 | 
			
		||||
  //  std::cout << " Parent size  "<<Nparent <<std::endl;
 | 
			
		||||
 | 
			
		||||
  int childsize=1;
 | 
			
		||||
  for(int d=0;d<processors.size();d++) {
 | 
			
		||||
@@ -136,8 +132,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
 | 
			
		||||
  int Nchild = Nparent/childsize;
 | 
			
		||||
  assert (childsize * Nchild == Nparent);
 | 
			
		||||
 | 
			
		||||
  //  std::cout << " child size  "<<childsize <<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
 | 
			
		||||
  std::vector<int> scoor(_ndimension); // coor of split within parent
 | 
			
		||||
  std::vector<int> ssize(_ndimension); // coor of split within parent
 | 
			
		||||
@@ -132,7 +132,6 @@ int Log2Size(int TwoToPower,int MAXLOG2)
 | 
			
		||||
}
 | 
			
		||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 | 
			
		||||
{
 | 
			
		||||
#undef HYPERCUBE 
 | 
			
		||||
#ifdef HYPERCUBE
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Assert power of two shm_size.
 | 
			
		||||
@@ -175,7 +174,7 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
 | 
			
		||||
 | 
			
		||||
  std::string hname(name);
 | 
			
		||||
  std::cout << "hostname "<<hname<<std::endl;
 | 
			
		||||
  std::cout << "R " << R << " I " << I << " N "<< N<<
 | 
			
		||||
  std::cout << "R " << R << " I " << I << " N "<< N
 | 
			
		||||
            << " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -414,7 +413,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
    assert(((uint64_t)ptr&0x3F)==0);
 | 
			
		||||
    close(fd);
 | 
			
		||||
    WorldShmCommBufs[r] =ptr;
 | 
			
		||||
    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
			
		||||
    //    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
  _ShmAllocBytes  = bytes;
 | 
			
		||||
@@ -456,7 +455,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
    assert(((uint64_t)ptr&0x3F)==0);
 | 
			
		||||
    close(fd);
 | 
			
		||||
    WorldShmCommBufs[r] =ptr;
 | 
			
		||||
    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
			
		||||
    //    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
  _ShmAllocBytes  = bytes;
 | 
			
		||||
@@ -500,7 +499,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
#endif
 | 
			
		||||
      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
 | 
			
		||||
      
 | 
			
		||||
      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
 | 
			
		||||
      //      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
 | 
			
		||||
      if ( ptr == (void * )MAP_FAILED ) {       
 | 
			
		||||
	perror("failed mmap");     
 | 
			
		||||
	assert(0);    
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -244,19 +244,11 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  template<class sobj,class vobj> strong_inline
 | 
			
		||||
  RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
 | 
			
		||||
    ret.checkerboard = x.checkerboard;
 | 
			
		||||
    conformable(ret,x);
 | 
			
		||||
    conformable(x,y);
 | 
			
		||||
    axpy(ret,a,x,y);
 | 
			
		||||
    return norm2(ret);
 | 
			
		||||
    return axpy_norm_fast(ret,a,x,y);
 | 
			
		||||
  }
 | 
			
		||||
  template<class sobj,class vobj> strong_inline
 | 
			
		||||
  RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
 | 
			
		||||
    ret.checkerboard = x.checkerboard;
 | 
			
		||||
    conformable(ret,x);
 | 
			
		||||
    conformable(x,y);
 | 
			
		||||
    axpby(ret,a,b,x,y);
 | 
			
		||||
    return norm2(ret); // FIXME implement parallel norm in ss loop
 | 
			
		||||
    return axpby_norm_fast(ret,a,b,x,y);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -33,41 +33,94 @@ namespace Grid {
 | 
			
		||||
  // Deterministic Reduction operations
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
 | 
			
		||||
  ComplexD nrm = innerProduct(arg,arg);
 | 
			
		||||
  auto nrm = innerProduct(arg,arg);
 | 
			
		||||
  return std::real(nrm); 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Double inner product
 | 
			
		||||
template<class vobj>
 | 
			
		||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) 
 | 
			
		||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
  typedef typename vobj::vector_typeD vector_type;
 | 
			
		||||
  scalar_type  nrm;
 | 
			
		||||
  
 | 
			
		||||
  GridBase *grid = left._grid;
 | 
			
		||||
  
 | 
			
		||||
  std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
 | 
			
		||||
  
 | 
			
		||||
  const int pad = 8;
 | 
			
		||||
 | 
			
		||||
  ComplexD  inner;
 | 
			
		||||
  Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
 | 
			
		||||
 | 
			
		||||
  parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
			
		||||
    int nwork, mywork, myoff;
 | 
			
		||||
    GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
    
 | 
			
		||||
    decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
 | 
			
		||||
    decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
 | 
			
		||||
    for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
			
		||||
      vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
 | 
			
		||||
      vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
    sumarray[thr]=TensorRemove(vnrm) ;
 | 
			
		||||
    // All threads sum across SIMD; reduce serial work at end
 | 
			
		||||
    // one write per cacheline with streaming store
 | 
			
		||||
    ComplexD tmp = Reduce(TensorRemove(vinner)) ;
 | 
			
		||||
    vstream(sumarray[thr*pad],tmp);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  vector_type vvnrm; vvnrm=zero;  // sum across threads
 | 
			
		||||
  inner=0.0;
 | 
			
		||||
  for(int i=0;i<grid->SumArraySize();i++){
 | 
			
		||||
    vvnrm = vvnrm+sumarray[i];
 | 
			
		||||
    inner = inner+sumarray[i*pad];
 | 
			
		||||
  } 
 | 
			
		||||
  nrm = Reduce(vvnrm);// sum across simd
 | 
			
		||||
  right._grid->GlobalSum(nrm);
 | 
			
		||||
  return nrm;
 | 
			
		||||
  right._grid->GlobalSum(inner);
 | 
			
		||||
  return inner;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/////////////////////////
 | 
			
		||||
// Fast axpby_norm
 | 
			
		||||
// z = a x + b y
 | 
			
		||||
// return norm z
 | 
			
		||||
/////////////////////////
 | 
			
		||||
template<class sobj,class vobj> strong_inline RealD 
 | 
			
		||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y) 
 | 
			
		||||
{
 | 
			
		||||
  sobj one(1.0);
 | 
			
		||||
  return axpby_norm_fast(z,a,one,x,y);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class sobj,class vobj> strong_inline RealD 
 | 
			
		||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y) 
 | 
			
		||||
{
 | 
			
		||||
  const int pad = 8;
 | 
			
		||||
  z.checkerboard = x.checkerboard;
 | 
			
		||||
  conformable(z,x);
 | 
			
		||||
  conformable(x,y);
 | 
			
		||||
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
  typedef typename vobj::vector_typeD vector_type;
 | 
			
		||||
  RealD  nrm;
 | 
			
		||||
  
 | 
			
		||||
  GridBase *grid = x._grid;
 | 
			
		||||
  
 | 
			
		||||
  Vector<RealD> sumarray(grid->SumArraySize()*pad);
 | 
			
		||||
  
 | 
			
		||||
  parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
			
		||||
    int nwork, mywork, myoff;
 | 
			
		||||
    GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
    
 | 
			
		||||
    // private to thread; sub summation
 | 
			
		||||
    decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero; 
 | 
			
		||||
    for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
			
		||||
      vobj tmp = a*x._odata[ss]+b*y._odata[ss];
 | 
			
		||||
      vnrm = vnrm + innerProductD(tmp,tmp);
 | 
			
		||||
      vstream(z._odata[ss],tmp);
 | 
			
		||||
    }
 | 
			
		||||
    vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  nrm = 0.0; // sum across threads; linear in thread count but fast
 | 
			
		||||
  for(int i=0;i<grid->SumArraySize();i++){
 | 
			
		||||
    nrm = nrm+sumarray[i*pad];
 | 
			
		||||
  } 
 | 
			
		||||
  z._grid->GlobalSum(nrm);
 | 
			
		||||
  return nrm; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
template<class Op,class T1>
 | 
			
		||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
 | 
			
		||||
@@ -221,6 +274,115 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
 | 
			
		||||
{
 | 
			
		||||
  // std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
 | 
			
		||||
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
  std::vector<scalar_type> lsSum;
 | 
			
		||||
  localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
 | 
			
		||||
  globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
 | 
			
		||||
  // std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class vobj>
 | 
			
		||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
 | 
			
		||||
{
 | 
			
		||||
  // std::cout << GridLogMessage << "Start prep" << std::endl;
 | 
			
		||||
  typedef typename vobj::vector_type   vector_type;
 | 
			
		||||
  typedef typename vobj::scalar_type   scalar_type;
 | 
			
		||||
  GridBase  *grid = lhs._grid;
 | 
			
		||||
  assert(grid!=NULL);
 | 
			
		||||
  conformable(grid,rhs._grid);
 | 
			
		||||
 | 
			
		||||
  const int    Nd = grid->_ndimension;
 | 
			
		||||
  const int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
  assert(orthogdim >= 0);
 | 
			
		||||
  assert(orthogdim < Nd);
 | 
			
		||||
 | 
			
		||||
  int fd=grid->_fdimensions[orthogdim];
 | 
			
		||||
  int ld=grid->_ldimensions[orthogdim];
 | 
			
		||||
  int rd=grid->_rdimensions[orthogdim];
 | 
			
		||||
  // std::cout << GridLogMessage << "Start alloc" << std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
 | 
			
		||||
  lsSum.resize(ld,scalar_type(0.0));                    // sum across these down to scalars
 | 
			
		||||
  std::vector<iScalar<scalar_type>> extracted(Nsimd);   // splitting the SIMD  
 | 
			
		||||
  // std::cout << GridLogMessage << "End alloc" << std::endl;
 | 
			
		||||
 | 
			
		||||
  result.resize(fd); // And then global sum to return the same vector to every node for IO to file
 | 
			
		||||
  for(int r=0;r<rd;r++){
 | 
			
		||||
    lvSum[r]=zero;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int e1=    grid->_slice_nblock[orthogdim];
 | 
			
		||||
  int e2=    grid->_slice_block [orthogdim];
 | 
			
		||||
  int stride=grid->_slice_stride[orthogdim];
 | 
			
		||||
  // std::cout << GridLogMessage << "End prep" << std::endl;
 | 
			
		||||
  // std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
 | 
			
		||||
  vector_type vv;
 | 
			
		||||
  parallel_for(int r=0;r<rd;r++)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    int so=r*grid->_ostride[orthogdim]; // base offset for start of plane 
 | 
			
		||||
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
        int ss = so + n * stride + b;
 | 
			
		||||
        vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
 | 
			
		||||
        lvSum[r] = lvSum[r] + vv;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // std::cout << GridLogMessage << "End parallel inner product" << std::endl;
 | 
			
		||||
 | 
			
		||||
  // Sum across simd lanes in the plane, breaking out orthog dir.
 | 
			
		||||
  std::vector<int> icoor(Nd);
 | 
			
		||||
  for(int rt=0;rt<rd;rt++){
 | 
			
		||||
 | 
			
		||||
    iScalar<vector_type> temp; 
 | 
			
		||||
    temp._internal = lvSum[rt];
 | 
			
		||||
    extract(temp,extracted);
 | 
			
		||||
 | 
			
		||||
    for(int idx=0;idx<Nsimd;idx++){
 | 
			
		||||
 | 
			
		||||
      grid->iCoorFromIindex(icoor,idx);
 | 
			
		||||
 | 
			
		||||
      int ldx =rt+icoor[orthogdim]*rd;
 | 
			
		||||
 | 
			
		||||
      lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
 | 
			
		||||
}
 | 
			
		||||
template <class vobj>
 | 
			
		||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
  GridBase *grid = lhs._grid;
 | 
			
		||||
  int fd = result.size();
 | 
			
		||||
  int ld = lsSum.size();
 | 
			
		||||
  // sum over nodes.
 | 
			
		||||
  std::vector<scalar_type> gsum;
 | 
			
		||||
  gsum.resize(fd, scalar_type(0.0));
 | 
			
		||||
  // std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
 | 
			
		||||
  for(int t=0;t<fd;t++){
 | 
			
		||||
    int pt = t/ld; // processor plane
 | 
			
		||||
    int lt = t%ld;
 | 
			
		||||
    if ( pt == grid->_processor_coor[orthogdim] ) {
 | 
			
		||||
      gsum[t]=lsSum[lt];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
 | 
			
		||||
  // std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
 | 
			
		||||
  grid->GlobalSumVector(&gsum[0], fd);
 | 
			
		||||
  // std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
 | 
			
		||||
 | 
			
		||||
  result = gsum;
 | 
			
		||||
}
 | 
			
		||||
template<class vobj>
 | 
			
		||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim) 
 | 
			
		||||
{
 | 
			
		||||
@@ -158,10 +158,19 @@ namespace Grid {
 | 
			
		||||
      // tens of seconds per trajectory so this is clean in all reasonable cases,
 | 
			
		||||
      // and margin of safety is orders of magnitude.
 | 
			
		||||
      // We could hack Sitmo to skip in the higher order words of state if necessary
 | 
			
		||||
      //
 | 
			
		||||
      // Replace with 2^30 ; avoid problem on large volumes
 | 
			
		||||
      //
 | 
			
		||||
      /////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
      //      uint64_t skip = site+1;  //   Old init Skipped then drew.  Checked compat with faster init
 | 
			
		||||
      const int shift = 30;
 | 
			
		||||
 | 
			
		||||
      uint64_t skip = site;
 | 
			
		||||
      skip = skip<<40;
 | 
			
		||||
 | 
			
		||||
      skip = skip<<shift;
 | 
			
		||||
 | 
			
		||||
      assert((skip >> shift)==site); // check for overflow
 | 
			
		||||
 | 
			
		||||
      eng.discard(skip);
 | 
			
		||||
      //      std::cout << " Engine  " <<site << " state " <<eng<<std::endl;
 | 
			
		||||
    } 
 | 
			
		||||
@@ -242,7 +251,7 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int idx=0;idx<words;idx++){
 | 
			
		||||
  fillScalar(buf[idx],dist[0],_generators[0]);
 | 
			
		||||
	fillScalar(buf[idx],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
@@ -274,7 +283,7 @@ namespace Grid {
 | 
			
		||||
      RealF *pointer=(RealF *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<2*vComplexF::Nsimd();i++){
 | 
			
		||||
  fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
@@ -282,7 +291,7 @@ namespace Grid {
 | 
			
		||||
      RealD *pointer=(RealD *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<2*vComplexD::Nsimd();i++){
 | 
			
		||||
  fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
@@ -290,7 +299,7 @@ namespace Grid {
 | 
			
		||||
      RealF *pointer=(RealF *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<vRealF::Nsimd();i++){
 | 
			
		||||
  fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
@@ -308,6 +317,19 @@ namespace Grid {
 | 
			
		||||
      std::seed_seq src(seeds.begin(),seeds.end());
 | 
			
		||||
      Seed(src,0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void SeedUniqueString(const std::string &s){
 | 
			
		||||
      std::vector<int> seeds;
 | 
			
		||||
      std::stringstream sha;
 | 
			
		||||
      seeds = GridChecksum::sha256_seeds(s);
 | 
			
		||||
      for(int i=0;i<seeds.size();i++) { 
 | 
			
		||||
        sha << std::hex << seeds[i];
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage << "Intialising serial RNG with unique string '" 
 | 
			
		||||
                << s << "'" << std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
 | 
			
		||||
      SeedFixedIntegers(seeds);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class GridParallelRNG : public GridRNGbase {
 | 
			
		||||
@@ -368,6 +390,14 @@ namespace Grid {
 | 
			
		||||
      _time_counter += usecond()- inner_time_counter;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    void SeedUniqueString(const std::string &s){
 | 
			
		||||
      std::vector<int> seeds;
 | 
			
		||||
      seeds = GridChecksum::sha256_seeds(s);
 | 
			
		||||
      std::cout << GridLogMessage << "Intialising parallel RNG with unique string '" 
 | 
			
		||||
                << s << "'" << std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
 | 
			
		||||
      SeedFixedIntegers(seeds);
 | 
			
		||||
    }
 | 
			
		||||
    void SeedFixedIntegers(const std::vector<int> &seeds){
 | 
			
		||||
 | 
			
		||||
      // Everyone generates the same seed_seq based on input seeds
 | 
			
		||||
@@ -464,8 +464,10 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    assert(lg->_processors[d]  == hg->_processors[d]);
 | 
			
		||||
    assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
 | 
			
		||||
    if ( d!=orthog ) {
 | 
			
		||||
      assert(lg->_processors[d]  == hg->_processors[d]);
 | 
			
		||||
      assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
@@ -499,8 +501,10 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    assert(lg->_processors[d]  == hg->_processors[d]);
 | 
			
		||||
    assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
 | 
			
		||||
    if ( d!=orthog ) {
 | 
			
		||||
      assert(lg->_processors[d]  == hg->_processors[d]);
 | 
			
		||||
      assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
@@ -86,7 +86,7 @@ protected:
 | 
			
		||||
  Colours &Painter;
 | 
			
		||||
  int active;
 | 
			
		||||
  int timing_mode;
 | 
			
		||||
  int topWidth{-1};
 | 
			
		||||
  int topWidth{-1}, chanWidth{-1};
 | 
			
		||||
  static int timestamp;
 | 
			
		||||
  std::string name, topName;
 | 
			
		||||
  std::string COLOUR;
 | 
			
		||||
@@ -126,6 +126,7 @@ public:
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  void setTopWidth(const int w) {topWidth = w;}
 | 
			
		||||
  void setChanWidth(const int w) {chanWidth = w;}
 | 
			
		||||
 | 
			
		||||
  friend std::ostream& operator<< (std::ostream& stream, Logger& log){
 | 
			
		||||
 | 
			
		||||
@@ -136,13 +137,20 @@ public:
 | 
			
		||||
        stream << std::setw(log.topWidth);
 | 
			
		||||
      }
 | 
			
		||||
      stream << log.topName << log.background()<< " : ";
 | 
			
		||||
      stream << log.colour() <<  std::left << log.name << log.background() << " : ";
 | 
			
		||||
      stream << log.colour() <<  std::left;
 | 
			
		||||
      if (log.chanWidth > 0)
 | 
			
		||||
      {
 | 
			
		||||
        stream << std::setw(log.chanWidth);
 | 
			
		||||
      }
 | 
			
		||||
      stream << log.name << log.background() << " : ";
 | 
			
		||||
      if ( log.timestamp ) {
 | 
			
		||||
	log.StopWatch->Stop();
 | 
			
		||||
	GridTime now = log.StopWatch->Elapsed();
 | 
			
		||||
	
 | 
			
		||||
	if ( log.timing_mode==1 ) log.StopWatch->Reset();
 | 
			
		||||
	log.StopWatch->Start();
 | 
			
		||||
	stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
 | 
			
		||||
	stream << log.evidence()
 | 
			
		||||
	       << now	       << log.background() << " : " ;
 | 
			
		||||
      }
 | 
			
		||||
      stream << log.colour();
 | 
			
		||||
      return stream;
 | 
			
		||||
@@ -358,17 +358,10 @@ PARALLEL_CRITICAL
 | 
			
		||||
 | 
			
		||||
      if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
 | 
			
		||||
#ifdef USE_MPI_IO
 | 
			
		||||
	std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< " and offset " << offset << std::endl;
 | 
			
		||||
	std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
 | 
			
		||||
	ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);    assert(ierr==0);
 | 
			
		||||
	ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);    assert(ierr==0);
 | 
			
		||||
	ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status);    assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
	MPI_Offset os;
 | 
			
		||||
	MPI_File_get_position(fh, &os);
 | 
			
		||||
	MPI_File_get_byte_offset(fh, os, &disp);
 | 
			
		||||
	offset = disp;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	MPI_File_close(&fh);
 | 
			
		||||
	MPI_Type_free(&fileArray);
 | 
			
		||||
	MPI_Type_free(&localArray);
 | 
			
		||||
@@ -377,22 +370,20 @@ PARALLEL_CRITICAL
 | 
			
		||||
#endif
 | 
			
		||||
      } else {
 | 
			
		||||
	std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
 | 
			
		||||
                  << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
 | 
			
		||||
                  << iodata.size() * sizeof(fobj) << " bytes" << std::endl;
 | 
			
		||||
        std::ifstream fin;
 | 
			
		||||
	fin.open(file, std::ios::binary | std::ios::in);
 | 
			
		||||
        if (control & BINARYIO_MASTER_APPEND)
 | 
			
		||||
	  {
 | 
			
		||||
	    fin.seekg(-sizeof(fobj), fin.end);
 | 
			
		||||
	  }
 | 
			
		||||
        {
 | 
			
		||||
          fin.seekg(-sizeof(fobj), fin.end);
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
	  {
 | 
			
		||||
	    fin.seekg(offset + myrank * lsites * sizeof(fobj));
 | 
			
		||||
	  }
 | 
			
		||||
        {
 | 
			
		||||
          fin.seekg(offset + myrank * lsites * sizeof(fobj));
 | 
			
		||||
        }
 | 
			
		||||
        fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj));
 | 
			
		||||
        
 | 
			
		||||
        assert(fin.fail() == 0);
 | 
			
		||||
        offset = fin.tellg();
 | 
			
		||||
	fin.close();
 | 
			
		||||
        fin.close();
 | 
			
		||||
      }
 | 
			
		||||
      timer.Stop();
 | 
			
		||||
 | 
			
		||||
@@ -647,11 +638,6 @@ PARALLEL_CRITICAL
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file nersc_checksum   " << std::hex << nersc_csum << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    timer.Start();
 | 
			
		||||
    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
 | 
			
		||||
      std::vector<RngStateType> tmp(RngStateCount);
 | 
			
		||||
@@ -670,11 +656,6 @@ PARALLEL_CRITICAL
 | 
			
		||||
      serial.SetState(tmp,0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksum t " << std::hex << nersc_csum_tmp    << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksuma t " << std::hex << scidac_csuma_tmp << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksumb t " << std::hex << scidac_csumb_tmp << std::dec << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    nersc_csum   = nersc_csum   + nersc_csum_tmp;
 | 
			
		||||
    scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
 | 
			
		||||
    scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
 | 
			
		||||
@@ -725,11 +706,6 @@ PARALLEL_CRITICAL
 | 
			
		||||
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum    << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
 | 
			
		||||
   
 | 
			
		||||
    iodata.resize(1);
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<RngStateType> tmp(RngStateCount);
 | 
			
		||||
@@ -739,11 +715,6 @@ PARALLEL_CRITICAL
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND,
 | 
			
		||||
	     nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksum t " << std::hex << nersc_csum_tmp    << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksuma t " << std::hex << scidac_csuma_tmp << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksumb t " << std::hex << scidac_csumb_tmp << std::dec << std::endl;
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
    nersc_csum   = nersc_csum   + nersc_csum_tmp;
 | 
			
		||||
    scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
 | 
			
		||||
    scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
 | 
			
		||||
@@ -182,6 +182,11 @@ class GridLimeReader : public BinaryIO {
 | 
			
		||||
   {
 | 
			
		||||
     filename= _filename;
 | 
			
		||||
     File = fopen(filename.c_str(), "r");
 | 
			
		||||
     if (File == nullptr)
 | 
			
		||||
     {
 | 
			
		||||
       std::cerr << "cannot open file '" << filename << "'" << std::endl;
 | 
			
		||||
       abort();
 | 
			
		||||
     }
 | 
			
		||||
     LimeR = limeCreateReader(File);
 | 
			
		||||
   }
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
@@ -228,7 +233,8 @@ class GridLimeReader : public BinaryIO {
 | 
			
		||||
	//	std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
 | 
			
		||||
	BinarySimpleMunger<sobj,sobj> munge;
 | 
			
		||||
	BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
	// Insist checksum is next record
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
@@ -238,57 +244,14 @@ class GridLimeReader : public BinaryIO {
 | 
			
		||||
	// Verify checksums
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
 | 
			
		||||
  std::cout << GridLogMessage<< " readLimeLatticeBinaryObject checksums match ! " <<std::endl;
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Read an RNG object and verify checksum
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  void readLimeRNGObject(GridSerialRNG &sRNG, GridParallelRNG &pRNG,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    scidacChecksum scidacChecksum_;
 | 
			
		||||
    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
      uint64_t file_bytes =limeReaderBytes(LimeR);
 | 
			
		||||
 | 
			
		||||
      //      std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
 | 
			
		||||
      //      std::cout << GridLogMessage<< " readLimeObject seeking "<<  record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) {
 | 
			
		||||
 | 
			
		||||
        const int RngStateCount = GridSerialRNG::RngStateCount;
 | 
			
		||||
        typedef std::array<typename GridSerialRNG::RngStateType,RngStateCount> RNGstate;
 | 
			
		||||
 | 
			
		||||
	      uint64_t PayloadSize = sizeof(RNGstate) * (pRNG._grid->_gsites+1);
 | 
			
		||||
 | 
			
		||||
    	  assert(PayloadSize == file_bytes);// Must match or user error
 | 
			
		||||
        uint64_t offset= ftello(File);
 | 
			
		||||
	    	std::cout << GridLogDebug << " ReadLatticeObject from offset "<<offset << std::endl;
 | 
			
		||||
	      BinaryIO::readRNG(sRNG, pRNG, filename, offset, nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
	      /////////////////////////////////////////////
 | 
			
		||||
	      // Insist checksum is next record
 | 
			
		||||
	      /////////////////////////////////////////////
 | 
			
		||||
	      readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
 | 
			
		||||
 | 
			
		||||
	      /////////////////////////////////////////////
 | 
			
		||||
	      // Verify checksums
 | 
			
		||||
	      /////////////////////////////////////////////
 | 
			
		||||
	      assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
 | 
			
		||||
        std::cout << GridLogMessage<< " readLimeRNGObject checksums match ! " <<std::endl;
 | 
			
		||||
	      return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Read a generic serialisable object
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  template<class serialisable_object>
 | 
			
		||||
  void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
 | 
			
		||||
  void readLimeObject(std::string &xmlstring,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    // should this be a do while; can we miss a first record??
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
@@ -303,15 +266,23 @@ class GridLimeReader : public BinaryIO {
 | 
			
		||||
	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);    
 | 
			
		||||
	//	std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::string xmlstring(&xmlc[0]);
 | 
			
		||||
	XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	read(RD,object_name,object);
 | 
			
		||||
   xmlstring = std::string(&xmlc[0]);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }  
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class serialisable_object>
 | 
			
		||||
  void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    std::string xmlstring;
 | 
			
		||||
 | 
			
		||||
    readLimeObject(xmlstring, record_name);
 | 
			
		||||
	  XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	  read(RD,object_name,object);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class GridLimeWriter : public BinaryIO 
 | 
			
		||||
@@ -362,16 +333,11 @@ class GridLimeWriter : public BinaryIO
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Write a generic serialisable object
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  template<class serialisable_object>
 | 
			
		||||
  void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
 | 
			
		||||
  void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      std::string xmlstring;
 | 
			
		||||
      {
 | 
			
		||||
	XmlWriter WR("","");
 | 
			
		||||
	write(WR,object_name,object);
 | 
			
		||||
	xmlstring = WR.XmlString();
 | 
			
		||||
      }
 | 
			
		||||
      std::string xmlstring = writer.docString();
 | 
			
		||||
 | 
			
		||||
      //    std::cout << "WriteLimeObject" << record_name <<std::endl;
 | 
			
		||||
      uint64_t nbytes = xmlstring.size();
 | 
			
		||||
      //    std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
 | 
			
		||||
@@ -385,6 +351,20 @@ class GridLimeWriter : public BinaryIO
 | 
			
		||||
      limeDestroyHeader(h);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class serialisable_object>
 | 
			
		||||
  void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
 | 
			
		||||
  {
 | 
			
		||||
    XmlWriter WR("","");
 | 
			
		||||
 | 
			
		||||
    if (scientificPrec)
 | 
			
		||||
    {
 | 
			
		||||
      WR.scientificFormat(true);
 | 
			
		||||
      WR.setPrecision(scientificPrec);
 | 
			
		||||
    }
 | 
			
		||||
    write(WR,object_name,object);
 | 
			
		||||
    writeLimeObject(MB, ME, WR, object_name, record_name);
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  // Write a generic lattice field and csum
 | 
			
		||||
  // This routine is Collectively called by all nodes
 | 
			
		||||
@@ -471,78 +451,6 @@ class GridLimeWriter : public BinaryIO
 | 
			
		||||
      writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  // Write an rng object and csum
 | 
			
		||||
  // This routine is Collectively called by all nodes
 | 
			
		||||
  // in communicator used by the field._grid
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  void writeLimeRNGObject(GridSerialRNG &sRNG, GridParallelRNG &pRNG, std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = pRNG._grid;
 | 
			
		||||
    assert(boss_node == pRNG._grid->IsBoss() );
 | 
			
		||||
 | 
			
		||||
    const int RngStateCount = GridSerialRNG::RngStateCount;
 | 
			
		||||
    typedef std::array<typename GridSerialRNG::RngStateType,RngStateCount> RNGstate;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////
 | 
			
		||||
    // Create record header
 | 
			
		||||
    ////////////////////////////////////////////
 | 
			
		||||
    int err;
 | 
			
		||||
    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
    uint64_t PayloadSize = sizeof(RNGstate) * (grid->_gsites+1);
 | 
			
		||||
    std::cout << GridLogDebug << "Computed payload size " << PayloadSize << std::endl;
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      createLimeRecordHeader(record_name, 0, 0, PayloadSize);
 | 
			
		||||
      fflush(File);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    ////////////////////////////////////////////////
 | 
			
		||||
    // Check all nodes agree on file position
 | 
			
		||||
    ////////////////////////////////////////////////
 | 
			
		||||
    uint64_t offset1;
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      offset1 = ftello(File);    
 | 
			
		||||
    }
 | 
			
		||||
    grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    // The above is collective. Write by other means into the binary record
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    uint64_t offset = offset1;
 | 
			
		||||
    BinaryIO::writeRNG(sRNG, pRNG,filename, offset, nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    // Wind forward and close the record
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      fseek(File,0,SEEK_END);             
 | 
			
		||||
      uint64_t offset2 = ftello(File);    
 | 
			
		||||
      std::cout << GridLogDebug << " now at offset "<<offset2 << std::endl;
 | 
			
		||||
      assert( (offset2-offset1) == PayloadSize);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Check MPI-2 I/O did what we expect to file
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    if ( boss_node ) { 
 | 
			
		||||
      err=limeWriterCloseRecord(LimeW);  assert(err>=0);
 | 
			
		||||
    }
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    // Write checksum element, propagaing forward from the BinaryIO
 | 
			
		||||
    // Always pair a checksum with a binary object, and close message
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    scidacChecksum checksum;
 | 
			
		||||
    std::stringstream streama; streama << std::hex << scidac_csuma;
 | 
			
		||||
    std::stringstream streamb; streamb << std::hex << scidac_csumb;
 | 
			
		||||
    checksum.suma= streama.str();
 | 
			
		||||
    checksum.sumb= streamb.str();
 | 
			
		||||
    if ( boss_node ) { 
 | 
			
		||||
      writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ScidacWriter : public GridLimeWriter {
 | 
			
		||||
@@ -559,32 +467,12 @@ class ScidacWriter : public GridLimeWriter {
 | 
			
		||||
      writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void writeScidacRNGRecord(GridSerialRNG &sRNG, GridParallelRNG &pRNG) 
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = pRNG._grid;
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
 | 
			
		||||
    header.floating_point = "IEEE64BIG";
 | 
			
		||||
    header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
 | 
			
		||||
    GridMetaData(grid,header); 
 | 
			
		||||
    MachineCharacteristics(header);
 | 
			
		||||
    
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    // Fill the Lime file record by record
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    if ( this->boss_node ) {
 | 
			
		||||
      writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
 | 
			
		||||
    }
 | 
			
		||||
    // Collective call
 | 
			
		||||
    writeLimeRNGObject(sRNG, pRNG,std::string(ILDG_BINARY_DATA));      // Closes message with checksum
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Write generic lattice field in scidac format
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  template <class vobj, class userRecord>
 | 
			
		||||
  void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord) 
 | 
			
		||||
  void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
 | 
			
		||||
                              const unsigned int recordScientificPrec = 0) 
 | 
			
		||||
  {
 | 
			
		||||
    GridBase * grid = field._grid;
 | 
			
		||||
 | 
			
		||||
@@ -602,13 +490,12 @@ class ScidacWriter : public GridLimeWriter {
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    if ( this->boss_node ) {
 | 
			
		||||
      writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
 | 
			
		||||
      writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
 | 
			
		||||
      writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
 | 
			
		||||
      writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
 | 
			
		||||
    }
 | 
			
		||||
    // Collective call
 | 
			
		||||
    writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));      // Closes message with checksum
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -622,27 +509,8 @@ class ScidacReader : public GridLimeReader {
 | 
			
		||||
     readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
 | 
			
		||||
     readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Read RNGobject in scidac format
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  void readScidacRNGRecord(GridSerialRNG &sRNG, GridParallelRNG &pRNG) 
 | 
			
		||||
  {
 | 
			
		||||
    GridBase * grid = pRNG._grid;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    // fill the Grid header
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
 
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    // Fill the Lime file record by record
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
 | 
			
		||||
    readLimeRNGObject(sRNG, pRNG, std::string(ILDG_BINARY_DATA));
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Read generic lattice field in scidac format
 | 
			
		||||
  // Write generic lattice field in scidac format
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  template <class vobj, class userRecord>
 | 
			
		||||
  void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord) 
 | 
			
		||||
@@ -49,15 +49,35 @@ inline double usecond(void) {
 | 
			
		||||
 | 
			
		||||
typedef  std::chrono::system_clock          GridClock;
 | 
			
		||||
typedef  std::chrono::time_point<GridClock> GridTimePoint;
 | 
			
		||||
typedef  std::chrono::milliseconds          GridTime;
 | 
			
		||||
typedef  std::chrono::microseconds          GridUsecs;
 | 
			
		||||
 | 
			
		||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
 | 
			
		||||
typedef  std::chrono::seconds               GridSecs;
 | 
			
		||||
typedef  std::chrono::milliseconds          GridMillisecs;
 | 
			
		||||
typedef  std::chrono::microseconds          GridUsecs;
 | 
			
		||||
typedef  std::chrono::microseconds          GridTime;
 | 
			
		||||
 | 
			
		||||
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
 | 
			
		||||
{
 | 
			
		||||
  stream << time.count()<<" ms";
 | 
			
		||||
  stream << time.count()<<" s";
 | 
			
		||||
  return stream;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
 | 
			
		||||
{
 | 
			
		||||
  GridSecs second(1);
 | 
			
		||||
  auto     secs       = now/second ; 
 | 
			
		||||
  auto     subseconds = now%second ; 
 | 
			
		||||
  stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
 | 
			
		||||
  return stream;
 | 
			
		||||
}
 | 
			
		||||
inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
 | 
			
		||||
{
 | 
			
		||||
  GridSecs second(1);
 | 
			
		||||
  auto     seconds    = now/second ; 
 | 
			
		||||
  auto     subseconds = now%second ; 
 | 
			
		||||
  stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
 | 
			
		||||
  return stream;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GridStopWatch {
 | 
			
		||||
private:
 | 
			
		||||
  bool running;
 | 
			
		||||
@@ -96,6 +116,9 @@ public:
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return (uint64_t) accumulator.count();
 | 
			
		||||
  }
 | 
			
		||||
  bool isRunning(void){
 | 
			
		||||
    return running;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -90,17 +90,20 @@ namespace QCD {
 | 
			
		||||
    // That probably makes for GridRedBlack4dCartesian grid.
 | 
			
		||||
 | 
			
		||||
    // s,sp,c,spc,lc
 | 
			
		||||
    template<typename vtype> using iSinglet                   = iScalar<iScalar<iScalar<vtype> > >;
 | 
			
		||||
    template<typename vtype> using iSpinMatrix                = iScalar<iMatrix<iScalar<vtype>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
 | 
			
		||||
    template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
 | 
			
		||||
    template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
 | 
			
		||||
    template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >;
 | 
			
		||||
    template<typename vtype> using iSpinColourVector          = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iHalfSpinVector            = iScalar<iVector<iScalar<vtype>, Nhs> >;
 | 
			
		||||
    template<typename vtype> using iHalfSpinColourVector      = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
 | 
			
		||||
 | 
			
		||||
    template<typename vtype> using iSinglet                     = iScalar<iScalar<iScalar<vtype> > >;
 | 
			
		||||
    template<typename vtype> using iSpinMatrix                  = iScalar<iMatrix<iScalar<vtype>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iColourMatrix                = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
 | 
			
		||||
    template<typename vtype> using iSpinColourMatrix            = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iLorentzColourMatrix         = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
 | 
			
		||||
    template<typename vtype> using iDoubleStoredColourMatrix    = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
 | 
			
		||||
    template<typename vtype> using iSpinVector                  = iScalar<iVector<iScalar<vtype>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iColourVector                = iScalar<iScalar<iVector<vtype, Nc> > >;
 | 
			
		||||
    template<typename vtype> using iSpinColourVector            = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
 | 
			
		||||
    template<typename vtype> using iHalfSpinVector              = iScalar<iVector<iScalar<vtype>, Nhs> >;
 | 
			
		||||
    template<typename vtype> using iHalfSpinColourVector        = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
 | 
			
		||||
    template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
 | 
			
		||||
    template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
 | 
			
		||||
@@ -127,10 +130,28 @@ namespace QCD {
 | 
			
		||||
    typedef iSpinColourMatrix<Complex  >    SpinColourMatrix;
 | 
			
		||||
    typedef iSpinColourMatrix<ComplexF >    SpinColourMatrixF;
 | 
			
		||||
    typedef iSpinColourMatrix<ComplexD >    SpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    typedef iSpinColourMatrix<vComplex >    vSpinColourMatrix;
 | 
			
		||||
    typedef iSpinColourMatrix<vComplexF>    vSpinColourMatrixF;
 | 
			
		||||
    typedef iSpinColourMatrix<vComplexD>    vSpinColourMatrixD;
 | 
			
		||||
    
 | 
			
		||||
    // SpinColourSpinColour matrix
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<Complex  >    SpinColourSpinColourMatrix;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<ComplexF >    SpinColourSpinColourMatrixF;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<ComplexD >    SpinColourSpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<vComplex >    vSpinColourSpinColourMatrix;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    // SpinColourSpinColour matrix
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<Complex  >    SpinColourSpinColourMatrix;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<ComplexF >    SpinColourSpinColourMatrixF;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<ComplexD >    SpinColourSpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<vComplex >    vSpinColourSpinColourMatrix;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF;
 | 
			
		||||
    typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    // LorentzColour
 | 
			
		||||
    typedef iLorentzColourMatrix<Complex  > LorentzColourMatrix;
 | 
			
		||||
@@ -229,6 +250,9 @@ namespace QCD {
 | 
			
		||||
    typedef Lattice<vSpinColourMatrixF>     LatticeSpinColourMatrixF;
 | 
			
		||||
    typedef Lattice<vSpinColourMatrixD>     LatticeSpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    typedef Lattice<vSpinColourSpinColourMatrix>      LatticeSpinColourSpinColourMatrix;
 | 
			
		||||
    typedef Lattice<vSpinColourSpinColourMatrixF>     LatticeSpinColourSpinColourMatrixF;
 | 
			
		||||
    typedef Lattice<vSpinColourSpinColourMatrixD>     LatticeSpinColourSpinColourMatrixD;
 | 
			
		||||
 | 
			
		||||
    typedef Lattice<vLorentzColourMatrix>  LatticeLorentzColourMatrix;
 | 
			
		||||
    typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user