Peter Boyle
							
						 
					 | 
					
						
						
							
						
						82fc4b1e94
					 | 
					
						
						
							
							Finalise
						
						
						
						
						
						
					 | 
					
						2023-11-23 18:19:41 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						b4f1740380
					 | 
					
						
						
							
							Finalise message
						
						
						
						
						
						
					 | 
					
						2023-11-23 18:19:16 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						031f85247c
					 | 
					
						
						
							
							multRHS initial support -- needs optimisation for multi project/promote.
						
						
						
						
						
						
						
						Bug fix in freeing intermediate grids to stop double free 
						
						
					 | 
					
						2023-11-23 18:18:35 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						639cc6f73a
					 | 
					
						
						
							
							better support for multiRHS coarse space
						
						
						
						
						
						
						
						Still to add restriction of domain of last loop to interior of padded cell (expect about 4.5x on test volume on Crusher) 
						
						
					 | 
					
						2023-11-23 18:16:26 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						09946cf1ba
					 | 
					
						
						
							
							Improved, works on 48^3 moving to multiRHS optimisations
						
						
						
						
						
						
					 | 
					
						2023-11-15 18:03:05 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						f4fa95e7cb
					 | 
					
						
						
							
							Use 5.3.0
						
						
						
						
						
						
					 | 
					
						2023-11-15 18:01:38 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						100e29e35e
					 | 
					
						
						
							
							Allow expression as argument to norm2
						
						
						
						
						
						
					 | 
					
						2023-11-15 18:00:44 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						4cbe471a83
					 | 
					
						
						
							
							devVector
						
						
						
						
						
						
					 | 
					
						2023-11-15 18:00:07 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						8bece1f861
					 | 
					
						
						
							
							Faster to transpose the matrix and apply with column major order
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:58:38 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						a3ca71ec01
					 | 
					
						
						
							
							Lots more setup options, still working on them
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:58:04 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						e0543e8af5
					 | 
					
						
						
							
							Implement flexible preconditioned CG
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:57:39 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						c1eb80d01a
					 | 
					
						
						
							
							Print which have converged
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:57:08 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						a26121d97b
					 | 
					
						
						
							
							Better printing
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:56:45 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						043031a757
					 | 
					
						
						
							
							Report resid on failed convergence
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:56:22 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						807aeebe4c
					 | 
					
						
						
							
							Resize tol in constructor
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:55:57 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						8aa1a37aad
					 | 
					
						
						
							
							For Mirs preconditioner solver
						
						
						
						
						
						
					 | 
					
						2023-11-15 17:55:32 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						7d077fe493
					 | 
					
						
						
							
							Frontier compile
						
						
						
						
						
						
					 | 
					
						2023-11-09 13:58:44 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								david clarke
							
						 
					 | 
					
						
						
							
						
						9cd4128833
					 | 
					
						
						
							
							fix naik bug
						
						
						
						
						
						
					 | 
					
						2023-11-03 14:11:38 -06:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								david clarke
							
						 
					 | 
					
						
						
							
						
						c8b17c9526
					 | 
					
						
						
							
							Naik to CShift
						
						
						
						
						
						
					 | 
					
						2023-11-02 12:43:22 -06:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								david clarke
							
						 
					 | 
					
						
						
							
						
						2ae2a81e85
					 | 
					
						
						
							
							attempt to fix Naik
						
						
						
						
						
						
					 | 
					
						2023-10-31 13:54:55 -06:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								david clarke
							
						 
					 | 
					
						
						
							
						
						69c869d345
					 | 
					
						
						
							
							fixed stupid typo
						
						
						
						
						
						
					 | 
					
						2023-10-30 17:41:52 -06:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								david clarke
							
						 
					 | 
					
						
						
							
						
						df9b958c40
					 | 
					
						
						
							
							naik now returns separately
						
						
						
						
						
						
					 | 
					
						2023-10-30 17:40:53 -06:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								david clarke
							
						 
					 | 
					
						
						
							
						
						3d3376d1a3
					 | 
					
						
						
							
							LePage works, trying Naik
						
						
						
						
						
						
					 | 
					
						2023-10-27 16:26:31 -06:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						4efa042f50
					 | 
					
						
						
							
							C++17 change
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:57:50 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						c7cb37e970
					 | 
					
						
						
							
							c++17 accepted
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:57:24 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						d34b207eab
					 | 
					
						
						
							
							Avoid HIP warnings
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:57:04 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						0e6fa6f6b8
					 | 
					
						
						
							
							Don't need the Cshift for the period optimisation
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:56:31 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						38b87de53f
					 | 
					
						
						
							
							This works around a stacksize limit on AMD GPU
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:56:07 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						aa5047a9e4
					 | 
					
						
						
							
							Faster blockProject blockPromote
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:49:55 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						24b6ee0df9
					 | 
					
						
						
							
							M4 file
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:36:48 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						1e79cc9cbe
					 | 
					
						
						
							
							Avoid compiler error
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:36:09 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						b3925df9c3
					 | 
					
						
						
							
							Verbose on CPU-GPU xfer, remove performance by default
						
						
						
						
						
						
					 | 
					
						2023-10-24 10:25:01 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						f2648e94b9
					 | 
					
						
						
							
							getHostPointer added to Lattice
						
						
						
						
						
						
					 | 
					
						2023-10-23 13:47:41 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						351795ac3a
					 | 
					
						
						
							
							Better messaging
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:33:04 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						9c9c42d0df
					 | 
					
						
						
							
							Tests on Frontier with real speed-up: 3.5x on 16^3 at mq=0.01
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						b6ad1bafc7
					 | 
					
						
						
							
							Normal memory SendToRecvFrom asynchronous for use in general stencil
						
						
						
						
						
						
						
						code 
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						a5ca40f446
					 | 
					
						
						
							
							Better verbose -- track CPU GPU motion under --log Memory, others go to
						
						
						
						
						
						
						
						debug output stream 
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						9ab54c5565
					 | 
					
						
						
							
							Overlap comms & data copy/buffer assembly in Ghost zone exchange
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						4341d96bde
					 | 
					
						
						
							
							Massively sped up coarse grid mult, comms
						
						
						
						
						
						
						
						Save 3ms spent (60% of time!) on cudaMalloc!! 
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						5fac47a26d
					 | 
					
						
						
							
							Faster halo exchange
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						e064f17346
					 | 
					
						
						
							
							Faster halo exchange
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						afe10ba2a2
					 | 
					
						
						
							
							More digits
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						7cc3435ba8
					 | 
					
						
						
							
							Improved General coarsened matrix
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						541772313c
					 | 
					
						
						
							
							Verbosity
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						3747494a09
					 | 
					
						
						
							
							Notify delete public
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						f2b98d0dcc
					 | 
					
						
						
							
							Const safety
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						80471bf762
					 | 
					
						
						
							
							Alternate implementation involving face operations
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						a06f63c110
					 | 
					
						
						
							
							Improved I/O and non-lexico option exposed to SciDAC format
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						0ae4478cd9
					 | 
					
						
						
							
							Checkpoint the subspace and ldop
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						ae4e705e09
					 | 
					
						
						
							
							Use random vec as easier for debug
						
						
						
						
						
						
					 | 
					
						2023-10-20 19:27:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 |