Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						9ffd1ed4ce
					 | 
					
						
						
							
							Merged
						
						
						
						
						
						
					 | 
					
						2025-03-08 15:30:08 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						3d014864e2
					 | 
					
						
						
							
							Making LLVM happy
						
						
						
						
						
						
					 | 
					
						2025-03-06 14:19:25 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						1d22841811
					 | 
					
						
						
							
							Working on aurora, GPT issue turned up is fixed
						
						
						
						
						
						
					 | 
					
						2025-03-06 03:20:18 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						a1cdda833f
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 14:04:23 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						ad6db92690
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 14:00:26 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						e8ff9d8e50
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 14:00:04 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						795769c636
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 13:50:41 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						267a39d943
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 13:49:43 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						3624bd3d22
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 13:45:09 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						bc12dbbb38
					 | 
					
						
						
							
							Update WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 12:48:56 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						eb8a008a8f
					 | 
					
						
						
							
							Create WorkArounds.txt
						
						
						
						
						
						
					 | 
					
						2025-03-05 12:41:59 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						c4d9aa1a21
					 | 
					
						
						
							
							Config command that makes GPT happier
						
						
						
						
						
						
					 | 
					
						2025-02-27 20:12:49 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						6ae809ed40
					 | 
					
						
						
							
							Print not liked on GPT compile
						
						
						
						
						
						
					 | 
					
						2025-02-27 20:12:49 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						311e2aab3f
					 | 
					
						
						
							
							Update Accelerator.h
						
						
						
						
						
						
					 | 
					
						2025-02-26 11:42:52 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						438dfbdb83
					 | 
					
						
						
							
							Only throw if there is a pending list entry in CommsComplete
						
						
						
						
						
						
					 | 
					
						2025-02-25 16:57:27 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						b2ce760cf4
					 | 
					
						
						
							
							Verbose issue with GPT
						
						
						
						
						
						
					 | 
					
						2025-02-25 16:55:23 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						ba9bbe0221
					 | 
					
						
						
							
							Bounce MPI through host
						
						
						
						
						
						
					 | 
					
						2025-02-12 19:34:59 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						4c3dd82d84
					 | 
					
						
						
							
							CSHIFT with bounce through Host memory on MPI packets
						
						
						
						
						
						
					 | 
					
						2025-02-12 19:09:53 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						44e911b5b7
					 | 
					
						
						
							
							Comment change
						
						
						
						
						
						
					 | 
					
						2025-02-12 17:37:55 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						a7a16df9d0
					 | 
					
						
						
							
							GET not put has kinder barrier sequence for NVLINK type access as when
						
						
						
						
						
						
						
						GET is done, I can use it without barrier. Moves a barrier to a nicer
place, overlapped with DtoH DMA 
						
						
					 | 
					
						2025-02-12 14:59:28 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						382e0abefd
					 | 
					
						
						
							
							Was issuing a double fence -- the gather also fences
						
						
						
						
						
						
					 | 
					
						2025-02-12 14:57:28 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						6fdefe5b90
					 | 
					
						
						
							
							Barrier sequencing if doing "GET" not "PUT" is different.
						
						
						
						
						
						
						
						This is somewhat better timing for Barriers 
						
						
					 | 
					
						2025-02-12 14:55:20 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						4788dd8e2e
					 | 
					
						
						
							
							More states in packet progression for GPU non aware MPI
						
						
						
						
						
						
					 | 
					
						2025-02-12 14:53:57 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						1cc5f221f3
					 | 
					
						
						
							
							GET not put ordering is better as I know when I've got all MY data
						
						
						
						
						
						
					 | 
					
						2025-02-12 14:53:05 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						93251bfba0
					 | 
					
						
						
							
							GET not put for better ordering in the downstream dependent kernels -- I
						
						
						
						
						
						
						
						know when I'm done, so we can move a barrier / handshake between ranks
intranode to a point off critical path 
						
						
					 | 
					
						2025-02-12 14:50:21 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						18b79508b8
					 | 
					
						
						
							
							New line better for pretty print
						
						
						
						
						
						
					 | 
					
						2025-02-12 14:49:48 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						4de5ed1613
					 | 
					
						
						
							
							Remove vector view. The std::vector will not inform Memory manager of
						
						
						
						
						
						
						
						deletion and so a stale entry could be left. It is not and should not be
used. 
						
						
					 | 
					
						2025-02-12 14:48:46 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						0baaddbe98
					 | 
					
						
						
							
							Pipeline mode commit on Aurora. 5+ TF/s on 16^3x32 per tile at 384
						
						
						
						
						
						
						
						nodes.
More concurrency/fine grained scheduling is possible. 
						
						
					 | 
					
						2025-02-04 19:27:26 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						b50fb34e71
					 | 
					
						
						
							
							Perf on Aurora
						
						
						
						
						
						
					 | 
					
						2025-02-01 18:39:34 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						de84d730ff
					 | 
					
						
						
							
							Fastest run config on Aurora to date
						
						
						
						
						
						
					 | 
					
						2025-02-01 18:08:40 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						c74d11e3d7
					 | 
					
						
						
							
							PVdagM MG
						
						
						
						
						
						
					 | 
					
						2025-02-01 11:04:13 -05:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						84cab5e6e7
					 | 
					
						
						
							
							no comms and log cleanup
						
						
						
						
						
						
					 | 
					
						2025-02-01 16:37:21 +01:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						c4fc972fec
					 | 
					
						
						
							
							Merge branch 'feature/deprecate-uvm' into develop
						
						
						
						
						
						
					 | 
					
						2025-01-31 16:32:36 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						8cf809e231
					 | 
					
						
						
							
							Best results on Aurora so far
						
						
						
						
						
						
					 | 
					
						2025-01-31 16:14:45 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						94019a922e
					 | 
					
						
						
							
							Significantly better performance on Aurora without using pipeline mode
						
						
						
						
						
						
					 | 
					
						2025-01-30 16:36:46 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						d6b2727f86
					 | 
					
						
						
							
							Pipeline mode getting better -- 2 nodes @ 10TF/s per node on Aurora
						
						
						
						
						
						
					 | 
					
						2025-01-29 09:22:21 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						74a4f43946
					 | 
					
						
						
							
							Optional host buffer bounce for no CUDA aware MPI
						
						
						
						
						
						
					 | 
					
						2025-01-28 15:22:46 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						1caf8b0f86
					 | 
					
						
						
							
							Rename
						
						
						
						
						
						
					 | 
					
						2025-01-28 15:22:37 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						3f3661a86f
					 | 
					
						
						
							
							Heading towards PVdagM multigrid
						
						
						
						
						
						
					 | 
					
						2025-01-17 14:33:35 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						8fe429346f
					 | 
					
						
						
							
							Dslash testing for reproduce
						
						
						
						
						
						
					 | 
					
						2024-11-11 23:11:11 +00:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						5a4f9bf2e3
					 | 
					
						
						
							
							Force the ROCM version
						
						
						
						
						
						
					 | 
					
						2024-10-29 18:12:31 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						b91fc1b6b4
					 | 
					
						
						
							
							Merge branch 'feature/boosted' into feature/deprecate-uvm
						
						
						
						
						
						
						
						Fixed boosted free field test 
						
						
					 | 
					
						2024-10-28 16:53:09 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						eafc150034
					 | 
					
						
						
							
							Test fft asserts
						
						
						
						
						
						
					 | 
					
						2024-10-23 16:46:26 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						2877f1a268
					 | 
					
						
						
							
							Verbose reduce
						
						
						
						
						
						
					 | 
					
						2024-10-23 15:14:16 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						1e893af775
					 | 
					
						
						
							
							GPU happy
						
						
						
						
						
						
					 | 
					
						2024-10-23 14:52:15 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						d9f430a575
					 | 
					
						
						
							
							Happy GPU
						
						
						
						
						
						
					 | 
					
						2024-10-23 14:51:16 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						63abe87f36
					 | 
					
						
						
							
							Memory manager verbose improvements that were useful to track an error
						
						
						
						
						
						
					 | 
					
						2024-10-23 14:49:13 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						368d649c8a
					 | 
					
						
						
							
							feature/deprecate-uvm happier -- preallocate device resident neighbour table
						
						
						
						
						
						
					 | 
					
						2024-10-23 14:47:55 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						5603464f39
					 | 
					
						
						
							
							Fix in partial fraction import/export physical and
						
						
						
						
						
						
						
						make the GPU happier on the deprecate-uvm -- don't use static vectors, make member of class 
						
						
					 | 
					
						2024-10-23 14:45:58 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						655c79f39e
					 | 
					
						
						
							
							Suppress warning on partial override
						
						
						
						
						
						
					 | 
					
						2024-10-23 14:44:41 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 |