Bartosz Kostrzewa
							
						 
					 | 
					
						
						
							
						
						a9b92867a8
					 | 
					
						
						
							
							use tabulator
						
						
						
						
						
						
					 | 
					
						2020-08-31 18:41:17 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Bartosz Kostrzewa
							
						 
					 | 
					
						
						
							
						
						65920faeba
					 | 
					
						
						
							
							correct formatting of Benchmark_wilson_sweep output
						
						
						
						
						
						
					 | 
					
						2020-08-31 18:39:27 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						249e2db87d
					 | 
					
						
						
							
							Merge pull request #14 from DanielRichtmann/feature/gpt-coarsenedmatrix
						
						
						
						
						
						
						
						Expose more functions in CMat 
						
						
					 | 
					
						2020-08-27 15:18:56 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Daniel Richtmann
							
						 
					 | 
					
						
						
							
						
						cf3535d16e
					 | 
					
						
						
							
							Expose more functions in CMat
						
						
						
						
						
						
					 | 
					
						2020-08-27 14:06:48 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						d61ee817f4
					 | 
					
						
						
							
							Merge pull request #13 from DanielRichtmann/feature/gpt-coarsenedmatrix
						
						
						
						
						
						
						
						Changes needed for GPT MG 
						
						
					 | 
					
						2020-08-27 12:11:06 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						3448b7387c
					 | 
					
						
						
							
							Almost there to coalesced ET
						
						
						
						
						
						
					 | 
					
						2020-08-26 17:04:49 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						47b89d2739
					 | 
					
						
						
							
							Pragma protection improvement
						
						
						
						
						
						
					 | 
					
						2020-08-26 17:04:27 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						2a75516330
					 | 
					
						
						
							
							state MPI/SLURM message only on world_rank zero
						
						
						
						
						
						
					 | 
					
						2020-08-26 12:34:17 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Daniel Richtmann
							
						 
					 | 
					
						
						
							
						
						b2087f14c4
					 | 
					
						
						
							
							Fix CoarsenedMatrix regarding illegal memory accesses
						
						
						
						
						
						
						
						Need a reference to geom since the lambda copies the this pointer which points to host memory, see
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/#star-this-capture
- https://devblogs.nvidia.com/new-compiler-features-cuda-8/ 
						
						
					 | 
					
						2020-08-24 17:46:47 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Daniel Richtmann
							
						 
					 | 
					
						
						
							
						
						dd1ba266b2
					 | 
					
						
						
							
							Fix mapping between dir + disp and point in CMat
						
						
						
						
						
						
					 | 
					
						2020-08-24 17:46:46 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Daniel Richtmann
							
						 
					 | 
					
						
						
							
						
						1292d59563
					 | 
					
						
						
							
							Add a typedef + broaden interface of CMat
						
						
						
						
						
						
					 | 
					
						2020-08-24 17:46:45 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						9877ed9bf8
					 | 
					
						
						
							
							Merge pull request #12 from paboyle/develop
						
						
						
						
						
						
						
						Sync 
						
						
					 | 
					
						2020-08-22 16:35:35 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						f0dc0f3621
					 | 
					
						
						
							
							fix compile issue on Qpace3
						
						
						
						
						
						
					 | 
					
						2020-08-22 13:57:33 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						1efe30d6cc
					 | 
					
						
						
							
							Slurm stop nodes using same GPU
						
						
						
						
						
						
					 | 
					
						2020-08-21 02:02:53 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						0b787e9fe0
					 | 
					
						
						
							
							Avoid namespace collision to make gcc happy
						
						
						
						
						
						
					 | 
					
						2020-08-20 22:23:29 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						37ec4b241c
					 | 
					
						
						
							
							Default thread count sensible
						
						
						
						
						
						
					 | 
					
						2020-08-20 22:12:31 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						63b0a19f37
					 | 
					
						
						
							
							Merge pull request #11 from paboyle/develop
						
						
						
						
						
						
						
						Sync 
						
						
					 | 
					
						2020-08-20 20:53:39 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						90ea7dfa99
					 | 
					
						
						
							
							Accelerator loops for device resident comms buf
						
						
						
						
						
						
					 | 
					
						2020-08-19 22:40:44 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						f866d7c33e
					 | 
					
						
						
							
							Merge pull request #307 from lehner/feature/gpt
						
						
						
						
						
						
						
						Merged Nils's A64FX and minor fixes (MemoryManager::InitMessage, Tensor_index zeroit, ...) 
						
						
					 | 
					
						2020-08-18 23:27:21 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						542bdef198
					 | 
					
						
						
							
							cleanup comments
						
						
						
						
						
						
					 | 
					
						2020-08-14 18:39:44 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						06007db3d9
					 | 
					
						
						
							
							true shm_none implementation with GPUs that disables the use of device shared memory for the stencils
						
						
						
						
						
						
					 | 
					
						2020-08-14 18:37:00 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						12e6059a70
					 | 
					
						
						
							
							Merge branch 'feature/gpt' of https://github.com/lehner/Grid into feature/gpt
						
						
						
						
						
						
					 | 
					
						2020-08-13 16:16:52 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						dbaa24ebf6
					 | 
					
						
						
							
							further GPU memory access fixes (with this GPT passes all single-rank tests on non-summit GPUs)
						
						
						
						
						
						
					 | 
					
						2020-08-13 16:14:15 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						3276aa67dc
					 | 
					
						
						
							
							Update
						
						
						
						
						
						
					 | 
					
						2020-08-12 14:15:53 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						3b30b9f0c0
					 | 
					
						
						
							
							Merge branch 'feature/gpt' of https://github.com/lehner/Grid into feature/gpt
						
						
						
						
						
						
					 | 
					
						2020-08-06 16:59:17 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						69db4816f7
					 | 
					
						
						
							
							fix variable capture in Scatter_plane_merge on accelerators
						
						
						
						
						
						
					 | 
					
						2020-08-06 16:57:16 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						3abe09025a
					 | 
					
						
						
							
							when using SHM_NONE allow multiple ranks per node but without using shared memory
						
						
						
						
						
						
					 | 
					
						2020-08-06 14:42:38 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						e33878e0de
					 | 
					
						
						
							
							Trigger re-run of CI
						
						
						
						
						
						
					 | 
					
						2020-08-06 11:50:24 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						27b4fbf3f0
					 | 
					
						
						
							
							assert for forbidden code path and fix check for faster CPU codepath in basisRotate
						
						
						
						
						
						
					 | 
					
						2020-08-03 07:57:33 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						968a90633a
					 | 
					
						
						
							
							Zero -> zeroit in Tensor_index
						
						
						
						
						
						
					 | 
					
						2020-07-31 02:07:17 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						6365a89ba3
					 | 
					
						
						
							
							create separate InitMessage for MemoryManager that can be called after communicator setup
						
						
						
						
						
						
					 | 
					
						2020-07-30 07:25:05 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						ddbb008694
					 | 
					
						
						
							
							Merge pull request #10 from lehner/feature/gpt-sycl
						
						
						
						
						
						
						
						Feature/gpt sycl 
						
						
					 | 
					
						2020-07-30 13:12:09 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						7997e0a449
					 | 
					
						
						
							
							Merge branch 'feature/gpt' into feature/gpt-sycl
						
						
						
						
						
						
					 | 
					
						2020-07-30 13:11:31 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						197612bc7a
					 | 
					
						
						
							
							fast cpu basisRotate and other small cleanups
						
						
						
						
						
						
					 | 
					
						2020-07-30 07:08:54 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						0e88bf4bff
					 | 
					
						
						
							
							remove Nils's default pragma
						
						
						
						
						
						
					 | 
					
						2020-07-29 10:24:35 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						3e64d78469
					 | 
					
						
						
							
							include versions.h again and add back asserts in Test_simd
						
						
						
						
						
						
					 | 
					
						2020-07-29 10:18:05 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						2004611def
					 | 
					
						
						
							
							Merge pull request #9 from nmeyer-ur/feature/a64fx-2
						
						
						
						
						
						
						
						Feature/a64fx 2 
						
						
					 | 
					
						2020-07-29 14:54:20 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						a2868c96a4
					 | 
					
						
						
							
							Merge pull request #8 from paboyle/develop
						
						
						
						
						
						
						
						Doc recompile 
						
						
					 | 
					
						2020-07-29 14:10:07 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Peter Boyle
							
						 
					 | 
					
						
						
							
						
						7cf7f11e1a
					 | 
					
						
						
							
							Doc recompile
						
						
						
						
						
						
					 | 
					
						2020-07-22 14:44:11 -04:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						ea7f8fda5e
					 | 
					
						
						
							
							fix typo
						
						
						
						
						
						
					 | 
					
						2020-07-22 09:34:05 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						906b78811b
					 | 
					
						
						
							
							exit in Init when using --comms-overlap
						
						
						
						
						
						
					 | 
					
						2020-07-22 08:57:01 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christoph Lehner
							
						 
					 | 
					
						
						
							
						
						97703b181b
					 | 
					
						
						
							
							Merge pull request #7 from paboyle/develop
						
						
						
						
						
						
						
						Merge current develop 
						
						
					 | 
					
						2020-07-12 16:24:53 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						d9474c6cb6
					 | 
					
						
						
							
							compiler-independent build using --enable-simd=A64FX
						
						
						
						
						
						
					 | 
					
						2020-07-09 10:07:02 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						bbd145382b
					 | 
					
						
						
							
							enable --enable-simd=A64FX in configure
						
						
						
						
						
						
					 | 
					
						2020-07-08 12:43:51 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						1b08cb7300
					 | 
					
						
						
							
							Merge branch 'develop' into feature/a64fx-2
						
						
						
						
						
						
					 | 
					
						2020-07-08 08:18:18 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						337d9dc043
					 | 
					
						
						
							
							move barrier in Benchmark_wilson
						
						
						
						
						
						
					 | 
					
						2020-07-08 08:13:40 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						8726e94ea7
					 | 
					
						
						
							
							merge upstream develop
						
						
						
						
						
						
					 | 
					
						2020-07-07 20:26:47 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								nmeyer-ur
							
						 
					 | 
					
						
						
							
						
						67db4993c2
					 | 
					
						
						
							
							reset head, update SVE readme
						
						
						
						
						
						
					 | 
					
						2020-07-07 19:54:52 +02:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						f1f655d92b
					 | 
					
						
						
							
							Merge pull request #304 from Heinrich-BR/develop
						
						
						
						
						
						
						
						ScalarImpl.h updates 
						
						
					 | 
					
						2020-07-06 10:16:03 +01:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
							
						
						43334e88c3
					 | 
					
						
						
							
							Tiny change in a comment for clarity
						
						
						
						
						
						
					 | 
					
						2020-07-04 16:11:16 +01:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 |