nmeyer-ur 
							
						 
					 
					
						
						
							
						
						5cb3530c34 
					 
					
						
						
							
							enable counters in Benchmark_wilson  
						
						
						
						
					 
					
						2020-05-29 15:44:52 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						006cc8a8f1 
					 
					
						
						
							
							Staggereed move to accelerator  
						
						
						
						
					 
					
						2020-05-28 08:33:06 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						cf2938688a 
					 
					
						
						
							
							Sycl unhappy fix  
						
						
						
						
					 
					
						2020-05-25 08:36:53 -07:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						a7abda89e2 
					 
					
						
						
							
							View location & access mode  
						
						
						
						
					 
					
						2020-05-21 16:13:59 -04:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						015d8bb38a 
					 
					
						
						
							
							introduced assertions in Benchmark_wilson, removed data output from Benchmark_dwf  
						
						
						
						
					 
					
						2020-05-15 09:15:50 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						ea08f193e7 
					 
					
						
						
							
							Allocator cache spliit into large/small pools  
						
						
						
						
					 
					
						2020-05-10 05:24:26 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						ee1de82a53 
					 
					
						
						
							
							Working ITT benchmark again  
						
						
						
						
					 
					
						2020-05-08 18:54:50 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						2b576fc185 
					 
					
						
						
							
							Comment deadd codde remove  
						
						
						
						
					 
					
						2020-05-08 18:54:29 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						6859a3e1d4 
					 
					
						
						
							
							Schur operator  
						
						
						
						
					 
					
						2020-05-08 09:19:12 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						28a1fcaaff 
					 
					
						
						
							
							First compile against SYCL  
						
						
						
						
					 
					
						2020-05-05 11:13:27 -07:00 
						 
				 
			
				
					
						
							
							
								u37294 
							
						 
					 
					
						
						
							
						
						59c51d2c35 
					 
					
						
						
							
							Make compile if HAVE_LIME=0  
						
						
						
						
					 
					
						2020-05-04 10:26:20 -07:00 
						 
				 
			
				
					
						
							
							
								nils meyer 
							
						 
					 
					
						
						
							
						
						64b72fc17f 
					 
					
						
						
							
							testing gcc 10.0.1: build errors in Exchange1 using -DA64FX and in Lattice_base.h building Dslash only  
						
						
						
						
					 
					
						2020-04-19 01:25:40 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						e279b2be29 
					 
					
						
						
							
							Merge develop  
						
						
						
						
					 
					
						2019-08-14 23:01:59 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						48e6efc7c9 
					 
					
						
						
							
							Merge branch 'develop' into feature/gpu-port  
						
						... 
						
						
						
						Conflicts:
	Grid/qcd/action/fermion/WilsonKernelsAsm.cc
	Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h
	Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h
	benchmarks/Benchmark_comms.cc 
						
						
					 
					
						2019-08-14 18:56:54 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						263dcbabab 
					 
					
						
						
							
							Simplify the comms benchmark  
						
						
						
						
					 
					
						2019-07-30 22:51:04 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						d85dcc72df 
					 
					
						
						
							
							Multinode fix  
						
						
						
						
					 
					
						2019-07-20 07:13:28 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						0561c2edeb 
					 
					
						
						
							
							Benchmarks modified for new GPU constructs  
						
						
						
						
					 
					
						2019-06-15 12:52:56 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						3e41b1055c 
					 
					
						
						
							
							Remove Gpu only kernels.  
						
						
						
						
					 
					
						2019-06-09 11:20:01 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						da8d87e9da 
					 
					
						
						
							
							Cuda switch off  
						
						
						
						
					 
					
						2019-06-08 17:11:38 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						6d77941990 
					 
					
						
						
							
							Drop the 5D vec actions  
						
						
						
						
					 
					
						2019-06-08 13:38:05 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						47c063f984 
					 
					
						
						
							
							Remove Ls Vec cases from benchmarks  
						
						
						
						
					 
					
						2019-06-04 20:45:35 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						ee6f96d85c 
					 
					
						
						
							
							Merge pull request  #210  from grid-test-organisation/feature/gpu-port-develop  
						
						... 
						
						
						
						Cayley fermion functions for GPUs 
						
						
					 
					
						2019-05-18 19:06:20 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						4e9df9e93c 
					 
					
						
						
							
							GPU patches  
						
						
						
						
					 
					
						2019-05-18 17:43:11 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						e3c56fd9b3 
					 
					
						
						
							
							CayleyZeroCounters before benchmark loop  
						
						
						
						
					 
					
						2019-05-13 15:52:00 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						d9438627d9 
					 
					
						
						
							
							M5D benchmark without vector copy overhead  
						
						
						
						
					 
					
						2019-05-02 11:10:57 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						6da9aa9971 
					 
					
						
						
							
							replace std::vector with Vector in benchmark  
						
						
						
						
					 
					
						2019-05-02 10:56:22 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						b52fa38f8c 
					 
					
						
						
							
							seed initialisation of RNG5  
						
						
						
						
					 
					
						2019-05-02 10:36:09 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						c43a2b599a 
					 
					
						
						
							
							GPU support  
						
						
						
						
					 
					
						2019-01-01 15:07:29 +00:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						b57a4d32aa 
					 
					
						
						
							
							Merge branch 'develop' into feature/gpu-port  
						
						
						
						
					 
					
						2018-12-13 05:11:34 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						0ba3d469c7 
					 
					
						
						
							
							Benchmark IO in single and double precision  
						
						
						
						
					 
					
						2018-10-17 20:27:34 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						291bc2a1f0 
					 
					
						
						
							
							IO benchmark on a list of directories  
						
						
						
						
					 
					
						2018-10-15 17:25:08 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						adbdc4e65b 
					 
					
						
						
							
							Half comms not working on GPU yet, so disable.  
						
						
						
						
					 
					
						2018-09-11 05:15:22 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						f4bfeb835d 
					 
					
						
						
							
							Drop back to smaller Ls  
						
						
						
						
					 
					
						2018-09-09 14:25:06 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						a15a2dfd29 
					 
					
						
						
							
							Merge branch 'develop' into feature/hadrons  
						
						
						
						
					 
					
						2018-08-10 16:08:22 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						27cdb79063 
					 
					
						
						
							
							Sha used to seed from a unique string  
						
						
						
						
					 
					
						2018-08-10 15:11:01 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						00b92a91b5 
					 
					
						
						
							
							Optimising  
						
						
						
						
					 
					
						2018-07-28 23:46:22 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						65533741f7 
					 
					
						
						
							
							7 moms  
						
						
						
						
					 
					
						2018-07-28 16:17:47 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						131a6785d4 
					 
					
						
						
							
							Merge branch 'feature/hadrons-a2a' into feature/hadrons-a2a  
						
						
						
						
					 
					
						2018-07-27 23:03:42 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						44f4f5c8e2 
					 
					
						
						
							
							Momentum loop  
						
						
						
						
					 
					
						2018-07-27 23:00:16 +01:00 
						 
				 
			
				
					
						
							
							
								fionnoh 
							
						 
					 
					
						
						
							
						
						2679df034f 
					 
					
						
						
							
							Changes to meson field benchmark. Now includes the gammas in the final part of the naive method, both methods compute  
						
						... 
						
						
						
						lhs^dag*Gamma*rhs (previously Gamma*lhs^dag*rhs), and checks results. 
						
						
					 
					
						2018-07-27 18:31:10 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						71e1006ba8 
					 
					
						
						
							
							Updated meson field benchmark for dirac structures  
						
						
						
						
					 
					
						2018-07-26 09:09:29 +01:00 
						 
				 
			
				
					
						
							
							
								fionnoh 
							
						 
					 
					
						
						
							
						
						24128ff109 
					 
					
						
						
							
							Changes needed for MF benchmark to work with comms correctly  
						
						
						
						
					 
					
						2018-07-23 15:51:37 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						21a1710b43 
					 
					
						
						
							
							Verbose vector length  
						
						
						
						
					 
					
						2018-07-23 06:08:39 -04:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						ec9939c1ba 
					 
					
						
						
							
							Test for faster implementation of meson field inner loop  
						
						... 
						
						
						
						This should be possible to cache block at outer levels, global sum across nodes not performed
and deferred to caller to block them all into a big all reduce.
Nc=3 and Fermion is hard coded in an ugly way. We might think about benchmarking whether
a product without the conjugate should be made available by Grid.
It is not clear whether the explicit unroll, or the performing of conjugate on left once
was the real source of the speed up.
Gives 70-80 GF/s on my laptop (single) half that double, and 70GB/s to cache.
This is competitive with dslash and a reasonable stopping point for the optimisation. If necessary we can revisit. 
						
						
					 
					
						2018-07-10 12:38:51 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						4b04ae3611 
					 
					
						
						
							
							Printing improvement  
						
						
						
						
					 
					
						2018-07-05 06:59:38 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						2f776d51c6 
					 
					
						
						
							
							Gpu specific benchmark saturates memory. Can enhance Grid to do this for expressions,  
						
						... 
						
						
						
						but a bitof (known) work. 
						
						
					 
					
						2018-07-05 06:58:37 -04:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						25becc9324 
					 
					
						
						
							
							GPU tweaks for benchmarking; really necessary?  
						
						
						
						
					 
					
						2018-06-13 20:26:07 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						eb921041d0 
					 
					
						
						
							
							Perf count control  
						
						
						
						
					 
					
						2018-05-12 17:57:32 -04:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						bfbf2f1fa0 
					 
					
						
						
							
							no threaded stencil benchmark if OpenMP is not supported  
						
						
						
						
					 
					
						2018-05-03 16:20:01 +01:00 
						 
				 
			
				
					
						
							
							
								Dr Peter Boyle 
							
						 
					 
					
						
						
							
						
						1dddd17e3c 
					 
					
						
						
							
							Benchmark improvements from tesseract  
						
						
						
						
					 
					
						2018-04-27 11:44:46 +01:00