Peter Boyle 
							
						 
					 
					
						
						
							
						
						e279b2be29 
					 
					
						
						
							
							Merge develop  
						
						
						
						
					 
					
						2019-08-14 23:01:59 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						48e6efc7c9 
					 
					
						
						
							
							Merge branch 'develop' into feature/gpu-port  
						
						... 
						
						
						
						Conflicts:
	Grid/qcd/action/fermion/WilsonKernelsAsm.cc
	Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h
	Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h
	benchmarks/Benchmark_comms.cc 
						
						
					 
					
						2019-08-14 18:56:54 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						263dcbabab 
					 
					
						
						
							
							Simplify the comms benchmark  
						
						
						
						
					 
					
						2019-07-30 22:51:04 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						d85dcc72df 
					 
					
						
						
							
							Multinode fix  
						
						
						
						
					 
					
						2019-07-20 07:13:28 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						0561c2edeb 
					 
					
						
						
							
							Benchmarks modified for new GPU constructs  
						
						
						
						
					 
					
						2019-06-15 12:52:56 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						3e41b1055c 
					 
					
						
						
							
							Remove Gpu only kernels.  
						
						
						
						
					 
					
						2019-06-09 11:20:01 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						da8d87e9da 
					 
					
						
						
							
							Cuda switch off  
						
						
						
						
					 
					
						2019-06-08 17:11:38 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						6d77941990 
					 
					
						
						
							
							Drop the 5D vec actions  
						
						
						
						
					 
					
						2019-06-08 13:38:05 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						47c063f984 
					 
					
						
						
							
							Remove Ls Vec cases from benchmarks  
						
						
						
						
					 
					
						2019-06-04 20:45:35 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						ee6f96d85c 
					 
					
						
						
							
							Merge pull request  #210  from grid-test-organisation/feature/gpu-port-develop  
						
						... 
						
						
						
						Cayley fermion functions for GPUs 
						
						
					 
					
						2019-05-18 19:06:20 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						4e9df9e93c 
					 
					
						
						
							
							GPU patches  
						
						
						
						
					 
					
						2019-05-18 17:43:11 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						e3c56fd9b3 
					 
					
						
						
							
							CayleyZeroCounters before benchmark loop  
						
						
						
						
					 
					
						2019-05-13 15:52:00 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						d9438627d9 
					 
					
						
						
							
							M5D benchmark without vector copy overhead  
						
						
						
						
					 
					
						2019-05-02 11:10:57 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						6da9aa9971 
					 
					
						
						
							
							replace std::vector with Vector in benchmark  
						
						
						
						
					 
					
						2019-05-02 10:56:22 +01:00 
						 
				 
			
				
					
						
							
							
								gfilaci 
							
						 
					 
					
						
						
							
						
						b52fa38f8c 
					 
					
						
						
							
							seed initialisation of RNG5  
						
						
						
						
					 
					
						2019-05-02 10:36:09 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						c43a2b599a 
					 
					
						
						
							
							GPU support  
						
						
						
						
					 
					
						2019-01-01 15:07:29 +00:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						b57a4d32aa 
					 
					
						
						
							
							Merge branch 'develop' into feature/gpu-port  
						
						
						
						
					 
					
						2018-12-13 05:11:34 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						0ba3d469c7 
					 
					
						
						
							
							Benchmark IO in single and double precision  
						
						
						
						
					 
					
						2018-10-17 20:27:34 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						291bc2a1f0 
					 
					
						
						
							
							IO benchmark on a list of directories  
						
						
						
						
					 
					
						2018-10-15 17:25:08 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						adbdc4e65b 
					 
					
						
						
							
							Half comms not working on GPU yet, so disable.  
						
						
						
						
					 
					
						2018-09-11 05:15:22 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						f4bfeb835d 
					 
					
						
						
							
							Drop back to smaller Ls  
						
						
						
						
					 
					
						2018-09-09 14:25:06 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						a15a2dfd29 
					 
					
						
						
							
							Merge branch 'develop' into feature/hadrons  
						
						
						
						
					 
					
						2018-08-10 16:08:22 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						27cdb79063 
					 
					
						
						
							
							Sha used to seed from a unique string  
						
						
						
						
					 
					
						2018-08-10 15:11:01 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						00b92a91b5 
					 
					
						
						
							
							Optimising  
						
						
						
						
					 
					
						2018-07-28 23:46:22 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						65533741f7 
					 
					
						
						
							
							7 moms  
						
						
						
						
					 
					
						2018-07-28 16:17:47 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						131a6785d4 
					 
					
						
						
							
							Merge branch 'feature/hadrons-a2a' into feature/hadrons-a2a  
						
						
						
						
					 
					
						2018-07-27 23:03:42 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						44f4f5c8e2 
					 
					
						
						
							
							Momentum loop  
						
						
						
						
					 
					
						2018-07-27 23:00:16 +01:00 
						 
				 
			
				
					
						
							
							
								fionnoh 
							
						 
					 
					
						
						
							
						
						2679df034f 
					 
					
						
						
							
							Changes to meson field benchmark. Now includes the gammas in the final part of the naive method, both methods compute  
						
						... 
						
						
						
						lhs^dag*Gamma*rhs (previously Gamma*lhs^dag*rhs), and checks results. 
						
						
					 
					
						2018-07-27 18:31:10 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						71e1006ba8 
					 
					
						
						
							
							Updated meson field benchmark for dirac structures  
						
						
						
						
					 
					
						2018-07-26 09:09:29 +01:00 
						 
				 
			
				
					
						
							
							
								fionnoh 
							
						 
					 
					
						
						
							
						
						24128ff109 
					 
					
						
						
							
							Changes needed for MF benchmark to work with comms correctly  
						
						
						
						
					 
					
						2018-07-23 15:51:37 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						21a1710b43 
					 
					
						
						
							
							Verbose vector length  
						
						
						
						
					 
					
						2018-07-23 06:08:39 -04:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						ec9939c1ba 
					 
					
						
						
							
							Test for faster implementation of meson field inner loop  
						
						... 
						
						
						
						It should be possible to cache-block this at outer levels; the global sum across nodes is not performed here
and is deferred to the caller, which can block them all into one big all-reduce.
Nc=3 and Fermion is hard coded in an ugly way. We might think about benchmarking whether
a product without the conjugate should be made available by Grid.
It is not clear whether the explicit unroll, or the performing of conjugate on left once
was the real source of the speed up.
Gives 70-80 GF/s on my laptop in single precision (half that in double), and 70 GB/s to cache.
This is competitive with dslash and a reasonable stopping point for the optimisation. If necessary we can revisit. 
						
						
					 
					
						2018-07-10 12:38:51 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						4b04ae3611 
					 
					
						
						
							
							Printing improvement  
						
						
						
						
					 
					
						2018-07-05 06:59:38 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						2f776d51c6 
					 
					
						
						
							
							Gpu specific benchmark saturates memory. Can enhance Grid to do this for expressions,  
						
						... 
						
						
						
						but a bit of (known) work. 
						
						
					 
					
						2018-07-05 06:58:37 -04:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						25becc9324 
					 
					
						
						
							
							GPU tweaks for benchmarking; really necessary?  
						
						
						
						
					 
					
						2018-06-13 20:26:07 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						eb921041d0 
					 
					
						
						
							
							Perf count control  
						
						
						
						
					 
					
						2018-05-12 17:57:32 -04:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						bfbf2f1fa0 
					 
					
						
						
							
							no threaded stencil benchmark if OpenMP is not supported  
						
						
						
						
					 
					
						2018-05-03 16:20:01 +01:00 
						 
				 
			
				
					
						
							
							
								Dr Peter Boyle 
							
						 
					 
					
						
						
							
						
						1dddd17e3c 
					 
					
						
						
							
							Benchmark improvements from tesseract  
						
						
						
						
					 
					
						2018-04-27 11:44:46 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						fa0d8feff4 
					 
					
						
						
							
							Performance of CovariantCshift now non-embarrassing.  
						
						
						
						
					 
					
						2018-04-26 17:56:27 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						05b44aef6b 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/paboyle/Grid  into develop  
						
						... 
						
						
						
						Conflicts:
	benchmarks/Benchmark_su3.cc 
						
						
					 
					
						2018-04-26 15:38:49 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						91a0a3f820 
					 
					
						
						
							
							Improvement  
						
						
						
						
					 
					
						2018-04-26 14:48:35 +01:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						8f44c799a6 
					 
					
						
						
							
							Saving the benchmarking tests for Cshift  
						
						
						
						
					 
					
						2018-04-26 14:48:03 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						43f5a0df50 
					 
					
						
						
							
							More timers in the integrator  
						
						
						
						
					 
					
						2018-04-26 12:01:56 +09:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						2baf193031 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/paboyle/Grid  into develop  
						
						
						
						
					 
					
						2018-04-25 00:14:03 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						362ba0443a 
					 
					
						
						
							
							Cshift updates  
						
						
						
						
					 
					
						2018-04-25 00:12:11 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						c5b9147b53 
					 
					
						
						
							
							Correction of a minor bug in the su3 benchmark  
						
						
						
						
					 
					
						2018-04-24 08:03:57 -07:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						a1be533329 
					 
					
						
						
							
							Corrected Flop count in Benchmark su3 and expanded the Wilson flow output  
						
						
						
						
					 
					
						2018-04-24 01:19:53 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						b5510427f9 
					 
					
						
						
							
							physical fermion interface, cshift benchmark in SU3.  
						
						
						
						
					 
					
						2018-04-18 01:43:29 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						276f113f28 
					 
					
						
						
							
							IO uses master boss node for metadata.  
						
						
						
						
					 
					
						2018-03-30 16:17:05 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						ab6afd18ac 
					 
					
						
						
							
							Still compile if no LIME  
						
						
						
						
					 
					
						2018-03-30 13:39:20 +01:00