azusayamaguchi 
							
						 
					 
					
						
						
							
						
						c190221fd3 
					 
					
						
						
							
							Internal SHM comms in non-simd directions working  
						
						... 
						
						
						
						Need to fix simd directions 
						
						
					 
					
						2016-10-22 18:14:27 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a762b1fb71 
					 
					
						
						
							
							MPI3 working with a bounce through shared memory on my laptop.  
						
						... 
						
						
						
						Longer term plan: make the "u_comm_buf" in Stencil point to the shared region and avoid the
send between ranks on same node. 
						
						
					 
					
						2016-10-21 09:03:26 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						81f2aeaece 
					 
					
						
						
							
							KNL streaming stores, and KNL performance counters  
						
						
						
						
					 
					
						2016-10-12 11:45:22 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						2e453dfbf5 
					 
					
						
						
							
							Added some instrumentation to benchmark the force computation  
						
						
						
						
					 
					
						2016-10-06 17:52:45 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						4089984431 
					 
					
						
						
							
							Timing hooks  
						
						
						
						
					 
					
						2016-10-06 09:25:12 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						0fd179fb33 
					 
					
						
						
							
							Merge branch 'develop' into feature/hirep  
						
						
						
						
					 
					
						2016-09-01 12:59:53 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						fd5614738d 
					 
					
						
						
							
							Merge branch 'develop' into feature/hirep  
						
						
						
						
					 
					
						2016-08-30 18:21:36 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						5a68715be3 
					 
					
						
						
							
							Richards sweep test  
						
						
						
						
					 
					
						2016-08-05 10:51:57 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						32bc7a6ab8 
					 
					
						
						
							
							MPI back out of change that hangs  
						
						... 
						
						
						
						AVX2 for clang, gcc needs the -mfma flag. 
						
						
					 
					
						2016-08-05 10:36:00 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						b65e72e521 
					 
					
						
						
							
							Merge pull request  #43  from rprollins/bench/output-format  
						
						... 
						
						
						
						Benchmark_dwf_sweep and Benchmark_zmm output formats 
						
						
					 
					
						2016-08-04 16:47:01 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						629283726b 
					 
					
						
						
							
							build system: local Grid link flag moved to configure.ac  
						
						
						
						
					 
					
						2016-08-03 15:07:42 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						9e5b934d21 
					 
					
						
						
							
							improved LAPACK configuration  
						
						
						
						
					 
					
						2016-08-02 17:26:54 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						e9f30cab2c 
					 
					
						
						
							
							first working version for the new build system  
						
						
						
						
					 
					
						2016-07-30 17:53:18 +01:00 
						 
				 
			
				
					
						
							
							
								Richard Rollins 
							
						 
					 
					
						
						
							
						
						df6c9f55d1 
					 
					
						
						
							
							Use common benchmark output format for dwf_sweep and zmm  
						
						
						
						
					 
					
						2016-07-20 17:38:56 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						f4dd5062d7 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/paboyle/Grid  into develop  
						
						
						
						
					 
					
						2016-07-15 19:26:06 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						9db2c6525d 
					 
					
						
						
							
							updating benchmarks for red black 4d for Ls vectorised code  
						
						
						
						
					 
					
						2016-07-14 23:44:02 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						ef97e32152 
					 
					
						
						
							
							Adding persistent communicators  
						
						
						
						
					 
					
						2016-07-08 17:16:08 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						5028969d4b 
					 
					
						
						
							
							Added generators for the adjoint representation  
						
						
						
						
					 
					
						2016-07-08 15:40:11 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a0676beeb1 
					 
					
						
						
							
							Open up dependency on Eigen and FFTW  
						
						
						
						
					 
					
						2016-07-07 22:31:07 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						fdfbf11c6d 
					 
					
						
						
							
							Merge branch 'develop' into temporary-smearing  
						
						
						
						
					 
					
						2016-07-04 18:45:10 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						9cb90f714e 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into temporary-smearing  
						
						
						
						
					 
					
						2016-07-04 17:28:40 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						bfe14000a9 
					 
					
						
						
							
							Double compile fix  
						
						
						
						
					 
					
						2016-07-01 16:33:51 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						680645f849 
					 
					
						
						
							
							Merge branch 'release/v0.5.0'  
						
						
						
						
					 
					
						2016-06-30 15:15:03 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						2d8bb4c594 
					 
					
						
						
							
							Tweaks  
						
						
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						51cb2d4328 
					 
					
						
						
							
							update file lists  
						
						
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						6d58cb2a68 
					 
					
						
						
							
							Enable reordering of the loops in the assembler for cache friendliness.  
						
						... 
						
						
						
						This gets in the way of L2 prefetching however. Do next next link in stencil
prefetching. 
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						565e9329ba 
					 
					
						
						
							
							Changed the colouring classes  
						
						
						
						
					 
					
						2016-06-30 16:51:03 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						5e02392f9c 
					 
					
						
						
							
							Fixed compilation error for benchmark_dwf  
						
						... 
						
						
						
						Some parts were assuming floating point precision 
						
						
					 
					
						2016-06-20 12:30:51 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						55f65b81b5 
					 
					
						
						
							
							Improvements to the assembler interface that let us move chunks of the  
						
						... 
						
						
						
						site and s loop into the kernels. This will save on function call overhead and
guarantee L2 prefetching strategy is right since OMP can't distribute the
sub-chunks of work. 
						
						
					 
					
						2016-06-09 01:12:36 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						05acc22920 
					 
					
						
						
							
							placeholder for non temporal loads optimisation  
						
						
						
						
					 
					
						2016-06-07 13:18:21 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8ac021de73 
					 
					
						
						
							
							Added a test and fixed it for red black precon Ls innermost vectorised DWF  
						
						
						
						
					 
					
						2016-06-07 13:16:56 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						786ca52c43 
					 
					
						
						
							
							Problems remain in the red black preconditioning of the Ls vectorisation  
						
						
						
						
					 
					
						2016-06-06 07:05:51 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						53d06046b0 
					 
					
						
						
							
							Compiling updates for KNL  
						
						
						
						
					 
					
						2016-06-03 03:47:54 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						139cc5f1ae 
					 
					
						
						
							
							Large change with KNL preparation  
						
						
						
						
					 
					
						2016-06-03 03:24:26 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						f2ae9682ff 
					 
					
						
						
							
							Remove some timing hacks  
						
						
						
						
					 
					
						2016-04-19 15:14:32 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						528eb773ad 
					 
					
						
						
							
							Merged.  
						
						... 
						
						
						
						Merge branch 'master' of https://github.com/paboyle/Grid  
						
						
					 
					
						2016-04-19 22:24:34 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						c323425496 
					 
					
						
						
							
							Small change  
						
						
						
						
					 
					
						2016-04-11 10:38:43 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						650e02b344 
					 
					
						
						
							
							Smaller vols too  
						
						
						
						
					 
					
						2016-04-06 06:52:09 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a524ca2a4b 
					 
					
						
						
							
							New benchmark update  
						
						
						
						
					 
					
						2016-04-06 03:35:56 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						23a7176b71 
					 
					
						
						
							
							Loop over volumes  
						
						
						
						
					 
					
						2016-04-06 03:22:11 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						b1192a8908 
					 
					
						
						
							
							Benchmark_zmm added  
						
						
						
						
					 
					
						2016-04-06 03:00:07 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						e8dddb1596 
					 
					
						
						
							
							Adding extra benchmark  
						
						
						
						
					 
					
						2016-04-06 10:32:54 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						c77b7ee897 
					 
					
						
						
							
							AddSub based alternate SU3 routine  
						
						
						
						
					 
					
						2016-03-28 17:55:22 -06:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						e17c773a0b 
					 
					
						
						
							
							Longer runs for vtune  
						
						
						
						
					 
					
						2016-03-16 02:29:13 -07:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						f7be108e35 
					 
					
						
						
							
							100 iters faster  
						
						
						
						
					 
					
						2016-02-15 16:03:04 -06:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						fc6ad65751 
					 
					
						
						
							
							Pushed the overlap comms tweaks  
						
						
						
						
					 
					
						2016-01-11 06:34:22 -08:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						02452afd36 
					 
					
						
						
							
							Optional overlap of comms with compute  
						
						
						
						
					 
					
						2016-01-04 14:18:40 +00:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						331768dcff 
					 
					
						
						
							
							Added overlap comms compute mode  
						
						
						
						
					 
					
						2016-01-03 01:38:11 +00:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						aae8bf31a7 
					 
					
						
						
							
							Global edit adding copyright and license info to every source file.  
						
						
						
						
					 
					
						2016-01-02 14:51:32 +00:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						3ce10aa975 
					 
					
						
						
							
							Fix a regression failure on Mobius; chroma regression added  
						
						
						
						
					 
					
						2015-12-10 22:55:00 +00:00