Peter Boyle 
							
						 
					 
					
						
						
							
						
						e27c6b217c 
					 
					
						
						
							
							Updating  
						
						
						
						
					 
					
						2016-12-01 12:42:53 +00:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						6adf35da54 
					 
					
						
						
							
							Faster Mobius  
						
						
						
						
					 
					
						2016-12-01 11:39:04 +00:00 
						 
				 
			
				
					
						
							
							
								Lanny91 
							
						 
					 
					
						
						
							
						
						b18950f776 
					 
					
						
						
							
							Added simd real divide test with QPX divide fixes  
						
						
						
						
					 
					
						2016-11-25 13:21:33 +00:00 
						 
				 
			
				
					
						
							
							
								Lanny91 
							
						 
					 
					
						
						
							
						
						0acbf77bc6 
					 
					
						
						
							
							Add QPX Div structure  
						
						
						
						
					 
					
						2016-11-24 13:24:12 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						a2cffb0304 
					 
					
						
						
							
							AVXFMA target fixed  
						
						
						
						
					 
					
						2016-11-21 17:47:18 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						97cddda49e 
					 
					
						
						
							
							Merge branch 'feature/gen-simd' into feature/doxygen  
						
						... 
						
						
						
						# Conflicts:
#	Makefile.am
#	configure.ac 
						
						
					 
					
						2016-11-19 13:11:13 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						b873504b90 
					 
					
						
						
							
							fully generic SIMD  
						
						
						
						
					 
					
						2016-11-19 01:32:39 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						042ae5b87c 
					 
					
						
						
							
							generic 256bits SIMD  
						
						
						
						
					 
					
						2016-11-15 12:16:15 +00:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						f7b60004f3 
					 
					
						
						
							
							Merge branch 'develop' into release/v0.6.0  
						
						
						
						
					 
					
						2016-11-04 16:08:07 +00:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						d5e95bc350 
					 
					
						
						
							
							Merge branch 'release/v0.6.0' into feature/feynman-rules  
						
						
						
						
					 
					
						2016-10-31 18:36:21 +00:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						e1042aef77 
					 
					
						
						
							
							First version of the double prec for testing purposes  
						
						... 
						
						
						
						It does not compile single and double version at the same time 
						
						
					 
					
						2016-10-28 17:20:04 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						aa6a839c60 
					 
					
						
						
							
							avx512 build fix; detect clang/gcc intrinsics  vs. ICPC  
						
						
						
						
					 
					
						2016-10-28 09:13:09 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						ca21003f01 
					 
					
						
						
							
							Merge branch 'feature/fft-opt' into feature/feynman-rules  
						
						... 
						
						
						
						# Conflicts:
#	lib/FFT.h
#	lib/qcd/action/fermion/WilsonFermion5D.h
#	tests/core/Test_fft.cc 
						
						
					 
					
						2016-10-26 18:44:47 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						460d0753a1 
					 
					
						
						
							
							Merge branch 'develop' into feature/mpi3  
						
						... 
						
						
						
						Conflicts:
	lib/simd/Grid_avx512.h 
						
						
					 
					
						2016-10-25 01:08:51 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						75ebd3a0d1 
					 
					
						
						
							
							Typo fixes and rotate for CLANG  
						
						
						
						
					 
					
						2016-10-21 22:34:29 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						bd6a228af6 
					 
					
						
						
							
							Merge commit '20a091c3eddfdb67a82ece6413740a93650a2f98' into feature/feynman-rules  
						
						
						
						
					 
					
						2016-10-21 13:10:30 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						20a091c3ed 
					 
					
						
						
							
							Intel vs. Clang intrinsics differences absorbed  
						
						
						
						
					 
					
						2016-10-21 09:08:36 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						997fd882ff 
					 
					
						
						
							
							Merge branch 'develop' into feature/feynman-rules  
						
						... 
						
						
						
						# Conflicts:
#	lib/Threads.h
#	lib/qcd/action/fermion/WilsonFermion.cc
#	lib/qcd/action/fermion/WilsonFermion.h
#	lib/qcd/utils/SUn.h
#	lib/simd/Grid_avx.h
#	lib/simd/Intel512common.h 
						
						
					 
					
						2016-10-19 18:35:18 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						811ca45473 
					 
					
						
						
							
							GNU clang hack for AVX512 since there are missing reduce intrinsics in Clang 3.9 and GCC-6 AVX512 support  
						
						
						
						
					 
					
						2016-10-17 16:23:21 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						81f2aeaece 
					 
					
						
						
							
							KNL streaming stores, and KNL performance counters  
						
						
						
						
					 
					
						2016-10-12 11:45:22 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						6f408256bc 
					 
					
						
						
							
							FMA4 option moved on the align  
						
						
						
						
					 
					
						2016-10-11 10:03:01 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8d11681aac 
					 
					
						
						
							
							verbose remove  
						
						
						
						
					 
					
						2016-10-10 23:50:42 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						3d5c9a1ee9 
					 
					
						
						
							
							No compile fix on clang++ 3.9  
						
						
						
						
					 
					
						2016-10-10 23:50:13 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						611b5d74ba 
					 
					
						
						
							
							Fix for AVX+FMA3 compilation  
						
						
						
						
					 
					
						2016-10-10 15:26:17 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						cb02b7088f 
					 
					
						
						
							
							Merge branch 'develop' into feature/doxygen  
						
						... 
						
						
						
						# Conflicts:
#	configure.ac 
						
						
					 
					
						2016-10-09 13:35:44 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						87acd06990 
					 
					
						
						
							
							Use streaming stores  
						
						
						
						
					 
					
						2016-09-26 10:11:34 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						836e929565 
					 
					
						
						
							
							Divide handling improved  
						
						
						
						
					 
					
						2016-09-26 09:42:22 +01:00 
						 
				 
			
				
					
						
							
							
								Antonin Portelli 
							
						 
					 
					
						
						
							
						
						0724f7af75 
					 
					
						
						
							
							QPX single precision implementation  
						
						
						
						
					 
					
						2016-09-19 18:09:12 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						4d11a6f5f2 
					 
					
						
						
							
							first commit for QPX intrinsics  
						
						
						
						
					 
					
						2016-08-23 14:41:44 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						17097a93ec 
					 
					
						
						
							
							FFTW test ran over 4 mpi processes.  
						
						
						
						
					 
					
						2016-08-17 01:33:55 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						b1cfb4d661 
					 
					
						
						
							
							first try at a nicer Doxygen implementation  
						
						
						
						
					 
					
						2016-08-05 15:29:18 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						93d29bb699 
					 
					
						
						
							
							build system improvements after discussion with Peter  
						
						
						
						
					 
					
						2016-08-04 16:19:59 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						e9f30cab2c 
					 
					
						
						
							
							first working version for the new build system  
						
						
						
						
					 
					
						2016-07-30 17:53:18 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						4908b77d46 
					 
					
						
						
							
							Fixed conflicts. PLEASE avoid making wholesale cosmetic only changes, this created  
						
						... 
						
						
						
						a HUGE amount of difficult to resolve and understand conflicts.
Wholesale formatting, reordering functions etc... in a central file like Tensor_class
or Grid_vector_types while others are also editing without making substantial functionality
changes creates pain. 
						
						
					 
					
						2016-07-15 20:59:07 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						f4dd5062d7 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/paboyle/Grid  into develop  
						
						
						
						
					 
					
						2016-07-15 19:26:06 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8f47d0b5ab 
					 
					
						
						
							
							Rotation needed for hopping term in fifth dim with Ls vectorised fields  
						
						
						
						
					 
					
						2016-07-14 23:45:36 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a0676beeb1 
					 
					
						
						
							
							Open up dependency on Eigen and FFTW  
						
						
						
						
					 
					
						2016-07-07 22:31:07 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						e3d5319470 
					 
					
						
						
							
							Debugged the real() and imag() functions and added tests to Test_Simd  
						
						
						
						
					 
					
						2016-07-06 14:16:03 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						fdfbf11c6d 
					 
					
						
						
							
							Merge branch 'develop' into temporary-smearing  
						
						
						
						
					 
					
						2016-07-04 18:45:10 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						9cb90f714e 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into temporary-smearing  
						
						
						
						
					 
					
						2016-07-04 17:28:40 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						1a6d65c6a4 
					 
					
						
						
							
							Converted set_uw and set_fj to all complex functions  
						
						
						
						
					 
					
						2016-07-03 10:27:43 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						bdaa5b1767 
					 
					
						
						
							
							Updated to have perfect prefetching for the s-vectorised kernel with any cache blocking.  
						
						
						
						
					 
					
						2016-06-30 14:35:02 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8fcefc021a 
					 
					
						
						
							
							Improved the prefetching when using cache blocking codes  
						
						
						
						
					 
					
						2016-06-30 14:35:02 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						1445189361 
					 
					
						
						
							
							Control the prefetch strategy  
						
						
						
						
					 
					
						2016-06-30 14:35:02 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a25bec87d9 
					 
					
						
						
							
							Prefetch during save  
						
						
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						2d8bb4c594 
					 
					
						
						
							
							Tweaks  
						
						
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						6d58cb2a68 
					 
					
						
						
							
							Enable reordering of the loops in the assembler for cache friendliness.  
						
						... 
						
						
						
						This gets in the way of L2 prefetching however. Do next next link in stencil
prefetching. 
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						87418e7df1 
					 
					
						
						
							
							Slightly faster prefetching perf.  
						
						
						
						
					 
					
						2016-06-13 02:32:52 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						55f65b81b5 
					 
					
						
						
							
							Improvements to the assembler interface that let us move chunks of the  
						
						... 
						
						
						
						site and s loop into the kernels. This will save on function call overhead and
guarantee L2 prefetching strategy is right since OMP can't distribute the
sub-chunks of work. 
						
						
					 
					
						2016-06-09 01:12:36 -07:00 
						 
				 
			
				
					
						
							
							
								Azusa Yamaguchi 
							
						 
					 
					
						
						
							
						
						d9408893b3 
					 
					
						
						
							
							Prefetching in the normal kernel implementation.  
						
						
						
						
					 
					
						2016-06-08 05:43:48 -07:00