d5e95bc350 
					 
					
						
						
							
							Merge branch 'release/v0.6.0' into feature/feynman-rules  
						
						
						
						
					 
					
						2016-10-31 18:36:21 +00:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						e1042aef77 
					 
					
						
						
							
							First version of the double prec for testing purposes  
						
						... 
						
						
						
						It does not compile single and double version at the same time 
						
						
					 
					
						2016-10-28 17:20:04 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						aa6a839c60 
					 
					
						
						
							
							avx512 build fix; detect clang/gcc intrinsics  vs. ICPC  
						
						
						
						
					 
					
						2016-10-28 09:13:09 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						ca21003f01 
					 
					
						
						
							
							Merge branch 'feature/fft-opt' into feature/feynman-rules  
						
						... 
						
						
						
						# Conflicts:
#	lib/FFT.h
#	lib/qcd/action/fermion/WilsonFermion5D.h
#	tests/core/Test_fft.cc 
						
						
					 
					
						2016-10-26 18:44:47 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						460d0753a1 
					 
					
						
						
							
							Merge branch 'develop' into feature/mpi3  
						
						... 
						
						
						
						Conflicts:
	lib/simd/Grid_avx512.h 
						
						
					 
					
						2016-10-25 01:08:51 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						75ebd3a0d1 
					 
					
						
						
							
							Typo fixes and rotate for CLANG  
						
						
						
						
					 
					
						2016-10-21 22:34:29 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						bd6a228af6 
					 
					
						
						
							
							Merge commit '20a091c3eddfdb67a82ece6413740a93650a2f98' into feature/feynman-rules  
						
						
						
						
					 
					
						2016-10-21 13:10:30 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						20a091c3ed 
					 
					
						
						
							
							Intel vs. Clang intrinsics differences absorbed  
						
						
						
						
					 
					
						2016-10-21 09:08:36 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						997fd882ff 
					 
					
						
						
							
							Merge branch 'develop' into feature/feynman-rules  
						
						... 
						
						
						
						# Conflicts:
#	lib/Threads.h
#	lib/qcd/action/fermion/WilsonFermion.cc
#	lib/qcd/action/fermion/WilsonFermion.h
#	lib/qcd/utils/SUn.h
#	lib/simd/Grid_avx.h
#	lib/simd/Intel512common.h 
						
						
					 
					
						2016-10-19 18:35:18 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						811ca45473 
					 
					
						
						
							
							GNU clang hack for AVX512 since there are missing reduce intrinsics in Clang 3.9 and GCC-6 AVX512 support  
						
						
						
						
					 
					
						2016-10-17 16:23:21 +01:00 
						 
				 
			
				
					
						
							
							
								azusayamaguchi 
							
						 
					 
					
						
						
							
						
						81f2aeaece 
					 
					
						
						
							
							KNL streaming stores, and KNL performance counters  
						
						
						
						
					 
					
						2016-10-12 11:45:22 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						6f408256bc 
					 
					
						
						
							
							FMA4 option moved on the align  
						
						
						
						
					 
					
						2016-10-11 10:03:01 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8d11681aac 
					 
					
						
						
							
							verbose remove  
						
						
						
						
					 
					
						2016-10-10 23:50:42 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						3d5c9a1ee9 
					 
					
						
						
							
							No compile fix on clang++ 3.9  
						
						
						
						
					 
					
						2016-10-10 23:50:13 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						611b5d74ba 
					 
					
						
						
							
							Fix for AVX+FMA3 compilation  
						
						
						
						
					 
					
						2016-10-10 15:26:17 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						cb02b7088f 
					 
					
						
						
							
							Merge branch 'develop' into feature/doxygen  
						
						... 
						
						
						
						# Conflicts:
#	configure.ac 
						
						
					 
					
						2016-10-09 13:35:44 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						87acd06990 
					 
					
						
						
							
							Use streaming stores  
						
						
						
						
					 
					
						2016-09-26 10:11:34 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						836e929565 
					 
					
						
						
							
							Divide handling improved  
						
						
						
						
					 
					
						2016-09-26 09:42:22 +01:00 
						 
				 
			
				
					
						
							
							
								Antonin Portelli 
							
						 
					 
					
						
						
							
						
						0724f7af75 
					 
					
						
						
							
							QPX single precision implementation  
						
						
						
						
					 
					
						2016-09-19 18:09:12 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						4d11a6f5f2 
					 
					
						
						
							
							first commit for QPX intrinsics  
						
						
						
						
					 
					
						2016-08-23 14:41:44 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						17097a93ec 
					 
					
						
						
							
							FFTW test ran over 4 mpi processes.  
						
						
						
						
					 
					
						2016-08-17 01:33:55 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						b1cfb4d661 
					 
					
						
						
							
							first try at a nicer Doxygen implementation  
						
						
						
						
					 
					
						2016-08-05 15:29:18 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						93d29bb699 
					 
					
						
						
							
							build system improvements after discussion with Peter  
						
						
						
						
					 
					
						2016-08-04 16:19:59 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						e9f30cab2c 
					 
					
						
						
							
							first working version for the new build system  
						
						
						
						
					 
					
						2016-07-30 17:53:18 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						4908b77d46 
					 
					
						
						
							
							Fixed conflicts. PLEASE avoid making wholesale cosmetic only changes, this created  
						
						... 
						
						
						
						a HUGE amount of difficult to resolve and understand conflicts .
Wholesale formatting, reordering functions etc... in a central file like Tensor_class
or Grid_vector_types while others are also editing without making substantial functionality
changes creates pain. 
						
						
					 
					
						2016-07-15 20:59:07 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						f4dd5062d7 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/paboyle/Grid  into develop  
						
						
						
						
					 
					
						2016-07-15 19:26:06 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8f47d0b5ab 
					 
					
						
						
							
							Rotation needed for hopping term in fifth dim with Ls vectorised fields  
						
						
						
						
					 
					
						2016-07-14 23:45:36 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a0676beeb1 
					 
					
						
						
							
							Open up dependency on Eigen and FFTW  
						
						
						
						
					 
					
						2016-07-07 22:31:07 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						e3d5319470 
					 
					
						
						
							
							Debugged the real() and imag() functions and added tests to Test_Simd  
						
						
						
						
					 
					
						2016-07-06 14:16:03 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						fdfbf11c6d 
					 
					
						
						
							
							Merge branch 'develop' into temporary-smearing  
						
						
						
						
					 
					
						2016-07-04 18:45:10 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						9cb90f714e 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into temporary-smearing  
						
						
						
						
					 
					
						2016-07-04 17:28:40 +01:00 
						 
				 
			
				
					
						
							
							
								Guido Cossu 
							
						 
					 
					
						
						
							
						
						1a6d65c6a4 
					 
					
						
						
							
							Converted set_uw and set_fj to all complex functions  
						
						
						
						
					 
					
						2016-07-03 10:27:43 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						bdaa5b1767 
					 
					
						
						
							
							Updated to have perfect prefetching for the s-vectorised kernel with any cache blocking.  
						
						
						
						
					 
					
						2016-06-30 14:35:02 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						8fcefc021a 
					 
					
						
						
							
							Improved the prefetching when using cache blocking codes  
						
						
						
						
					 
					
						2016-06-30 14:35:02 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						1445189361 
					 
					
						
						
							
							Control the prefetch strategy  
						
						
						
						
					 
					
						2016-06-30 14:35:02 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a25bec87d9 
					 
					
						
						
							
							Prefetch during save  
						
						
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						2d8bb4c594 
					 
					
						
						
							
							Tweaks  
						
						
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						6d58cb2a68 
					 
					
						
						
							
							Enable reordering of the loops in the assembler for cache friendly.  
						
						... 
						
						
						
						This gets in the way of L2 prefetching however. Do next next link in stencil
prefetching. 
						
						
					 
					
						2016-06-30 14:35:01 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						87418e7df1 
					 
					
						
						
							
							Slightly faster prefetching perf.  
						
						
						
						
					 
					
						2016-06-13 02:32:52 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						55f65b81b5 
					 
					
						
						
							
							Improvements to the assembler interface that let us move chunks of the  
						
						... 
						
						
						
						site and s loop into the kernels. This will save on function call overhead and
guarantee L2 prefetching strategy is right since OMP can't distribute the
sub-chunks of work. 
						
						
					 
					
						2016-06-09 01:12:36 -07:00 
						 
				 
			
				
					
						
							
							
								Azusa Yamaguchi 
							
						 
					 
					
						
						
							
						
						d9408893b3 
					 
					
						
						
							
							Prefetching in the normal kernel implementation.  
						
						
						
						
					 
					
						2016-06-08 05:43:48 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						139cc5f1ae 
					 
					
						
						
							
							Large change with KNL preparation  
						
						
						
						
					 
					
						2016-06-03 03:24:26 -07:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						9d5f693cbe 
					 
					
						
						
							
							empty SIMD fix  
						
						
						
						
					 
					
						2016-05-24 10:56:27 +01:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						91e04056f9 
					 
					
						
						
							
							fix of the empty SIMD  
						
						
						
						
					 
					
						2016-05-12 19:24:10 +01:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						c23375cd65 
					 
					
						
						
							
							Testing travis CI integration  
						
						
						
						
					 
					
						2016-04-30 06:30:56 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						c79ea0dcef 
					 
					
						
						
							
							Fixing IMCI  
						
						
						
						
					 
					
						2016-04-22 21:52:54 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						e3f141f82f 
					 
					
						
						
							
							Fixed SSE compile with typecasts  
						
						
						
						
					 
					
						2016-04-22 10:30:30 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						a6dfa2386b 
					 
					
						
						
							
							GCC choked on intrinsics calls that ICPC did not  
						
						
						
						
					 
					
						2016-04-22 06:33:41 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						587f80cd93 
					 
					
						
						
							
							Updated to compile and pass under intel SDE  
						
						
						
						
					 
					
						2016-04-19 15:13:54 -07:00 
						 
				 
			
				
					
						
							
							
								paboyle 
							
						 
					 
					
						
						
							
						
						528eb773ad 
					 
					
						
						
							
							Merged.  
						
						... 
						
						
						
						Merge branch 'master' of https://github.com/paboyle/Grid  
						
						
					 
					
						2016-04-19 22:24:34 +01:00