nmeyer-ur 
							
						 
					 
					
						
						
							
						
						046b1cbbc0 
					 
					
						
						
							
							enable fcmla in tensor arithmetics; fixed-size works, VLA does not compile  
						
						
						
						
					 
					
						2020-05-21 19:39:07 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						a65ce237c1 
					 
					
						
						
							
							clean up; Exch1 VLA sp+dp integrate, tested, working  
						
						
						
						
					 
					
						2020-05-21 09:48:06 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						cd27f1005d 
					 
					
						
						
							
							clean up; Exch1 sp integrate, tested, working  
						
						
						
						
					 
					
						2020-05-21 08:45:43 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						f8c0a59221 
					 
					
						
						
							
							clean up; Exch1 dp integrate, tested, working  
						
						
						
						
					 
					
						2020-05-21 02:48:14 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						832485699f 
					 
					
						
						
							
							save some cycles in HtoD and DtoH by direct instead of multi-pass conversion  
						
						
						
						
					 
					
						2020-05-20 23:04:35 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						81484a4760 
					 
					
						
						
							
							symmetrize Mult and MultAddComplex  
						
						
						
						
					 
					
						2020-05-20 22:36:45 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						9a86059761 
					 
					
						
						
							
							symmetrize VLA and fixed size build messages  
						
						
						
						
					 
					
						2020-05-20 20:05:42 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						b780b7b7a0 
					 
					
						
						
							
							guard prevents multiple TOFU messages  
						
						
						
						
					 
					
						2020-05-20 19:20:59 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						9e085bd04e 
					 
					
						
						
							
							guard prevents multiple A64FX build messages  
						
						
						
						
					 
					
						2020-05-20 19:16:30 +02:00 
						 
				 
			
				
					
						
							
							
								ferben 
							
						 
					 
					
						
						
							
						
						6c6812a5ca 
					 
					
						
						
							
							GB/s output  
						
						
						
						
					 
					
						2020-05-20 12:26:57 +01:00 
						 
				 
			
				
					
						
							
							
								Christoph Lehner 
							
						 
					 
					
						
						
							
						
						8358ee38c4 
					 
					
						
						
							
							pull develop  
						
						
						
						
					 
					
						2020-05-19 08:56:18 -04:00 
						 
				 
			
				
					
						
							
							
								ferben 
							
						 
					 
					
						
						
							
						
						1f154fe652 
					 
					
						
						
							
							some cleanup in BaryonUtils  
						
						
						
						
					 
					
						2020-05-19 13:48:56 +01:00 
						 
				 
			
				
					
						
							
							
								ferben 
							
						 
					 
					
						
						
							
						
						d708c0258d 
					 
					
						
						
							
							some cleanup in BaryonUtils  
						
						
						
						
					 
					
						2020-05-19 13:48:00 +01:00 
						 
				 
			
				
					
						
							
							
								Christoph Lehner 
							
						 
					 
					
						
						
							
						
						a7635fd5ba 
					 
					
						
						
							
							summit mem  
						
						
						
						
					 
					
						2020-05-18 17:52:26 -04:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						6b6bf537d3 
					 
					
						
						
							
							comment out mac in vector types  
						
						
						
						
					 
					
						2020-05-18 20:36:16 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						323a651c71 
					 
					
						
						
							
							correct typo  
						
						
						
						
					 
					
						2020-05-18 19:58:27 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						9f212679f1 
					 
					
						
						
							
							support fcmla in vector_types, untested  
						
						
						
						
					 
					
						2020-05-18 19:55:18 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						032f7dde1a 
					 
					
						
						
							
							update SVE readme, asm generator  
						
						
						
						
					 
					
						2020-05-18 19:10:36 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						ebb60330c9 
					 
					
						
						
							
							Automatic data motion options beginning  
						
						
						
						
					 
					
						2020-05-17 16:34:25 -04:00 
						 
				 
			
				
					
						
					 
					
						
						
							
						
						5aa60be17d 
					 
					
						
						
							
							SerialisableClassName method for serialisable enum, and boolean to test if a serialisable object is an enum  
						
						
						
						
					 
					
						2020-05-15 20:00:34 +01:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						50b1db1e8b 
					 
					
						
						
							
							implemented correct _m form (using 3 operands instead of 2)  
						
						
						
						
					 
					
						2020-05-15 10:01:05 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						015d8bb38a 
					 
					
						
						
							
							introduced assertions in Benchmark_wilson, removed data output from Benchmark_dwf  
						
						
						
						
					 
					
						2020-05-15 09:15:50 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						10a34312dc 
					 
					
						
						
							
							some fixed-size code clean up  
						
						
						
						
					 
					
						2020-05-14 23:20:16 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						db8c0e7584 
					 
					
						
						
							
							replaced _x form with _m form when using even/odd predication  
						
						
						
						
					 
					
						2020-05-14 23:17:35 +02:00 
						 
				 
			
				
					
						
							
							
								Christoph Lehner 
							
						 
					 
					
						
						
							
						
						32fbdf4fb1 
					 
					
						
						
							
							Merge pull request  #5  from paboyle/develop  
						
						... 
						
						
						
						Sync upstream 
						
						
					 
					
						2020-05-13 09:02:56 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						a9847aa866 
					 
					
						
						
							
							Dependence fix  
						
						
						
						
					 
					
						2020-05-12 20:03:37 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						2e652431e5 
					 
					
						
						
							
							No compile on Summit fix  
						
						
						
						
					 
					
						2020-05-12 18:56:47 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						8b5b55b682 
					 
					
						
						
							
							Make tests all compile with current Grid; mostly MdagM removal-of-norms fixes, but a few minor  
						
						... 
						
						
						
						issues fixed too 
						
						
					 
					
						2020-05-12 17:57:24 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						0e3c49f687 
					 
					
						
						
							
							TransposeIndex was broken by Christoph  
						
						
						
						
					 
					
						2020-05-12 17:57:01 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						cb7ee37562 
					 
					
						
						
							
							Close expressions in arg to cshift  
						
						
						
						
					 
					
						2020-05-12 17:56:40 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						82f71643a4 
					 
					
						
						
							
							Remove the norm in MdagM  
						
						
						
						
					 
					
						2020-05-12 17:55:53 -04:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						d15ccad8a7 
					 
					
						
						
							
							switched to vec* in Reduce  
						
						
						
						
					 
					
						2020-05-12 20:41:14 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						0009b5cee8 
					 
					
						
						
							
							updated SVE_README  
						
						
						
						
					 
					
						2020-05-12 19:02:33 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						20d1941a45 
					 
					
						
						
							
							enabled asm kernels for fixed-size A64FXFIXEDSIZE  
						
						
						
						
					 
					
						2020-05-12 19:01:12 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						d24d8e8398 
					 
					
						
						
							
							Use X-direction as more bits meaningful on CUDA.  
						
						... 
						
						
						
						2^31-1 should always be enough for the SIMD- and thread-reduced local volume,
e.g. 32*2^31 = 2^36 = (2^9)^4, or 512^4, is big enough.
Where 32 is gpu_threads * Nsimd = 8*4 
						
						
					 
					
						2020-05-12 10:35:49 -04:00 
						 
				 
			
				
					
						
							
							
								Christoph Lehner 
							
						 
					 
					
						
						
							
						
						162e4bb567 
					 
					
						
						
							
							no automatic prefetching for now  
						
						
						
						
					 
					
						2020-05-12 07:01:23 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						07c0c02f8c 
					 
					
						
						
							
							Speed up Cshift  
						
						
						
						
					 
					
						2020-05-11 17:02:01 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						8c31c065b5 
					 
					
						
						
							
							Keep the Vector fixed to protect it from realloc  
						
						
						
						
					 
					
						2020-05-11 17:00:30 -04:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						b7c76ede29 
					 
					
						
						
							
							Removed some assertions in Test_simd and removed exit() in Reduce  
						
						
						
						
					 
					
						2020-05-11 22:43:00 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						05edf803bd 
					 
					
						
						
							
							corrected typo  
						
						
						
						
					 
					
						2020-05-12 03:59:59 +09:00 
						 
				 
			
				
					
						
							
							
								Christoph Lehner 
							
						 
					 
					
						
						
							
						
						b1c86900b2 
					 
					
						
						
							
							Merge pull request  #4  from paboyle/develop  
						
						... 
						
						
						
						merge 
						
						
					 
					
						2020-05-11 20:59:29 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						78b8e40f83 
					 
					
						
						
							
							switched to gcc's internal data types  
						
						
						
						
					 
					
						2020-05-11 18:11:23 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						fc2e9850d3 
					 
					
						
						
							
							temporarily enable TOFU by default when using A64FX or A64FXFIXEDSIZE  
						
						
						
						
					 
					
						2020-05-11 13:25:02 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						ffaaed679e 
					 
					
						
						
							
							MPI_THREAD_SINGLE hack for Fugaku, enabled by -DTOFU  
						
						
						
						
					 
					
						2020-05-11 13:21:39 +02:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						bbbee5660d 
					 
					
						
						
							
							First compile on HIP  
						
						
						
						
					 
					
						2020-05-10 05:28:09 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						ea08f193e7 
					 
					
						
						
							
							Allocator cache split into large/small pools  
						
						
						
						
					 
					
						2020-05-10 05:24:26 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						2bb2c68e15 
					 
					
						
						
							
							Separate pools for small and large allocations cache  
						
						
						
						
					 
					
						2020-05-09 22:57:21 -04:00 
						 
				 
			
				
					
						
							
							
								Peter Boyle 
							
						 
					 
					
						
						
							
						
						efe5bc6a3c 
					 
					
						
						
							
							Split allocator cache into two pools of different sizes  
						
						
						
						
					 
					
						2020-05-09 22:27:56 -04:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						b2fd8b993a 
					 
					
						
						
							
							fixed-size clean up  
						
						
						
						
					 
					
						2020-05-09 22:53:42 +02:00 
						 
				 
			
				
					
						
							
							
								nmeyer-ur 
							
						 
					 
					
						
						
							
						
						291ee8c3d0 
					 
					
						
						
							
							updated fixed-size implementation; only Exch1 and prefetches missing  
						
						
						
						
					 
					
						2020-05-09 22:18:02 +02:00