mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-30 11:34:32 +00:00 
			
		
		
		
	Inline assembly hooks for AVX-512. In some ways this is a better way to generate assembly than BAGEL.
Updated Grid_avx512.h
This commit is contained in:
		
							
								
								
									
										1112
									
								
								lib/simd/Avx512Asm.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1112
									
								
								lib/simd/Avx512Asm.h
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -8,10 +8,7 @@ | ||||
| //---------------------------------------------------------------------- | ||||
|  | ||||
| #include <immintrin.h> | ||||
| #ifndef KNC_ONLY_STORES | ||||
| #define  _mm512_storenrngo_ps _mm512_store_ps  // not present in AVX512 | ||||
| #define  _mm512_storenrngo_pd _mm512_store_pd  // not present in AVX512 | ||||
| #endif | ||||
|  | ||||
|  | ||||
|  | ||||
| namespace Optimization { | ||||
| @@ -59,14 +56,15 @@ namespace Optimization { | ||||
|   struct Vstream{ | ||||
|     //Float | ||||
|     inline void operator()(float * a, __m512 b){ | ||||
|       _mm512_storenrngo_ps(a,b); | ||||
|       //_mm512_stream_ps(a,b); | ||||
|       _mm512_store_ps(a,b); | ||||
|     } | ||||
|     //Double | ||||
|     inline void operator()(double * a, __m512d b){ | ||||
|       _mm512_storenrngo_pd(a,b); | ||||
|       //_mm512_stream_pd(a,b); | ||||
|       _mm512_store_pd(a,b); | ||||
|     } | ||||
|  | ||||
|  | ||||
|   }; | ||||
|  | ||||
|  | ||||
| @@ -180,6 +178,15 @@ namespace Optimization { | ||||
|   }; | ||||
|    | ||||
|   struct Mult{ | ||||
|  | ||||
|     inline void mac(__m512 &a, __m512 b, __m512 c){          | ||||
|        a= _mm512_fmadd_ps( b, c, a);                          | ||||
|     } | ||||
|  | ||||
|     inline void mac(__m512d &a, __m512d b, __m512d c){ | ||||
|       a= _mm512_fmadd_pd( b, c, a);                    | ||||
|     }                                              | ||||
|  | ||||
|     // Real float | ||||
|     inline __m512 operator()(__m512 a, __m512 b){ | ||||
|       return _mm512_mul_ps(a,b); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user