1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-11 14:40:46 +01:00

Inline assembly hooks for AVX-512. In some ways this is a better way to generate assembly than BAGEL.

Updated Grid_avx512.h
This commit is contained in:
paboyle 2015-11-04 03:09:06 -08:00
parent 1878bf97d0
commit a38762159c
2 changed files with 1126 additions and 7 deletions

1112
lib/simd/Avx512Asm.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -8,10 +8,7 @@
//---------------------------------------------------------------------- //----------------------------------------------------------------------
#include <immintrin.h> #include <immintrin.h>
#ifndef KNC_ONLY_STORES
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
#endif
namespace Optimization { namespace Optimization {
@ -59,14 +56,15 @@ namespace Optimization {
struct Vstream{ struct Vstream{
//Float //Float
inline void operator()(float * a, __m512 b){ inline void operator()(float * a, __m512 b){
_mm512_storenrngo_ps(a,b); //_mm512_stream_ps(a,b);
_mm512_store_ps(a,b);
} }
//Double //Double
inline void operator()(double * a, __m512d b){ inline void operator()(double * a, __m512d b){
_mm512_storenrngo_pd(a,b); //_mm512_stream_pd(a,b);
_mm512_store_pd(a,b);
} }
}; };
@ -180,6 +178,15 @@ namespace Optimization {
}; };
struct Mult{ struct Mult{
inline void mac(__m512 &a, __m512 b, __m512 c){
a= _mm512_fmadd_ps( b, c, a);
}
inline void mac(__m512d &a, __m512d b, __m512d c){
a= _mm512_fmadd_pd( b, c, a);
}
// Real float // Real float
inline __m512 operator()(__m512 a, __m512 b){ inline __m512 operator()(__m512 a, __m512 b){
return _mm512_mul_ps(a,b); return _mm512_mul_ps(a,b);