1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Inline assembly hooks for AVX-512. In some respects this is a better way to generate assembly than BAGEL.

Updated Grid_avx512.h
This commit is contained in:
paboyle 2015-11-04 03:09:06 -08:00
parent 1878bf97d0
commit a38762159c
2 changed files with 1126 additions and 7 deletions

1112
lib/simd/Avx512Asm.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -8,10 +8,7 @@
//----------------------------------------------------------------------
#include <immintrin.h>
// Compatibility aliases: unless KNC_ONLY_STORES is defined, the
// _mm512_storenrngo_ps / _mm512_storenrngo_pd names are redirected to the
// plain _mm512_store_ps / _mm512_store_pd intrinsics, so code written against
// the storenrngo names still compiles.
// NOTE(review): KNC_ONLY_STORES is not defined anywhere in the visible text;
// presumably the build sets it for targets that provide storenrngo natively -- confirm.
#ifndef KNC_ONLY_STORES
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
#endif
namespace Optimization {
@ -59,14 +56,15 @@ namespace Optimization {
// Functor that writes one 512-bit SIMD register out to memory at a given address.
// Two overloads are provided: __m512 -> float* and __m512d -> double*.
//
// NOTE(review): this listing looks like a diff hunk whose +/- markers were
// stripped. The _mm512_storenrngo_* call and the _mm512_store_* call in each
// overload are therefore probably the old and new versions of one statement,
// not two back-to-back stores -- confirm against the real header before editing.
// NOTE(review): presumably `a` must satisfy the alignment requirement of the
// store intrinsic being used -- verify against the intrinsic documentation.
struct Vstream{
//Float: write the single-precision vector b to the address a
inline void operator()(float * a, __m512 b){
_mm512_storenrngo_ps(a,b);
// The _mm512_stream_ps alternative is left commented out.
//_mm512_stream_ps(a,b);
_mm512_store_ps(a,b);
}
//Double: write the double-precision vector b to the address a
inline void operator()(double * a, __m512d b){
_mm512_storenrngo_pd(a,b);
// The _mm512_stream_pd alternative is left commented out.
//_mm512_stream_pd(a,b);
_mm512_store_pd(a,b);
}
};
@ -180,6 +178,15 @@ namespace Optimization {
};
struct Mult{
inline void mac(__m512 &a, __m512 b, __m512 c){
a= _mm512_fmadd_ps( b, c, a);
}
inline void mac(__m512d &a, __m512d b, __m512d c){
a= _mm512_fmadd_pd( b, c, a);
}
// Real float
inline __m512 operator()(__m512 a, __m512 b){
return _mm512_mul_ps(a,b);