mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-26 05:35:55 +01:00
NAMESPACE & format
This commit is contained in:
parent
08682c5461
commit
fbc2380cb8
@ -29,9 +29,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
namespace Grid{
|
NAMESPACE_BEGIN(Optimization);
|
||||||
namespace Optimization {
|
|
||||||
|
|
||||||
union u512f {
|
union u512f {
|
||||||
__m512 v;
|
__m512 v;
|
||||||
@ -82,7 +81,6 @@ namespace Optimization {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Vstream{
|
struct Vstream{
|
||||||
//Float
|
//Float
|
||||||
inline void operator()(float * a, __m512 b){
|
inline void operator()(float * a, __m512 b){
|
||||||
@ -97,8 +95,6 @@ namespace Optimization {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct Vset{
|
struct Vset{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline __m512 operator()(Grid::ComplexF *a){
|
inline __m512 operator()(Grid::ComplexF *a){
|
||||||
@ -127,7 +123,6 @@ namespace Optimization {
|
|||||||
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Out_type, typename In_type>
|
template <typename Out_type, typename In_type>
|
||||||
@ -142,8 +137,6 @@ namespace Optimization {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
@ -308,8 +301,6 @@ namespace Optimization {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Gpermute utilities; consider coalescing into a single Gpermute
|
// Gpermute utilities; consider coalescing into a single Gpermute
|
||||||
struct Permute{
|
struct Permute{
|
||||||
|
|
||||||
@ -354,6 +345,7 @@ namespace Optimization {
|
|||||||
#endif
|
#endif
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void HtoS (__m512i h,__m512 &sa,__m512 &sb) {
|
static inline void HtoS (__m512i h,__m512 &sa,__m512 &sb) {
|
||||||
#ifdef USE_FP16
|
#ifdef USE_FP16
|
||||||
sa = _mm512_cvtph_ps((__m256i)_mm512_extractf64x4_pd((__m512d)h,0));
|
sa = _mm512_cvtph_ps((__m256i)_mm512_extractf64x4_pd((__m512d)h,0));
|
||||||
@ -362,6 +354,7 @@ namespace Optimization {
|
|||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __m512 DtoS (__m512d a,__m512d b) {
|
static inline __m512 DtoS (__m512d a,__m512d b) {
|
||||||
__m256 sa = _mm512_cvtpd_ps(a);
|
__m256 sa = _mm512_cvtpd_ps(a);
|
||||||
__m256 sb = _mm512_cvtpd_ps(b);
|
__m256 sb = _mm512_cvtpd_ps(b);
|
||||||
@ -369,16 +362,19 @@ namespace Optimization {
|
|||||||
s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1);
|
s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
|
static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
|
||||||
a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0));
|
a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0));
|
||||||
b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1));
|
b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
|
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
|
||||||
__m512 sa,sb;
|
__m512 sa,sb;
|
||||||
sa = DtoS(a,b);
|
sa = DtoS(a,b);
|
||||||
sb = DtoS(c,d);
|
sb = DtoS(c,d);
|
||||||
return StoH(sa,sb);
|
return StoH(sa,sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void HtoD (__m512i h,__m512d &a,__m512d &b,__m512d &c,__m512d &d) {
|
static inline void HtoD (__m512i h,__m512d &a,__m512d &b,__m512d &c,__m512d &d) {
|
||||||
__m512 sa,sb;
|
__m512 sa,sb;
|
||||||
HtoS(h,sa,sb);
|
HtoS(h,sa,sb);
|
||||||
@ -517,7 +513,6 @@ namespace Optimization {
|
|||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//Complex double Reduce
|
//Complex double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
|
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
|
||||||
@ -593,12 +588,11 @@ namespace Optimization {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
}
|
NAMESPACE_END(Optimization);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
|
|
||||||
|
|
||||||
typedef __m512i SIMD_Htype; // Half precision type
|
typedef __m512i SIMD_Htype; // Half precision type
|
||||||
typedef __m512 SIMD_Ftype; // Single precision type
|
typedef __m512 SIMD_Ftype; // Single precision type
|
||||||
typedef __m512d SIMD_Dtype; // Double precision type
|
typedef __m512d SIMD_Dtype; // Double precision type
|
||||||
@ -615,8 +609,6 @@ namespace Optimization {
|
|||||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
_mm_prefetch(ptr,_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Function name aliases
|
// Function name aliases
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
typedef Optimization::Vstore VstoreSIMD;
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
@ -624,7 +616,6 @@ namespace Optimization {
|
|||||||
typedef Optimization::Vstream VstreamSIMD;
|
typedef Optimization::Vstream VstreamSIMD;
|
||||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
|
|
||||||
|
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Sum SumSIMD;
|
||||||
typedef Optimization::Sub SubSIMD;
|
typedef Optimization::Sub SubSIMD;
|
||||||
@ -637,4 +628,4 @@ namespace Optimization {
|
|||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
}
|
NAMESPACE_END(Grid);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user