1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-26 05:35:55 +01:00

NAMESPACE & format

This commit is contained in:
paboyle 2018-01-12 18:05:36 +00:00
parent 08682c5461
commit fbc2380cb8

View File

@ -29,9 +29,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */ /* END LEGAL */
#include <immintrin.h> #include <immintrin.h>
NAMESPACE_BEGIN(Grid);
namespace Grid{ NAMESPACE_BEGIN(Optimization);
namespace Optimization {
union u512f { union u512f {
__m512 v; __m512 v;
@ -82,7 +81,6 @@ namespace Optimization {
}; };
struct Vstream{ struct Vstream{
//Float //Float
inline void operator()(float * a, __m512 b){ inline void operator()(float * a, __m512 b){
@ -97,8 +95,6 @@ namespace Optimization {
}; };
struct Vset{ struct Vset{
// Complex float // Complex float
inline __m512 operator()(Grid::ComplexF *a){ inline __m512 operator()(Grid::ComplexF *a){
@ -127,7 +123,6 @@ namespace Optimization {
a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
} }
}; };
template <typename Out_type, typename In_type> template <typename Out_type, typename In_type>
@ -142,8 +137,6 @@ namespace Optimization {
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -308,8 +301,6 @@ namespace Optimization {
}; };
// Gpermute utilities; consider coalescing into one Gpermute // Gpermute utilities; consider coalescing into one Gpermute
struct Permute{ struct Permute{
@ -354,6 +345,7 @@ namespace Optimization {
#endif #endif
return h; return h;
} }
static inline void HtoS (__m512i h,__m512 &sa,__m512 &sb) { static inline void HtoS (__m512i h,__m512 &sa,__m512 &sb) {
#ifdef USE_FP16 #ifdef USE_FP16
sa = _mm512_cvtph_ps((__m256i)_mm512_extractf64x4_pd((__m512d)h,0)); sa = _mm512_cvtph_ps((__m256i)_mm512_extractf64x4_pd((__m512d)h,0));
@ -362,6 +354,7 @@ namespace Optimization {
assert(0); assert(0);
#endif #endif
} }
static inline __m512 DtoS (__m512d a,__m512d b) { static inline __m512 DtoS (__m512d a,__m512d b) {
__m256 sa = _mm512_cvtpd_ps(a); __m256 sa = _mm512_cvtpd_ps(a);
__m256 sb = _mm512_cvtpd_ps(b); __m256 sb = _mm512_cvtpd_ps(b);
@ -369,16 +362,19 @@ namespace Optimization {
s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1); s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1);
return s; return s;
} }
static inline void StoD (__m512 s,__m512d &a,__m512d &b) { static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0)); a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0));
b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1)); b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1));
} }
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) { static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
__m512 sa,sb; __m512 sa,sb;
sa = DtoS(a,b); sa = DtoS(a,b);
sb = DtoS(c,d); sb = DtoS(c,d);
return StoH(sa,sb); return StoH(sa,sb);
} }
static inline void HtoD (__m512i h,__m512d &a,__m512d &b,__m512d &c,__m512d &d) { static inline void HtoD (__m512i h,__m512d &a,__m512d &b,__m512d &c,__m512d &d) {
__m512 sa,sb; __m512 sa,sb;
HtoS(h,sa,sb); HtoS(h,sa,sb);
@ -517,7 +513,6 @@ namespace Optimization {
return conv.f[0]; return conv.f[0];
} }
//Complex double Reduce //Complex double Reduce
template<> template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){ inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
@ -593,12 +588,11 @@ namespace Optimization {
#endif #endif
} NAMESPACE_END(Optimization);
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
typedef __m512i SIMD_Htype; // Half precision type typedef __m512i SIMD_Htype; // Half precision type
typedef __m512 SIMD_Ftype; // Single precision type typedef __m512 SIMD_Ftype; // Single precision type
typedef __m512d SIMD_Dtype; // Double precision type typedef __m512d SIMD_Dtype; // Double precision type
@ -615,8 +609,6 @@ namespace Optimization {
_mm_prefetch(ptr,_MM_HINT_T0); _mm_prefetch(ptr,_MM_HINT_T0);
} }
// Function name aliases // Function name aliases
typedef Optimization::Vsplat VsplatSIMD; typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD; typedef Optimization::Vstore VstoreSIMD;
@ -624,7 +616,6 @@ namespace Optimization {
typedef Optimization::Vstream VstreamSIMD; typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>; template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations // Arithmetic operations
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
@ -637,4 +628,4 @@ namespace Optimization {
typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD; typedef Optimization::TimesI TimesISIMD;
} NAMESPACE_END(Grid);