mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Exchange in generic
Precision change in AVX, SSE, AVX512, Generic. QPX still to do.
This commit is contained in:
@ -377,8 +377,8 @@ namespace Optimization {
|
||||
b0 = _mm256_extractf128_si256(b,0);
|
||||
a1 = _mm256_extractf128_si256(a,1);
|
||||
b1 = _mm256_extractf128_si256(b,1);
|
||||
a0 = _mm_mul_epi32(a0,b0);
|
||||
a1 = _mm_mul_epi32(a1,b1);
|
||||
a0 = _mm_mullo_epi32(a0,b0);
|
||||
a1 = _mm_mullo_epi32(a1,b1);
|
||||
return _mm256_set_m128i(a1,a0);
|
||||
#endif
|
||||
#if defined (AVX2)
|
||||
@ -494,7 +494,7 @@ namespace Optimization {
|
||||
a = _mm256_cvtps_pd(_mm256_extractf128_ps(s,0));
|
||||
b = _mm256_cvtps_pd(_mm256_extractf128_ps(s,1));
|
||||
}
|
||||
static inline __m256 DtoH (__m256i a,__m256 b,__m256 c,__m256 d) {
|
||||
static inline __m256i DtoH (__m256d a,__m256d b,__m256d c,__m256d d) {
|
||||
__m256 sa,sb;
|
||||
sa = DtoS(a,b);
|
||||
sb = DtoS(c,d);
|
||||
|
@ -235,11 +235,9 @@ namespace Optimization {
|
||||
// Single-precision multiply-accumulate: updates a in place with b*c + a,
// computed by the fused multiply-add intrinsic _mm512_fmadd_ps.
inline void mac(__m512 &a, __m512 b, __m512 c){
|
||||
a= _mm512_fmadd_ps( b, c, a);
|
||||
}
|
||||
|
||||
// Double-precision multiply-accumulate: updates a in place with b*c + a,
// computed by the fused multiply-add intrinsic _mm512_fmadd_pd.
inline void mac(__m512d &a, __m512d b, __m512d c){
|
||||
a= _mm512_fmadd_pd( b, c, a);
|
||||
}
|
||||
|
||||
// Real float
|
||||
inline __m512 operator()(__m512 a, __m512 b){
|
||||
return _mm512_mul_ps(a,b);
|
||||
@ -366,7 +364,7 @@ namespace Optimization {
|
||||
a = _mm512_cvtps_pd(_mm512_extractf256_ps(s,0));
|
||||
b = _mm512_cvtps_pd(_mm512_extractf256_ps(s,1));
|
||||
}
|
||||
static inline __m512 DtoH (__m512i a,__m512 b,__m512 c,__m512 d) {
|
||||
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
|
||||
__m512 sa,sb;
|
||||
sa = DtoS(a,b);
|
||||
sb = DtoS(c,d);
|
||||
|
@ -279,6 +279,93 @@ namespace Optimization {
|
||||
|
||||
#undef timesi
|
||||
|
||||
struct PrecisionChange {
|
||||
static inline vech StoH (const vecf &a,const vecf &b) {
|
||||
vech ret;
|
||||
vech *ha = (vech *)&a;
|
||||
vech *hb = (vech *)&b;
|
||||
const int nf = W<float>::r;
|
||||
// VECTOR_FOR(i, nf,1){ ret.v[i] = ( (uint16_t *) &a.v[i])[1] ; }
|
||||
// VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; }
|
||||
VECTOR_FOR(i, nf,1){ ret.v[i] = ha->v[2*i+1]; }
|
||||
VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; }
|
||||
return ret;
|
||||
}
|
||||
static inline void HtoS (vech h,vecf &sa,vecf &sb) {
|
||||
const int nf = W<float>::r;
|
||||
const int nh = W<uint16_t>::r;
|
||||
vech *ha = (vech *)&sa;
|
||||
vech *hb = (vech *)&sb;
|
||||
VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; }
|
||||
// VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];}
|
||||
// VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];}
|
||||
VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; }
|
||||
VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; }
|
||||
}
|
||||
static inline vecf DtoS (vecd a,vecd b) {
|
||||
const int nd = W<double>::r;
|
||||
const int nf = W<float>::r;
|
||||
vecf ret;
|
||||
VECTOR_FOR(i, nd,1){ ret.v[i] = a.v[i] ; }
|
||||
VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; }
|
||||
return ret;
|
||||
}
|
||||
static inline void StoD (vecf s,vecd &a,vecd &b) {
|
||||
const int nd = W<double>::r;
|
||||
VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; }
|
||||
VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; }
|
||||
}
|
||||
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
||||
vecf sa,sb;
|
||||
sa = DtoS(a,b);
|
||||
sb = DtoS(c,d);
|
||||
return StoH(sa,sb);
|
||||
}
|
||||
static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
||||
vecf sa,sb;
|
||||
HtoS(h,sa,sb);
|
||||
StoD(sa,a,b);
|
||||
StoD(sb,c,d);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Exchange support
|
||||
struct Exchange{
|
||||
|
||||
template <typename T,int n>
|
||||
static inline void ExchangeN(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||
const int w = W<T>::r;
|
||||
unsigned int mask = w >> (n + 1);
|
||||
// std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
|
||||
VECTOR_FOR(i, w, 1) {
|
||||
int j1 = i&(~mask);
|
||||
if ( (i&mask) == 0 ) { out1.v[i]=in1.v[j1];}
|
||||
else { out1.v[i]=in2.v[j1];}
|
||||
int j2 = i|mask;
|
||||
if ( (i&mask) == 0 ) { out2.v[i]=in1.v[j2];}
|
||||
else { out2.v[i]=in2.v[j2];}
|
||||
}
|
||||
}
|
||||
template <typename T>
|
||||
static inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||
ExchangeN<T,0>(out1,out2,in1,in2);
|
||||
};
|
||||
template <typename T>
|
||||
static inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||
ExchangeN<T,1>(out1,out2,in1,in2);
|
||||
};
|
||||
template <typename T>
|
||||
static inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||
ExchangeN<T,2>(out1,out2,in1,in2);
|
||||
};
|
||||
template <typename T>
|
||||
static inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
|
||||
ExchangeN<T,3>(out1,out2,in1,in2);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
#define perm(a, b, n, w)\
|
||||
@ -403,6 +490,7 @@ namespace Optimization {
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
|
||||
// SIMD type aliases bound to the generic vector implementations.
using SIMD_Htype = Optimization::vech; // Reduced precision type
using SIMD_Ftype = Optimization::vecf; // Single precision type
using SIMD_Dtype = Optimization::vecd; // Double precision type
using SIMD_Itype = Optimization::veci; // Integer type
|
||||
|
@ -66,6 +66,10 @@ namespace Optimization {
|
||||
template <> struct W<Integer> {
|
||||
constexpr static unsigned int r = GEN_SIMD_WIDTH/4u;
|
||||
};
|
||||
template <> struct W<uint16_t> {
|
||||
constexpr static unsigned int c = GEN_SIMD_WIDTH/4u;
|
||||
constexpr static unsigned int r = GEN_SIMD_WIDTH/2u;
|
||||
};
|
||||
|
||||
// SIMD vector types
|
||||
template <typename T>
|
||||
@ -73,8 +77,9 @@ namespace Optimization {
|
||||
alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
|
||||
};
|
||||
|
||||
// Concrete generic vector types, one alias per element type.
using vecf = vec<float>;
using vecd = vec<double>;
using vech = vec<uint16_t>; // half precision comms
using veci = vec<Integer>;
|
||||
|
||||
}}
|
||||
|
@ -125,7 +125,6 @@ namespace Optimization {
|
||||
f[2] = a.v2;
|
||||
f[3] = a.v3;
|
||||
}
|
||||
|
||||
//Double
|
||||
inline void operator()(double *d, vector4double a){
|
||||
vec_st(a, 0, d);
|
||||
|
@ -357,7 +357,7 @@ namespace Optimization {
|
||||
s = (__m128)_mm_alignr_epi32((__m128i)s,(__m128i)s,2);
|
||||
b = _mm_cvtps_pd(s);
|
||||
}
|
||||
static inline __m128 DtoH (__m128i a,__m128 b,__m128 c,__m128 d) {
|
||||
static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) {
|
||||
__m128 sa,sb;
|
||||
sa = DtoS(a,b);
|
||||
sb = DtoS(c,d);
|
||||
|
Reference in New Issue
Block a user