1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-24 12:45:56 +01:00

QPX finishing

This commit is contained in:
Peter Boyle 2016-12-22 17:50:48 +00:00
parent b8cdb3e90a
commit 7dc36628a1

View File

@ -163,6 +163,22 @@ namespace Optimization {
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// FLOAT_WRAP_3(fn, pref)
//   Emits a three-argument vector4float overload of `fn`, declared with the
//   specifier(s) given in `pref`. Each float argument is passed through Vset()
//   to obtain a vector4double, the vector4double overload of `fn` is invoked on
//   those, and the outcome is written into a vector4float through Vstore().
//   (Comments are kept outside the macro body: a `//` comment on a continued
//   line would swallow the lines that follow it.)
#define FLOAT_WRAP_3(fn, pref) \
  pref vector4float fn(vector4float a, vector4float b, vector4float c) \
  { \
    vector4double wide_a = Vset()(a); \
    vector4double wide_b = Vset()(b); \
    vector4double wide_c = Vset()(c); \
    vector4double wide_r = fn(wide_a, wide_b, wide_c); \
    \
    vector4float narrow; \
    Vstore()(wide_r, narrow); \
    return narrow; \
  }
#define FLOAT_WRAP_2(fn, pref)\ #define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\ pref vector4float fn(vector4float a, vector4float b)\
{\ {\
@ -228,6 +244,13 @@ namespace Optimization {
} }
FLOAT_WRAP_2(operator(), inline) FLOAT_WRAP_2(operator(), inline)
}; };
struct MaddRealPart{
  // Complex double: hands (a, b, c) to the vec_xmadd intrinsic and returns
  // whatever it produces.
  // NOTE(review): the exact lane-wise semantics of vec_xmadd are not visible
  // here - confirm against the compiler's QPX intrinsic reference.
  inline vector4double operator()(vector4double a, vector4double b, vector4double c){
    vector4double accumulated = vec_xmadd(a, b, c);
    return accumulated;
  }
  // vector4float overload, generated by FLOAT_WRAP_3 on top of the
  // vector4double operator() above.
  FLOAT_WRAP_3(operator(), inline)
};
struct MultComplex{ struct MultComplex{
// Complex double // Complex double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
@ -323,19 +346,36 @@ namespace Optimization {
}; };
struct Rotate{ struct Rotate{
// Rotation of a vector4double selected by the template parameter n.
//   n == 1, 2, 3 : vec_perm of v with itself, using the control words
//                  vec_gpci(01230), vec_gpci(02301), vec_gpci(03012).
//   any other n  : v is returned untouched.
// The leading 0 makes each vec_gpci argument an octal literal.
// NOTE(review): presumably each octal digit picks one source element -
// confirm against the vec_gpci documentation.
template<int n> static inline vector4double tRotate(vector4double v){
  switch ( n ) {
  case 1:
    return vec_perm(v, v, vec_gpci(01230));
  case 2:
    return vec_perm(v, v, vec_gpci(02301));
  case 3:
    return vec_perm(v, v, vec_gpci(03012));
  default:
    return v;
  }
}
// vector4float flavour of tRotate<n>: the argument goes through Vset() to get
// a vector4double, the vector4double tRotate<n> is applied to it, and the
// rotated value is written into a vector4float via Vstore().
template<int n> static inline vector4float tRotate(vector4float a)
{
  vector4double widened = Vset()(a);
  vector4double rotated = tRotate<n>(widened);

  vector4float result;
  Vstore()(rotated, result);
  return result;
}
static inline vector4double rotate(vector4double v, int n){ static inline vector4double rotate(vector4double v, int n){
switch(n){ switch(n){
case 0: case 0:
return v; return v;
break; break;
case 1: case 1:
return vec_perm(v, v, vec_gpci(01230)); return tRotate<1>(v);
break; break;
case 2: case 2:
return vec_perm(v, v, vec_gpci(02301)); return tRotate<2>(v);
break; break;
case 3: case 3:
return vec_perm(v, v, vec_gpci(03012)); return tRotate<3>(v);
break; break;
default: assert(0); default: assert(0);
} }
@ -344,11 +384,9 @@ namespace Optimization {
static inline vector4float rotate(vector4float v, int n){ static inline vector4float rotate(vector4float v, int n){
vector4double vd, rd; vector4double vd, rd;
vector4float r; vector4float r;
vd = Vset()(v); vd = Vset()(v);
rd = rotate(vd, n); rd = rotate(vd, n);
Vstore()(rd, r); Vstore()(rd, r);
return r; return r;
} }
}; };
@ -439,6 +477,7 @@ typedef Optimization::Mult MultSIMD;
typedef Optimization::Div DivSIMD; typedef Optimization::Div DivSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::MultRealPart MultRealPartSIMD; typedef Optimization::MultRealPart MultRealPartSIMD;
typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD; typedef Optimization::TimesI TimesISIMD;