1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

QPX finishing

This commit is contained in:
Peter Boyle 2016-12-22 17:50:48 +00:00
parent b8cdb3e90a
commit 7dc36628a1

View File

@ -163,6 +163,22 @@ namespace Optimization {
/////////////////////////////////////////////////////
// Arithmetic operations
/////////////////////////////////////////////////////
/*
 * FLOAT_WRAP_3(fn, pref)
 *
 * Emits a three-operand vector4float overload of `fn`, declared with the
 * specifier(s) given in `pref`.  The generated function widens each operand
 * with Vset, calls the vector4double overload of `fn`, and narrows the
 * result back into a vector4float with Vstore before returning it.
 */
#define FLOAT_WRAP_3(fn, pref)\
pref vector4float fn(vector4float a, vector4float b, vector4float c) \
{\
  vector4float narrow;\
  vector4double wideA = Vset()(a);\
  vector4double wideB = Vset()(b);\
  vector4double wideC = Vset()(c);\
  vector4double wideR = fn(wideA, wideB, wideC);\
  Vstore()(wideR, narrow);\
  return narrow;\
}
#define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\
{\
@ -228,6 +244,13 @@ namespace Optimization {
}
FLOAT_WRAP_2(operator(), inline)
};
// Three-operand multiply-add functor built on the QPX intrinsic vec_xmadd.
// NOTE(review): the name suggests "real part of a, times b, plus c" for each
// complex pair -- confirm against the IBM XL vec_xmadd documentation.
struct MaddRealPart{
  // Complex double: forwards the operands unchanged to vec_xmadd.
  inline vector4double operator()(vector4double a, vector4double b, vector4double c){
    vector4double accumulated = vec_xmadd(a, b, c);
    return accumulated;
  }
  // Single-precision overload generated by the FLOAT_WRAP_3 macro.
  FLOAT_WRAP_3(operator(), inline)
};
struct MultComplex{
// Complex double
inline vector4double operator()(vector4double a, vector4double b){
@ -323,19 +346,36 @@ namespace Optimization {
};
struct Rotate{
// Rotation of a vector4double by a compile-time count n.
// n == 1, 2, 3 permute v against itself with the matching vec_gpci control
// pattern; every other n returns v unchanged.
// NOTE(review): the vec_gpci arguments are octal literals, presumably one
// digit per output lane -- confirm against the IBM XL QPX documentation.
template<int n> static inline vector4double tRotate(vector4double v){
  switch(n){
  case 1:  return vec_perm(v, v, vec_gpci(01230));
  case 2:  return vec_perm(v, v, vec_gpci(02301));
  case 3:  return vec_perm(v, v, vec_gpci(03012));
  default: return v;
  }
}
// Single-precision variant of the compile-time rotation: widen the input with
// Vset, rotate it with the vector4double tRotate<n>, and narrow the result
// back into a vector4float with Vstore.
template<int n> static inline vector4float tRotate(vector4float a)
{
  vector4float narrowed;
  vector4double widened = Vset()(a);
  vector4double turned = tRotate<n>(widened);
  Vstore()(turned, narrowed);
  return narrowed;
}
static inline vector4double rotate(vector4double v, int n){
switch(n){
case 0:
return v;
break;
case 1:
return vec_perm(v, v, vec_gpci(01230));
return tRotate<1>(v);
break;
case 2:
return vec_perm(v, v, vec_gpci(02301));
return tRotate<2>(v);
break;
case 3:
return vec_perm(v, v, vec_gpci(03012));
return tRotate<3>(v);
break;
default: assert(0);
}
@ -344,11 +384,9 @@ namespace Optimization {
// Run-time rotation of a vector4float by n: widen with Vset, delegate to the
// vector4double rotate(v, n) overload, then narrow the result with Vstore.
static inline vector4float rotate(vector4float v, int n){
  vector4float result;
  vector4double wide = Vset()(v);
  vector4double shifted = rotate(wide, n);
  Vstore()(shifted, result);
  return result;
}
};
@ -439,6 +477,7 @@ typedef Optimization::Mult MultSIMD;
// Aliases that expose the Optimization:: functors under their *SIMD names.
typedef Optimization::Div DivSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::MultRealPart MultRealPartSIMD;
typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;