mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
QPX single precision implementation
This commit is contained in:
parent
2e74520821
commit
0724f7af75
@ -265,7 +265,7 @@
|
||||
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
|
||||
}
|
||||
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
|
||||
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
|
||||
//_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
|
||||
local = _entries[ent]._is_local;
|
||||
perm = _entries[ent]._permute;
|
||||
if (perm) ptype = _permute_type[point];
|
||||
|
@ -27,20 +27,31 @@
|
||||
|
||||
namespace Grid {
|
||||
namespace Optimization {
|
||||
// Packed group of four single-precision values, used as the single-precision
// SIMD storage type alongside the native vector4double register type.
//
// The type is over-aligned to 16 bytes because other code in this file hands
// the address of a vector4float (including stack temporaries) straight to
// vec_ld / vec_st through a float pointer.
// NOTE(review): the QPX load/store intrinsics are understood to require (or
// silently truncate to) 16-byte alignment for float data - confirm against the
// IBM XL built-in reference. The size is unchanged (4 * sizeof(float)).
struct alignas(16) vector4float
{
  float v0, v1, v2, v3;
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
|
||||
{
|
||||
stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
|
||||
return stream;
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
|
||||
{
|
||||
stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
|
||||
return stream;
|
||||
};
|
||||
|
||||
struct Vsplat{
|
||||
//Complex float
|
||||
inline vector4double operator()(float a, float b){
|
||||
return (vector4double){(double)a, (double)b, (double)a, (double)b};
|
||||
inline vector4float operator()(float a, float b){
|
||||
return (vector4float){a, b, a, b};
|
||||
}
|
||||
// Real float
|
||||
inline vector4double operator()(float a){
|
||||
return (vector4double){(double)a, (double)a, (double)a, (double)a};
|
||||
inline vector4float operator()(float a){
|
||||
return (vector4float){a, a, a, a};
|
||||
}
|
||||
//Complex double
|
||||
inline vector4double operator()(double a, double b){
|
||||
@ -61,6 +72,18 @@ namespace Optimization {
|
||||
// Write the register a to the float array f using vec_st with a zero offset.
// NOTE(review): any alignment requirement on f comes from vec_st itself -
// confirm against the intrinsic reference.
inline void operator()(vector4double a, float *f){
  float *dest = f;
  vec_st(a, 0, dest);
}
|
||||
|
||||
// Write the register a into the vector4float f (taken by reference so the
// caller sees the result). The struct is addressed as a plain float array and
// written with vec_st at offset zero.
inline void operator()(vector4double a, vector4float &f){
  float *dest = reinterpret_cast<float *>(&f);
  vec_st(a, 0, dest);
}
|
||||
|
||||
// Copy the four members of a (v0..v3, in that order) to f[0]..f[3].
// Plain scalar stores; no vector intrinsic is involved.
inline void operator()(vector4float a, float *f){
  const float lanes[4] = {a.v0, a.v1, a.v2, a.v3};
  for (int i = 0; i < 4; ++i) {
    f[i] = lanes[i];
  }
}
|
||||
|
||||
//Double
|
||||
inline void operator()(vector4double a, double *d){
|
||||
vec_st(a, 0, d);
|
||||
@ -76,6 +99,18 @@ namespace Optimization {
|
||||
// Destination-first variant: write the register a to the float array f using
// vec_st with a zero offset.
inline void operator()(float *f, vector4double a){
  float *dest = f;
  vec_st(a, 0, dest);
}
|
||||
|
||||
inline void operator()(vector4float f, vector4double a){
|
||||
vec_st(a, 0, (float *)(&f));
|
||||
}
|
||||
|
||||
// Destination-first variant: copy the four members of a (v0..v3, in that
// order) to f[0]..f[3] with plain scalar stores.
inline void operator()(float *f, vector4float a){
  const float lanes[4] = {a.v0, a.v1, a.v2, a.v3};
  float *out = f;
  for (int i = 0; i < 4; ++i) {
    out[i] = lanes[i];
  }
}
|
||||
|
||||
//Double
|
||||
inline void operator()(double *d, vector4double a){
|
||||
vec_st(a, 0, d);
|
||||
@ -85,17 +120,23 @@ namespace Optimization {
|
||||
|
||||
struct Vset{
|
||||
// Complex float
|
||||
inline vector4double operator()(Grid::ComplexF *a){
|
||||
return vec_ld(0, (float *)a);
|
||||
inline vector4float operator()(Grid::ComplexF *a){
|
||||
return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
|
||||
}
|
||||
// Complex double
|
||||
// Load a register from an array of double-precision complex numbers by
// viewing it as a flat array of doubles and calling vec_ld at offset zero.
// NOTE(review): presumably this picks up two complex values (re,im,re,im) -
// confirm the element count against the vec_ld reference.
inline vector4double operator()(Grid::ComplexD *a){
  double *flat = reinterpret_cast<double *>(a);
  return vec_ld(0, flat);
}
|
||||
|
||||
// Real float
|
||||
inline vector4double operator()(float *a){
|
||||
return vec_ld(0, a);
|
||||
inline vector4float operator()(float *a){
|
||||
return (vector4float){a[0], a[1], a[2], a[3]};
|
||||
}
|
||||
|
||||
// Widen a vector4float into a vector4double register: the by-value argument
// is addressed as a float array and loaded with vec_ld at offset zero.
// NOTE(review): a is a stack copy; whether its address satisfies vec_ld's
// alignment expectations depends on vector4float's alignment - verify.
inline vector4double operator()(vector4float a){
  float *lanes = reinterpret_cast<float *>(&a);
  return vec_ld(0, lanes);
}
|
||||
|
||||
// Real double
|
||||
inline vector4double operator()(double *a){
|
||||
return vec_ld(0, a);
|
||||
@ -122,11 +163,42 @@ namespace Optimization {
|
||||
/////////////////////////////////////////////////////
|
||||
// Arithmetic operations
|
||||
/////////////////////////////////////////////////////
|
||||
// FLOAT_WRAP_2(fn, pref): emits a vector4float overload of the two-operand
// function `fn`, declared with the specifiers `pref`. The generated body
// widens both operands with Vset, calls the vector4double overload of `fn`,
// and narrows the result back into a vector4float with Vstore.
#define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\
{\
  vector4float out;\
  const vector4double lhs = Vset()(a);\
  const vector4double rhs = Vset()(b);\
\
  Vstore()(fn(lhs, rhs), out);\
\
  return out;\
}
|
||||
|
||||
// FLOAT_WRAP_1(fn, pref): emits a vector4float overload of the one-operand
// function `fn`, declared with the specifiers `pref`. The generated body
// widens the operand with Vset, calls the vector4double overload of `fn`,
// and narrows the result back into a vector4float with Vstore.
#define FLOAT_WRAP_1(fn, pref)\
pref vector4float fn(vector4float a)\
{\
  vector4float out;\
  const vector4double wide = Vset()(a);\
\
  Vstore()(fn(wide), out);\
\
  return out;\
}
|
||||
|
||||
struct Sum{
|
||||
//Complex/Real double
|
||||
// Double-precision sum: returns vec_add(a, b).
inline vector4double operator()(vector4double a, vector4double b){
  const vector4double total = vec_add(a, b);
  return total;
}
|
||||
|
||||
//Complex/Real float
|
||||
FLOAT_WRAP_2(operator(), inline)
|
||||
|
||||
//Integer
|
||||
inline int operator()(int a, int b){
|
||||
return a + b;
|
||||
@ -138,6 +210,10 @@ namespace Optimization {
|
||||
// Double-precision difference: returns vec_sub(a, b).
inline vector4double operator()(vector4double a, vector4double b){
  const vector4double difference = vec_sub(a, b);
  return difference;
}
|
||||
|
||||
//Complex/Real float
|
||||
FLOAT_WRAP_2(operator(), inline)
|
||||
|
||||
//Integer
|
||||
inline int operator()(int a, int b){
|
||||
return a - b;
|
||||
@ -149,6 +225,9 @@ namespace Optimization {
|
||||
// Double-precision overload: first forms vec_xmul(b, a), then feeds it as the
// accumulator to vec_xxnpmadd(a, b, ...).
// NOTE(review): presumably this is the interleaved complex product a*b -
// confirm against the enclosing struct and the intrinsic reference.
inline vector4double operator()(vector4double a, vector4double b){
  const vector4double cross = vec_xmul(b, a);
  return vec_xxnpmadd(a, b, cross);
}
|
||||
|
||||
// Complex float
|
||||
FLOAT_WRAP_2(operator(), inline)
|
||||
};
|
||||
|
||||
struct Mult{
|
||||
@ -156,6 +235,10 @@ namespace Optimization {
|
||||
// Double-precision product: returns vec_mul(a, b).
inline vector4double operator()(vector4double a, vector4double b){
  const vector4double product = vec_mul(a, b);
  return product;
}
|
||||
|
||||
// Real float
|
||||
FLOAT_WRAP_2(operator(), inline)
|
||||
|
||||
// Integer
|
||||
inline int operator()(int a, int b){
|
||||
return a*b;
|
||||
@ -167,6 +250,9 @@ namespace Optimization {
|
||||
// Double-precision overload: multiplies v by the constant {1, -1, 1, -1} with
// vec_mul, i.e. elements 1 and 3 change sign while elements 0 and 2 are kept.
// NOTE(review): presumably the odd elements are imaginary parts, making this
// a complex conjugate - confirm against the enclosing struct.
inline vector4double operator()(vector4double v){
  const vector4double signs = (vector4double){1., -1., 1., -1.};
  return vec_mul(v, signs);
}
|
||||
|
||||
// Complex float
|
||||
FLOAT_WRAP_1(operator(), inline)
|
||||
};
|
||||
|
||||
struct TimesMinusI{
|
||||
@ -175,6 +261,9 @@ namespace Optimization {
|
||||
return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
|
||||
(vector4double){0., 0., 0., 0.});
|
||||
}
|
||||
|
||||
// Complex float
|
||||
FLOAT_WRAP_2(operator(), inline)
|
||||
};
|
||||
|
||||
struct TimesI{
|
||||
@ -183,9 +272,13 @@ namespace Optimization {
|
||||
return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
|
||||
(vector4double){0., 0., 0., 0.});
|
||||
}
|
||||
|
||||
// Complex float
|
||||
FLOAT_WRAP_2(operator(), inline)
|
||||
};
|
||||
|
||||
struct Permute{
|
||||
//Complex double
|
||||
// Element reorder 0123 -> 2301: vec_perm over (v, v) with the control word
// built by vec_gpci from the octal pattern 02301.
static inline vector4double Permute0(vector4double v){
  const vector4double swapped = vec_perm(v, v, vec_gpci(02301));
  return swapped;
}
|
||||
@ -198,6 +291,12 @@ namespace Optimization {
|
||||
// Identity permutation: the argument is handed back unchanged.
static inline vector4double Permute3(vector4double v){
  const vector4double unchanged = v;
  return unchanged;
}
|
||||
|
||||
// Complex float
|
||||
FLOAT_WRAP_1(Permute0, static inline)
|
||||
FLOAT_WRAP_1(Permute1, static inline)
|
||||
FLOAT_WRAP_1(Permute2, static inline)
|
||||
FLOAT_WRAP_1(Permute3, static inline)
|
||||
};
|
||||
|
||||
struct Rotate{
|
||||
@ -218,31 +317,42 @@ namespace Optimization {
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Single-precision rotate: widens v with Vset, delegates to the
// vector4double overload of rotate with the same n, and narrows the result
// back into a vector4float with Vstore.
static inline vector4float rotate(vector4float v, int n){
  vector4float out;
  const vector4double wide = Vset()(v);

  Vstore()(rotate(wide, n), out);

  return out;
}
|
||||
};
|
||||
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF
|
||||
Reduce<Grid::ComplexF, vector4double>::operator()(vector4double v) { //2 complex
|
||||
vector4double v1,v2;
|
||||
Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
|
||||
vector4float v1,v2;
|
||||
|
||||
v1 = Optimization::Permute::Permute0(v);
|
||||
v1 = vec_add(v1, v);
|
||||
v1 = Optimization::Sum()(v1, v);
|
||||
|
||||
return Grid::ComplexF((float)vec_extract(v1, 0), (float)vec_extract(v1, 1));
|
||||
return Grid::ComplexF(v1.v0, v1.v1);
|
||||
}
|
||||
//Real float Reduce
|
||||
template<>
|
||||
inline Grid::RealF
|
||||
Reduce<Grid::RealF, vector4double>::operator()(vector4double v){ //4 floats
|
||||
vector4double v1,v2;
|
||||
Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
|
||||
vector4float v1,v2;
|
||||
|
||||
v1 = Optimization::Permute::Permute0(v);
|
||||
v1 = vec_add(v1, v);
|
||||
v1 = Optimization::Sum()(v1, v);
|
||||
v2 = Optimization::Permute::Permute1(v1);
|
||||
v1 = vec_add(v1, v2);
|
||||
v1 = Optimization::Sum()(v1, v2);
|
||||
|
||||
return (float)vec_extract(v1, 0);
|
||||
return v1.v0;
|
||||
}
|
||||
|
||||
|
||||
@ -283,10 +393,9 @@ namespace Optimization {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Here assign types
|
||||
|
||||
typedef vector4double SIMD_Ftype; // Single precision type
|
||||
typedef vector4double SIMD_Dtype; // Double precision type
|
||||
typedef int SIMD_Itype; // Integer type
|
||||
typedef Optimization::vector4float SIMD_Ftype; // Single precision type
|
||||
typedef vector4double SIMD_Dtype; // Double precision type
|
||||
typedef int SIMD_Itype; // Integer type
|
||||
|
||||
// prefetch utilities
|
||||
// Prefetch hook for this backend: intentionally does nothing with either
// argument.
inline void v_prefetch0(int size, const char *ptr)
{
  (void)size;
  (void)ptr;
}
|
||||
|
@ -157,10 +157,9 @@ void Tester(const functor &func)
|
||||
std::cout << GridLogMessage << " " << func.name() << std::endl;
|
||||
|
||||
std::cout << GridLogDebug << v_input1 << std::endl;
|
||||
std::cout << GridLogDebug << v_input2 << std::endl;
|
||||
std::cout << GridLogDebug << v_result << std::endl;
|
||||
|
||||
|
||||
|
||||
int ok=0;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
if ( abs(reference[i]-result[i])>1.0e-7){
|
||||
|
Loading…
Reference in New Issue
Block a user