1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

QPX single precision implementation

This commit is contained in:
Antonin Portelli 2016-09-19 18:09:12 +01:00
parent 2e74520821
commit 0724f7af75
3 changed files with 132 additions and 24 deletions

View File

@ -265,7 +265,7 @@
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
}
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
//_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
local = _entries[ent]._is_local;
perm = _entries[ent]._permute;
if (perm) ptype = _permute_type[point];

View File

@ -27,20 +27,31 @@
namespace Grid {
namespace Optimization {
// Packed single-precision word: four floats addressed by lane (v0..v3).
// NOTE(review): this type is reinterpreted as float* for vec_ld/vec_st in this
// file; confirm its alignment satisfies what those intrinsics require.
typedef struct
{
  float v0;
  float v1;
  float v2;
  float v3;
} vector4float;
// Prints the four lanes of a vector4double as "{x0,x1,x2,x3}" and returns the stream.
inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
{
  stream << "{" << vec_extract(a,0);
  stream << "," << vec_extract(a,1);
  stream << "," << vec_extract(a,2);
  stream << "," << vec_extract(a,3) << "}";
  return stream;
}
// Prints the four lanes of a vector4float as "{v0,v1,v2,v3}" and returns the stream.
inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
{
  stream << "{" << a.v0;
  stream << "," << a.v1;
  stream << "," << a.v2;
  stream << "," << a.v3 << "}";
  return stream;
}
struct Vsplat{
//Complex float
inline vector4double operator()(float a, float b){
return (vector4double){(double)a, (double)b, (double)a, (double)b};
inline vector4float operator()(float a, float b){
return (vector4float){a, b, a, b};
}
// Real float
inline vector4double operator()(float a){
return (vector4double){(double)a, (double)a, (double)a, (double)a};
inline vector4float operator()(float a){
return (vector4float){a, a, a, a};
}
//Complex double
inline vector4double operator()(double a, double b){
@ -61,6 +72,18 @@ namespace Optimization {
// Stores the vector `a` to the float buffer `f` via vec_st with a zero offset.
// NOTE(review): presumably vec_st narrows the lanes to single precision and
// expects `f` to be suitably aligned — confirm against the QPX intrinsics reference.
inline void operator()(vector4double a, float *f){
vec_st(a, 0, f);
}
// Stores the vector `a` into the packed-float word `f` (taken by reference),
// by treating the address of `f` as a float buffer for vec_st.
// NOTE(review): vector4float only contains float members; confirm its address
// meets the alignment vec_st expects.
inline void operator()(vector4double a, vector4float &f){
vec_st(a, 0, (float *)(&f));
}
// Copies the four lanes of `a` into f[0..3], in lane order.
inline void operator()(vector4float a, float *f){
  const float lanes[4] = {a.v0, a.v1, a.v2, a.v3};
  for (int i = 0; i < 4; ++i) {
    f[i] = lanes[i];
  }
}
//Double
inline void operator()(vector4double a, double *d){
vec_st(a, 0, d);
@ -76,6 +99,18 @@ namespace Optimization {
// Writes the vector `a` to the float buffer `f` via vec_st with a zero offset.
// NOTE(review): presumably `f` must be suitably aligned for vec_st — confirm.
inline void operator()(float *f, vector4double a){
vec_st(a, 0, f);
}
inline void operator()(vector4float f, vector4double a){
vec_st(a, 0, (float *)(&f));
}
// Copies the four lanes of `a` into f[0..3], in lane order.
inline void operator()(float *f, vector4float a){
  const float lanes[4] = {a.v0, a.v1, a.v2, a.v3};
  for (int i = 0; i < 4; ++i) {
    f[i] = lanes[i];
  }
}
//Double
inline void operator()(double *d, vector4double a){
vec_st(a, 0, d);
@ -85,17 +120,23 @@ namespace Optimization {
struct Vset{
// Complex float
inline vector4double operator()(Grid::ComplexF *a){
return vec_ld(0, (float *)a);
// Packs two consecutive complex floats into one word, laid out as
// (re0, im0, re1, im1).
inline vector4float operator()(Grid::ComplexF *a){
  const float re0 = a[0].real();
  const float im0 = a[0].imag();
  const float re1 = a[1].real();
  const float im1 = a[1].imag();
  return (vector4float){re0, im0, re1, im1};
}
// Complex double
// Loads a vector4double from the memory at `a`, viewed as a plain double
// buffer, via vec_ld with a zero offset.
// NOTE(review): presumably this reads two complex doubles (four doubles) and
// needs `a` suitably aligned — confirm.
inline vector4double operator()(Grid::ComplexD *a){
return vec_ld(0, (double *)a);
}
// Real float
inline vector4double operator()(float *a){
return vec_ld(0, a);
// Packs a[0..3] into one vector4float, preserving element order.
inline vector4float operator()(float *a){
  const float x0 = a[0];
  const float x1 = a[1];
  const float x2 = a[2];
  const float x3 = a[3];
  return (vector4float){x0, x1, x2, x3};
}
// Converts a packed-float word to a vector4double by loading it through
// vec_ld from the address of the by-value parameter `a`, viewed as float*.
// NOTE(review): vector4float only contains float members; confirm the
// parameter's address meets the alignment vec_ld expects.
inline vector4double operator()(vector4float a){
return vec_ld(0, (float *)(&a));
}
// Real double
inline vector4double operator()(double *a){
return vec_ld(0, a);
@ -122,11 +163,42 @@ namespace Optimization {
/////////////////////////////////////////////////////
// Arithmetic operations
/////////////////////////////////////////////////////
// Generates the vector4float overload of the two-operand operation `fn`:
// both operands are converted to vector4double with Vset, the vector4double
// overload of `fn` is applied, and the result is written back into a
// vector4float with Vstore. `pref` is pasted in front of the declaration
// (e.g. `inline`, `static inline`).
#define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\
{\
  vector4double ad = Vset()(a);\
  vector4double bd = Vset()(b);\
  vector4double rd = fn(ad, bd);\
  vector4float  r;\
\
  Vstore()(rd, r);\
\
  return r;\
}
// Generates the vector4float overload of the one-operand operation `fn`:
// the operand is converted to vector4double with Vset, the vector4double
// overload of `fn` is applied, and the result is written back into a
// vector4float with Vstore. `pref` is pasted in front of the declaration
// (e.g. `inline`, `static inline`).
#define FLOAT_WRAP_1(fn, pref)\
pref vector4float fn(vector4float a)\
{\
  vector4double ad = Vset()(a);\
  vector4double rd = fn(ad);\
  vector4float  r;\
\
  Vstore()(rd, r);\
\
  return r;\
}
struct Sum{
//Complex/Real double
// Lane-wise sum of a and b via vec_add; serves both the complex and the real
// double cases, since addition acts on each lane independently.
inline vector4double operator()(vector4double a, vector4double b){
return vec_add(a, b);
}
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer
inline int operator()(int a, int b){
return a + b;
@ -138,6 +210,10 @@ namespace Optimization {
// Lane-wise difference a - b via vec_sub.
inline vector4double operator()(vector4double a, vector4double b){
return vec_sub(a, b);
}
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer
inline int operator()(int a, int b){
return a - b;
@ -149,6 +225,9 @@ namespace Optimization {
// Combines a and b with the QPX cross-multiply intrinsics: the result of
// vec_xmul(b, a) is used as the accumulator operand of vec_xxnpmadd(a, b, ...).
// NOTE(review): presumably this forms the complex product of interleaved
// (re, im) pairs — confirm against the QPX intrinsic definitions.
inline vector4double operator()(vector4double a, vector4double b){
return vec_xxnpmadd(a, b, vec_xmul(b, a));
}
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
struct Mult{
@ -156,6 +235,10 @@ namespace Optimization {
// Lane-wise product of a and b via vec_mul (no complex cross terms).
inline vector4double operator()(vector4double a, vector4double b){
return vec_mul(a, b);
}
// Real float
FLOAT_WRAP_2(operator(), inline)
// Integer
inline int operator()(int a, int b){
return a*b;
@ -167,6 +250,9 @@ namespace Optimization {
// Multiplies the lanes of v by {1, -1, 1, -1}, i.e. flips the sign of lanes 1
// and 3 (the imaginary parts when lanes hold interleaved (re, im) pairs).
inline vector4double operator()(vector4double v){
return vec_mul(v, (vector4double){1., -1., 1., -1.});
}
// Complex float
FLOAT_WRAP_1(operator(), inline)
};
struct TimesMinusI{
@ -175,6 +261,9 @@ namespace Optimization {
return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
(vector4double){0., 0., 0., 0.});
}
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
struct TimesI{
@ -183,9 +272,13 @@ namespace Optimization {
return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
(vector4double){0., 0., 0., 0.});
}
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
struct Permute{
//Complex double
// Swaps the two halves of v: lanes 0123 become 2301. The lane order is passed
// to vec_gpci as the octal literal 02301 and applied with vec_perm(v, v, ...).
static inline vector4double Permute0(vector4double v){ //0123 -> 2301
return vec_perm(v, v, vec_gpci(02301));
};
@ -198,6 +291,12 @@ namespace Optimization {
// Identity permutation: returns v unchanged.
static inline vector4double Permute3(vector4double v){
return v;
};
// Complex float
FLOAT_WRAP_1(Permute0, static inline)
FLOAT_WRAP_1(Permute1, static inline)
FLOAT_WRAP_1(Permute2, static inline)
FLOAT_WRAP_1(Permute3, static inline)
};
struct Rotate{
@ -218,31 +317,42 @@ namespace Optimization {
default: assert(0);
}
}
// Single-precision rotate: converts v to vector4double with Vset, applies the
// vector4double rotate overload with the same n, and writes the result back
// into a vector4float with Vstore.
static inline vector4float rotate(vector4float v, int n){
  vector4double vd = Vset()(v);
  vector4double rd = rotate(vd, n);
  vector4float  r;

  Vstore()(rd, r);

  return r;
}
};
//Complex float Reduce
template<>
inline Grid::ComplexF
Reduce<Grid::ComplexF, vector4double>::operator()(vector4double v) { //2 complex
vector4double v1,v2;
Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
v1 = Optimization::Sum()(v1, v);
return Grid::ComplexF((float)vec_extract(v1, 0), (float)vec_extract(v1, 1));
return Grid::ComplexF(v1.v0, v1.v1);
}
//Real float Reduce
template<>
inline Grid::RealF
Reduce<Grid::RealF, vector4double>::operator()(vector4double v){ //4 floats
vector4double v1,v2;
Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
v1 = Optimization::Sum()(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = vec_add(v1, v2);
v1 = Optimization::Sum()(v1, v2);
return (float)vec_extract(v1, 0);
return v1.v0;
}
@ -283,10 +393,9 @@ namespace Optimization {
////////////////////////////////////////////////////////////////////////////////
// Here assign types
typedef vector4double SIMD_Ftype; // Single precision type
typedef vector4double SIMD_Dtype; // Double precision type
typedef int SIMD_Itype; // Integer type
typedef Optimization::vector4float SIMD_Ftype; // Single precision type
typedef vector4double SIMD_Dtype; // Double precision type
typedef int SIMD_Itype; // Integer type
// prefetch utilities
// Prefetch hook with an empty body: both arguments are ignored.
inline void v_prefetch0(int size, const char *ptr)
{
  (void)size;
  (void)ptr;
}

View File

@ -157,10 +157,9 @@ void Tester(const functor &func)
std::cout << GridLogMessage << " " << func.name() << std::endl;
std::cout << GridLogDebug << v_input1 << std::endl;
std::cout << GridLogDebug << v_input2 << std::endl;
std::cout << GridLogDebug << v_result << std::endl;
int ok=0;
for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>1.0e-7){