1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

GPU support improvements

This commit is contained in:
Peter Boyle 2019-01-01 15:05:03 +00:00
parent 3a4e397e72
commit 9d866d062a

View File

@ -225,9 +225,17 @@ public:
v = rhs.v; v = rhs.v;
return *this; return *this;
}; // faster than not declaring it and leaving to the compiler }; // faster than not declaring it and leaving to the compiler
accelerator Grid_simd() = default; accelerator Grid_simd() = default;
accelerator_inline Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps accelerator_inline Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
accelerator_inline Grid_simd(const Grid_simd &&rhs) : v(rhs.v){}; accelerator_inline Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
accelerator Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
// Enable if complex type
template <typename S = Scalar_type> accelerator_inline
Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
vsplat(*this, a);
};
///////////////////////////// /////////////////////////////
// Constructors // Constructors
@ -237,13 +245,7 @@ public:
return (*this); return (*this);
} }
// Enable if complex type
template <typename S = Scalar_type> accelerator_inline
Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
vsplat(*this, a);
};
accelerator Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
/////////////////////////////////////////////// ///////////////////////////////////////////////
// mac, mult, sub, add, adj // mac, mult, sub, add, adj
@ -251,66 +253,66 @@ public:
// FIXME -- alias this to an accelerator_inline MAC struct. // FIXME -- alias this to an accelerator_inline MAC struct.
friend accelerator_inline void mac(Grid_simd *__restrict__ y, friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a, const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) { const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y); *y = (*a) * (*x) + (*y);
}; };
friend accelerator_inline void mult(Grid_simd *__restrict__ y, friend accelerator_inline void mult(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) { const Grid_simd *__restrict__ r) {
*y = (*l) * (*r); *y = (*l) * (*r);
} }
friend accelerator_inline void sub(Grid_simd *__restrict__ y, friend accelerator_inline void sub(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) { const Grid_simd *__restrict__ r) {
*y = (*l) - (*r); *y = (*l) - (*r);
} }
friend accelerator_inline void add(Grid_simd *__restrict__ y, friend accelerator_inline void add(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ l,
const Grid_simd *__restrict__ r) { const Grid_simd *__restrict__ r) {
*y = (*l) + (*r); *y = (*l) + (*r);
} }
friend accelerator_inline void mac(Grid_simd *__restrict__ y, friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ a, const Scalar_type *__restrict__ a,
const Grid_simd *__restrict__ x) { const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y); *y = (*a) * (*x) + (*y);
}; };
friend accelerator_inline void mult(Grid_simd *__restrict__ y, friend accelerator_inline void mult(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l, const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) { const Grid_simd *__restrict__ r) {
*y = (*l) * (*r); *y = (*l) * (*r);
} }
friend accelerator_inline void sub(Grid_simd *__restrict__ y, friend accelerator_inline void sub(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l, const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) { const Grid_simd *__restrict__ r) {
*y = (*l) - (*r); *y = (*l) - (*r);
} }
friend accelerator_inline void add(Grid_simd *__restrict__ y, friend accelerator_inline void add(Grid_simd *__restrict__ y,
const Scalar_type *__restrict__ l, const Scalar_type *__restrict__ l,
const Grid_simd *__restrict__ r) { const Grid_simd *__restrict__ r) {
*y = (*l) + (*r); *y = (*l) + (*r);
} }
friend accelerator_inline void mac(Grid_simd *__restrict__ y, friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a, const Grid_simd *__restrict__ a,
const Scalar_type *__restrict__ x) { const Scalar_type *__restrict__ x) {
*y = (*a) * (*x) + (*y); *y = (*a) * (*x) + (*y);
}; };
friend accelerator_inline void mult(Grid_simd *__restrict__ y, friend accelerator_inline void mult(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) { const Scalar_type *__restrict__ r) {
*y = (*l) * (*r); *y = (*l) * (*r);
} }
friend accelerator_inline void sub(Grid_simd *__restrict__ y, friend accelerator_inline void sub(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) { const Scalar_type *__restrict__ r) {
*y = (*l) - (*r); *y = (*l) - (*r);
} }
friend accelerator_inline void add(Grid_simd *__restrict__ y, friend accelerator_inline void add(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l, const Grid_simd *__restrict__ l,
const Scalar_type *__restrict__ r) { const Scalar_type *__restrict__ r) {
*y = (*l) + (*r); *y = (*l) + (*r);
} }
@ -851,11 +853,17 @@ accelerator_inline Grid_simd<S, V> toReal(const Grid_simd<complex<S>, V> &in) {
return ret; return ret;
} }
template <class T> struct toComplexMapper {};
template<> struct toComplexMapper<vRealF> { typedef vComplexF Complexified; };
template<> struct toComplexMapper<vRealD> { typedef vComplexD Complexified; };
// complex = toComplex( real ) // complex = toComplex( real )
template <class R, class V, IfReal<R> = 0> // must be a real arg template <class Rsimd> // must be a real arg
accelerator_inline Grid_simd< complex<R>, V> toComplex(const Grid_simd<R, V> &in) { accelerator_inline typename toComplexMapper<Rsimd>::Complexified toComplex(const Rsimd &in) {
typedef Grid_simd<R, V> Rsimd;
typedef Grid_simd< complex<R>, V> Csimd; typedef typename toComplexMapper<Rsimd>::Complexified Csimd;
typename Rsimd::conv_t conv; // address as real typename Rsimd::conv_t conv; // address as real
conv.v = in.v; conv.v = in.v;
@ -867,7 +875,7 @@ accelerator_inline Grid_simd< complex<R>, V> toComplex(const Grid_simd<R, V> &in
conv.s[i + 1] = 0.0; // zero imaginary parts conv.s[i + 1] = 0.0; // zero imaginary parts
} }
Csimd ret; Csimd ret;
ret.v = conv.v; memcpy((void *)&ret.v,(void *)&conv.v,sizeof(ret.v));
return ret; return ret;
} }