mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
GPU support improvements
This commit is contained in:
parent
3a4e397e72
commit
9d866d062a
@ -225,9 +225,17 @@ public:
|
|||||||
v = rhs.v;
|
v = rhs.v;
|
||||||
return *this;
|
return *this;
|
||||||
}; // faster than not declaring it and leaving to the compiler
|
}; // faster than not declaring it and leaving to the compiler
|
||||||
|
|
||||||
|
|
||||||
accelerator Grid_simd() = default;
|
accelerator Grid_simd() = default;
|
||||||
accelerator_inline Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
|
accelerator_inline Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
|
||||||
accelerator_inline Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
|
accelerator_inline Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
|
||||||
|
accelerator Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
|
||||||
|
// Enable if complex type
|
||||||
|
template <typename S = Scalar_type> accelerator_inline
|
||||||
|
Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
|
||||||
|
vsplat(*this, a);
|
||||||
|
};
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Constructors
|
// Constructors
|
||||||
@ -237,13 +245,7 @@ public:
|
|||||||
return (*this);
|
return (*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable if complex type
|
|
||||||
template <typename S = Scalar_type> accelerator_inline
|
|
||||||
Grid_simd(const typename std::enable_if<is_complex<S>::value, S>::type a) {
|
|
||||||
vsplat(*this, a);
|
|
||||||
};
|
|
||||||
|
|
||||||
accelerator Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
|
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// mac, mult, sub, add, adj
|
// mac, mult, sub, add, adj
|
||||||
@ -251,66 +253,66 @@ public:
|
|||||||
|
|
||||||
// FIXME -- alias this to an accelerator_inline MAC struct.
|
// FIXME -- alias this to an accelerator_inline MAC struct.
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ a,
|
const Grid_simd *__restrict__ a,
|
||||||
const Grid_simd *__restrict__ x) {
|
const Grid_simd *__restrict__ x) {
|
||||||
*y = (*a) * (*x) + (*y);
|
*y = (*a) * (*x) + (*y);
|
||||||
};
|
};
|
||||||
|
|
||||||
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) * (*r);
|
*y = (*l) * (*r);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend accelerator_inline void sub(Grid_simd *__restrict__ y,
|
friend accelerator_inline void sub(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) - (*r);
|
*y = (*l) - (*r);
|
||||||
}
|
}
|
||||||
friend accelerator_inline void add(Grid_simd *__restrict__ y,
|
friend accelerator_inline void add(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) + (*r);
|
*y = (*l) + (*r);
|
||||||
}
|
}
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Scalar_type *__restrict__ a,
|
const Scalar_type *__restrict__ a,
|
||||||
const Grid_simd *__restrict__ x) {
|
const Grid_simd *__restrict__ x) {
|
||||||
*y = (*a) * (*x) + (*y);
|
*y = (*a) * (*x) + (*y);
|
||||||
};
|
};
|
||||||
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
||||||
const Scalar_type *__restrict__ l,
|
const Scalar_type *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) * (*r);
|
*y = (*l) * (*r);
|
||||||
}
|
}
|
||||||
friend accelerator_inline void sub(Grid_simd *__restrict__ y,
|
friend accelerator_inline void sub(Grid_simd *__restrict__ y,
|
||||||
const Scalar_type *__restrict__ l,
|
const Scalar_type *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) - (*r);
|
*y = (*l) - (*r);
|
||||||
}
|
}
|
||||||
friend accelerator_inline void add(Grid_simd *__restrict__ y,
|
friend accelerator_inline void add(Grid_simd *__restrict__ y,
|
||||||
const Scalar_type *__restrict__ l,
|
const Scalar_type *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) + (*r);
|
*y = (*l) + (*r);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ a,
|
const Grid_simd *__restrict__ a,
|
||||||
const Scalar_type *__restrict__ x) {
|
const Scalar_type *__restrict__ x) {
|
||||||
*y = (*a) * (*x) + (*y);
|
*y = (*a) * (*x) + (*y);
|
||||||
};
|
};
|
||||||
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Scalar_type *__restrict__ r) {
|
const Scalar_type *__restrict__ r) {
|
||||||
*y = (*l) * (*r);
|
*y = (*l) * (*r);
|
||||||
}
|
}
|
||||||
friend accelerator_inline void sub(Grid_simd *__restrict__ y,
|
friend accelerator_inline void sub(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Scalar_type *__restrict__ r) {
|
const Scalar_type *__restrict__ r) {
|
||||||
*y = (*l) - (*r);
|
*y = (*l) - (*r);
|
||||||
}
|
}
|
||||||
friend accelerator_inline void add(Grid_simd *__restrict__ y,
|
friend accelerator_inline void add(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Scalar_type *__restrict__ r) {
|
const Scalar_type *__restrict__ r) {
|
||||||
*y = (*l) + (*r);
|
*y = (*l) + (*r);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -851,11 +853,17 @@ accelerator_inline Grid_simd<S, V> toReal(const Grid_simd<complex<S>, V> &in) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class T> struct toComplexMapper {};
|
||||||
|
template<> struct toComplexMapper<vRealF> { typedef vComplexF Complexified; };
|
||||||
|
template<> struct toComplexMapper<vRealD> { typedef vComplexD Complexified; };
|
||||||
|
|
||||||
|
|
||||||
// complex = toComplex( real )
|
// complex = toComplex( real )
|
||||||
template <class R, class V, IfReal<R> = 0> // must be a real arg
|
template <class Rsimd> // must be a real arg
|
||||||
accelerator_inline Grid_simd< complex<R>, V> toComplex(const Grid_simd<R, V> &in) {
|
accelerator_inline typename toComplexMapper<Rsimd>::Complexified toComplex(const Rsimd &in) {
|
||||||
typedef Grid_simd<R, V> Rsimd;
|
|
||||||
typedef Grid_simd< complex<R>, V> Csimd;
|
typedef typename toComplexMapper<Rsimd>::Complexified Csimd;
|
||||||
typename Rsimd::conv_t conv; // address as real
|
typename Rsimd::conv_t conv; // address as real
|
||||||
|
|
||||||
conv.v = in.v;
|
conv.v = in.v;
|
||||||
@ -867,7 +875,7 @@ accelerator_inline Grid_simd< complex<R>, V> toComplex(const Grid_simd<R, V> &in
|
|||||||
conv.s[i + 1] = 0.0; // zero imaginary parts
|
conv.s[i + 1] = 0.0; // zero imaginary parts
|
||||||
}
|
}
|
||||||
Csimd ret;
|
Csimd ret;
|
||||||
ret.v = conv.v;
|
memcpy((void *)&ret.v,(void *)&conv.v,sizeof(ret.v));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user