mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
support fcmla in vector_types, untested
This commit is contained in:
parent
032f7dde1a
commit
9f212679f1
@ -388,8 +388,29 @@ struct MultComplex{
|
|||||||
typename acle<T>::vt z_v = acle<T>::zero();
|
typename acle<T>::vt z_v = acle<T>::zero();
|
||||||
|
|
||||||
// using FCMLA
|
// using FCMLA
|
||||||
typename acle<T>::vt r_v = svcmla_x(pg1, z_v, a_v, b_v, 90);
|
typename acle<T>::vt r_v = svcmla_x(pg1, z_v, a_v, b_v, 0);
|
||||||
r_v = svcmla_x(pg1, r_v, a_v, b_v, 0);
|
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
|
||||||
|
|
||||||
|
svst1(pg1, out.v, r_v);
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MultAddComplex{
|
||||||
|
// Complex a*b+c
|
||||||
|
template <typename T>
|
||||||
|
inline vec<T> mac(const vec<T> &a, const vec<T> &b, const vec<T> &c){
|
||||||
|
|
||||||
|
vec<T> out;
|
||||||
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
|
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
||||||
|
typename acle<T>::vt c_v = svld1(pg1, c.v);;
|
||||||
|
|
||||||
|
// using FCMLA
|
||||||
|
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
|
||||||
|
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
|
||||||
|
|
||||||
svst1(pg1, out.v, r_v);
|
svst1(pg1, out.v, r_v);
|
||||||
|
|
||||||
@ -897,15 +918,16 @@ typedef Optimization::Vstream VstreamSIMD;
|
|||||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
|
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Sum SumSIMD;
|
||||||
typedef Optimization::Sub SubSIMD;
|
typedef Optimization::Sub SubSIMD;
|
||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
typedef Optimization::MultRealPart MultRealPartSIMD;
|
typedef Optimization::MultAddComplex MultAddComplexSIMD;
|
||||||
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
NAMESPACE_END(Grid)
|
NAMESPACE_END(Grid)
|
||||||
|
@ -367,6 +367,24 @@ struct MultComplex{
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct MultAddComplex{
|
||||||
|
// Complex a*b+c
|
||||||
|
// Complex float
|
||||||
|
inline vecf mac(vecf a, vecf b, vecf c){
|
||||||
|
pred pg1 = acle<float>::pg1();
|
||||||
|
// using FCMLA
|
||||||
|
vecf r_v = svcmla_x(pg1, c, a, b, 0);
|
||||||
|
return svcmla_x(pg1, r_v, a, b, 90);
|
||||||
|
}
|
||||||
|
// Complex double
|
||||||
|
inline vecd mac(vecd a, vecd b, vecd c){
|
||||||
|
pred pg1 = acle<double>::pg1();
|
||||||
|
// using FCMLA
|
||||||
|
vecf r_v = svcmla_x(pg1, c, a, b, 0);
|
||||||
|
return svcmla_x(pg1, r_v, a, b, 90);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct Div{
|
struct Div{
|
||||||
// Real float
|
// Real float
|
||||||
inline vecf operator()(vecf a, vecf b){
|
inline vecf operator()(vecf a, vecf b){
|
||||||
@ -772,15 +790,16 @@ typedef Optimization::Vstream VstreamSIMD;
|
|||||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
|
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Sum SumSIMD;
|
||||||
typedef Optimization::Sub SubSIMD;
|
typedef Optimization::Sub SubSIMD;
|
||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
typedef Optimization::MultRealPart MultRealPartSIMD;
|
typedef Optimization::MultAddComplex MultAddComplexSIMD;
|
||||||
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -298,11 +298,19 @@ public:
|
|||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
|
||||||
// FIXME -- alias this to an accelerator_inline MAC struct.
|
// FIXME -- alias this to an accelerator_inline MAC struct.
|
||||||
|
#if defined(A64FX) || defined(A64FXFIXEDSIZE) // on A64FX use FCMLA
|
||||||
|
// A64FX path: complex multiply-accumulate  *y = (*a) * (*x) + (*y)  using FCMLA,
// delegated to Optimization::MultAddComplex.
// mac() is declared there as a (non-static) member function, so it is invoked
// on a temporary object rather than through the class name; this form is also
// valid should mac() be static.
// NOTE(review): the enclosing #if selects this path by architecture only, not
// by scalar type -- confirm that non-complex instantiations of Grid_simd are
// meant to go through MultAddComplex as well.
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
                                   const Grid_simd *__restrict__ a,
                                   const Grid_simd *__restrict__ x) {
  y->v = Optimization::MultAddComplex().mac(a->v, x->v, y->v);
};
|
||||||
|
#else
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ a,
|
const Grid_simd *__restrict__ a,
|
||||||
const Grid_simd *__restrict__ x) {
|
const Grid_simd *__restrict__ x) {
|
||||||
*y = (*a) * (*x) + (*y);
|
*y = (*a) * (*x) + (*y);
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user