Mirror of https://github.com/paboyle/Grid.git
(synced 2025-04-09 21:50:45 +01:00)
Disable FCMLA in vector type building for VLA builds
This commit is contained in:
parent
046b1cbbc0
commit
8c5a5fdfce
@ -324,18 +324,6 @@ struct Sub{
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct Mult{
|
struct Mult{
|
||||||
template <typename T>
|
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
|
||||||
vec<T> out;
|
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
|
||||||
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
|
||||||
typename acle<T>::vt c_v = svld1(pg1, c.v);
|
|
||||||
typename acle<T>::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
|
|
||||||
svst1(pg1, out.v, r_v);
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
@ -408,7 +396,7 @@ struct MultComplex{
|
|||||||
struct MultAddComplex{
|
struct MultAddComplex{
|
||||||
// Complex a*b+c
|
// Complex a*b+c
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -419,9 +407,7 @@ struct MultAddComplex{
|
|||||||
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
|
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
|
||||||
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
|
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
|
||||||
|
|
||||||
svst1(pg1, out.v, r_v);
|
svst1(pg1, a.v, r_v);
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -295,12 +295,12 @@ struct Sub{
|
|||||||
|
|
||||||
struct Mult{
|
struct Mult{
|
||||||
// Real float fma
|
// Real float fma
|
||||||
inline void operator()(vecf a, vecf b, vecf c){
|
inline vecf operator()(vecf a, vecf b, vecf c){
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
return svmad_x(pg1, b, c, a);
|
return svmad_x(pg1, b, c, a);
|
||||||
}
|
}
|
||||||
// Real double fma
|
// Real double fma
|
||||||
inline void operator()(vecd a, vecd b, vecd c){
|
inline vecd operator()(vecd a, vecd b, vecd c){
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
return svmad_x(pg1, b, c, a);
|
return svmad_x(pg1, b, c, a);
|
||||||
}
|
}
|
||||||
|
@ -298,23 +298,21 @@ public:
|
|||||||
|
|
||||||
// FIXME -- alias this to an accelerator_inline MAC struct.
|
// FIXME -- alias this to an accelerator_inline MAC struct.
|
||||||
|
|
||||||
// A64FX: use FCMLA
|
// FIXME VLA build error
|
||||||
/*
|
//#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error
|
||||||
#if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA
|
#if defined(A64FXFIXEDSIZE)
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ a,
|
const Grid_simd *__restrict__ a,
|
||||||
const Grid_simd *__restrict__ x) {
|
const Grid_simd *__restrict__ x) {
|
||||||
y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v);
|
*y = fxmac((*a), (*x), (*y));
|
||||||
};
|
};
|
||||||
#else
|
#else
|
||||||
#endif
|
|
||||||
|
|
||||||
*/
|
|
||||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ a,
|
const Grid_simd *__restrict__ a,
|
||||||
const Grid_simd *__restrict__ x) {
|
const Grid_simd *__restrict__ x) {
|
||||||
*y = (*a) * (*x) + (*y);
|
*y = (*a) * (*x) + (*y);
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
@ -793,6 +791,28 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
|
|||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ----------------A64FX MAC ---------------------
|
||||||
|
// Distinguish between complex types and others
|
||||||
|
//#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error
|
||||||
|
#if defined(A64FXFIXEDSIZE)
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
|
ret.v = trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD());
|
||||||
|
return ret;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Real/Integer types
|
||||||
|
template <class S, class V, IfNotComplex<S> = 0>
|
||||||
|
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
|
ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
|
||||||
|
return ret;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
// -------------------------------------
|
||||||
|
|
||||||
|
|
||||||
// Distinguish between complex types and others
|
// Distinguish between complex types and others
|
||||||
template <class S, class V, IfComplex<S> = 0>
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user