1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

disable fcmla in vector type building for VLA

This commit is contained in:
nmeyer-ur 2020-05-21 19:41:42 +02:00
parent 046b1cbbc0
commit 8c5a5fdfce
3 changed files with 31 additions and 25 deletions

View File

@ -324,18 +324,6 @@ struct Sub{
};
struct Mult{
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
vec<T> out;
svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt a_v = svld1(pg1, a.v);
typename acle<T>::vt b_v = svld1(pg1, b.v);
typename acle<T>::vt c_v = svld1(pg1, c.v);
typename acle<T>::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
svst1(pg1, out.v, r_v);
return out;
}
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){
vec<T> out;
@ -408,7 +396,7 @@ struct MultComplex{
struct MultAddComplex{
// Complex a*b+c
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
vec<T> out;
svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt a_v = svld1(pg1, a.v);
@ -419,9 +407,7 @@ struct MultAddComplex{
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
svst1(pg1, out.v, r_v);
return out;
svst1(pg1, a.v, r_v);
}
};

View File

@ -295,12 +295,12 @@ struct Sub{
struct Mult{
// Real float fma
inline void operator()(vecf a, vecf b, vecf c){
inline vecf operator()(vecf a, vecf b, vecf c){
pred pg1 = acle<float>::pg1();
return svmad_x(pg1, b, c, a);
}
// Real double fma
inline void operator()(vecd a, vecd b, vecd c){
inline vecd operator()(vecd a, vecd b, vecd c){
pred pg1 = acle<double>::pg1();
return svmad_x(pg1, b, c, a);
}

View File

@ -298,23 +298,21 @@ public:
// FIXME -- alias this to an accelerator_inline MAC struct.
// A64FX: use FCMLA
/*
#if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA
// FIXME VLA build error
//#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error
#if defined(A64FXFIXEDSIZE)
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) {
y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v);
*y = fxmac((*a), (*x), (*y));
};
#else
#endif
*/
// Generic multiply-accumulate on Grid_simd operands passed by pointer:
// overwrites *y with (*a) * (*x) + (*y), using the operator* and operator+
// of Grid_simd. a and x are only read; y is both read and written.
// All three pointers are declared __restrict__.
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) {
*y = (*a) * (*x) + (*y);
};
#endif
friend accelerator_inline void mult(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ l,
@ -793,6 +791,28 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
return ret;
};
// ----------------A64FX MAC ---------------------
// Distinguish between complex types and others
//#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error
#if defined(A64FXFIXEDSIZE)
// fxmac overload for complex scalar types (enabled through the IfComplex<S>
// template parameter). Takes a, b and c by value, applies
// trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD()) to their underlying vectors
// and returns the result in a new Grid_simd.
// NOTE(review): presumably this evaluates a*b + c -- confirm against the
// definitions of trinary and MultAddComplexSIMD, which are not shown here.
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
Grid_simd<S, V> ret;
ret.v = trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD());
return ret;
};
// Real/Integer types
// fxmac overload for non-complex scalar types (enabled through the
// IfNotComplex<S> template parameter). Takes a, b and c by value, applies
// trinary<V>(a.v, b.v, c.v, MultSIMD()) to their underlying vectors and
// returns the result in a new Grid_simd.
// NOTE(review): the fixed-size Mult functor's three-operand form returns
// svmad_x(pg1, b, c, a). If svmad_x(pg, op1, op2, op3) is op1*op2 + op3 and
// trinary forwards its operands in order, this overload yields b*c + a rather
// than a*b + c, which would disagree with the complex overload -- confirm the
// intended operand order before enabling this path.
template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
Grid_simd<S, V> ret;
ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
return ret;
};
#endif
// -------------------------------------
// Distinguish between complex types and others
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {