1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

enable fcmla in tensor arithmetics; fixed-size works, VLA does not compile

This commit is contained in:
nmeyer-ur 2020-05-21 19:39:07 +02:00
parent a65ce237c1
commit 046b1cbbc0
2 changed files with 24 additions and 10 deletions

View File

@ -324,6 +324,18 @@ struct Sub{
}; };
struct Mult{ struct Mult{
// Real multiply-add on three operands: returns b * c + a, element by element.
// All loads, the arithmetic and the store are governed by the predicate
// obtained from acle<T>::pg1().
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
  using sve_vec = typename acle<T>::vt;

  vec<T> result;
  svbool_t active = acle<T>::pg1();

  // Bring the three operands into SVE vector variables.
  sve_vec addend       = svld1(active, a.v);
  sve_vec multiplicand = svld1(active, b.v);
  sve_vec multiplier   = svld1(active, c.v);

  // Per the ACLE, svmad_x(pg, x, y, z) evaluates x * y + z.
  sve_vec fused = svmad_x(active, multiplicand, multiplier, addend);

  // Write the result back into the returned vec<T>.
  svst1(active, result.v, fused);
  return result;
}
template <typename T> template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){ inline vec<T> operator()(vec<T> a, vec<T> b){
vec<T> out; vec<T> out;
@ -396,7 +408,7 @@ struct MultComplex{
struct MultAddComplex{ struct MultAddComplex{
// Complex a*b+c // Complex a*b+c
template <typename T> template <typename T>
inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){ inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
vec<T> out; vec<T> out;
svbool_t pg1 = acle<T>::pg1(); svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt a_v = svld1(pg1, a.v); typename acle<T>::vt a_v = svld1(pg1, a.v);
@ -407,7 +419,9 @@ struct MultAddComplex{
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0); typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90); r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
svst1(pg1, a.v, r_v); svst1(pg1, out.v, r_v);
return out;
} }
}; };

View File

@ -295,14 +295,14 @@ struct Sub{
struct Mult{ struct Mult{
// Real float fma // Real float fma
inline void mac(vecf &a, vecf b, vecf c){ inline void operator()(vecf a, vecf b, vecf c){
pred pg1 = acle<float>::pg1(); pred pg1 = acle<float>::pg1();
a = svmad_x(pg1, b, c, a); return svmad_x(pg1, b, c, a);
} }
// Real double fma // Real double fma
inline void mac(vecd &a, vecd b, vecd c){ inline void operator()(vecd a, vecd b, vecd c){
pred pg1 = acle<double>::pg1(); pred pg1 = acle<double>::pg1();
a = svmad_x(pg1, b, c, a); return svmad_x(pg1, b, c, a);
} }
// Real float // Real float
inline vecf operator()(vecf a, vecf b){ inline vecf operator()(vecf a, vecf b){
@ -376,18 +376,18 @@ struct MultComplex{
struct MultAddComplex{ struct MultAddComplex{
// Complex a*b+c // Complex a*b+c
// Complex float // Complex float
inline void mac(vecf &a, vecf b, vecf c){ inline vecf operator()(vecf a, vecf b, vecf c){
pred pg1 = acle<float>::pg1(); pred pg1 = acle<float>::pg1();
// using FCMLA // using FCMLA
vecf r_v = svcmla_x(pg1, c, a, b, 0); vecf r_v = svcmla_x(pg1, c, a, b, 0);
a = svcmla_x(pg1, r_v, a, b, 90); return svcmla_x(pg1, r_v, a, b, 90);
} }
// Complex double // Complex double
inline void mac(vecd &a, vecd b, vecd c){ inline vecd operator()(vecd a, vecd b, vecd c){
pred pg1 = acle<double>::pg1(); pred pg1 = acle<double>::pg1();
// using FCMLA // using FCMLA
vecd r_v = svcmla_x(pg1, c, a, b, 0); vecd r_v = svcmla_x(pg1, c, a, b, 0);
a = svcmla_x(pg1, r_v, a, b, 90); return svcmla_x(pg1, r_v, a, b, 90);
} }
}; };