mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
enable fcmla in tensor arithmetics; fixed-size works, VLA does not compile
This commit is contained in:
parent
a65ce237c1
commit
046b1cbbc0
@ -324,6 +324,18 @@ struct Sub{
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct Mult{
|
struct Mult{
|
||||||
|
template <typename T>
|
||||||
|
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
||||||
|
vec<T> out;
|
||||||
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
|
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
||||||
|
typename acle<T>::vt c_v = svld1(pg1, c.v);
|
||||||
|
typename acle<T>::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
|
||||||
|
svst1(pg1, out.v, r_v);
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
@ -396,7 +408,7 @@ struct MultComplex{
|
|||||||
struct MultAddComplex{
|
struct MultAddComplex{
|
||||||
// Complex a*b+c
|
// Complex a*b+c
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
|
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -407,7 +419,9 @@ struct MultAddComplex{
|
|||||||
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
|
typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
|
||||||
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
|
r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
|
||||||
|
|
||||||
svst1(pg1, a.v, r_v);
|
svst1(pg1, out.v, r_v);
|
||||||
|
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -295,14 +295,14 @@ struct Sub{
|
|||||||
|
|
||||||
struct Mult{
|
struct Mult{
|
||||||
// Real float fma
|
// Real float fma
|
||||||
inline void mac(vecf &a, vecf b, vecf c){
|
inline void operator()(vecf a, vecf b, vecf c){
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
a = svmad_x(pg1, b, c, a);
|
return svmad_x(pg1, b, c, a);
|
||||||
}
|
}
|
||||||
// Real double fma
|
// Real double fma
|
||||||
inline void mac(vecd &a, vecd b, vecd c){
|
inline void operator()(vecd a, vecd b, vecd c){
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
a = svmad_x(pg1, b, c, a);
|
return svmad_x(pg1, b, c, a);
|
||||||
}
|
}
|
||||||
// Real float
|
// Real float
|
||||||
inline vecf operator()(vecf a, vecf b){
|
inline vecf operator()(vecf a, vecf b){
|
||||||
@ -376,18 +376,18 @@ struct MultComplex{
|
|||||||
struct MultAddComplex{
|
struct MultAddComplex{
|
||||||
// Complex a*b+c
|
// Complex a*b+c
|
||||||
// Complex float
|
// Complex float
|
||||||
inline void mac(vecf &a, vecf b, vecf c){
|
inline vecf operator()(vecf a, vecf b, vecf c){
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
// using FCMLA
|
// using FCMLA
|
||||||
vecf r_v = svcmla_x(pg1, c, a, b, 0);
|
vecf r_v = svcmla_x(pg1, c, a, b, 0);
|
||||||
a = svcmla_x(pg1, r_v, a, b, 90);
|
return svcmla_x(pg1, r_v, a, b, 90);
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline void mac(vecd &a, vecd b, vecd c){
|
inline vecd operator()(vecd a, vecd b, vecd c){
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
// using FCMLA
|
// using FCMLA
|
||||||
vecd r_v = svcmla_x(pg1, c, a, b, 0);
|
vecd r_v = svcmla_x(pg1, c, a, b, 0);
|
||||||
a = svcmla_x(pg1, r_v, a, b, 90);
|
return svcmla_x(pg1, r_v, a, b, 90);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user