mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
add 3 op Mult for VLA
This commit is contained in:
parent
87266ce099
commit
92281ec22d
@ -8,6 +8,8 @@
|
||||
|
||||
Author: Nils Meyer <nils.meyer@ur.de>
|
||||
|
||||
with support from Arm
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
@ -365,6 +367,18 @@ struct Sub{
|
||||
};
|
||||
|
||||
struct Mult{
|
||||
// Three-operand multiply for the SVE (ACLE) backend.
// Loads a, b and c into SVE vectors under the predicate acle<T>::pg1(),
// combines them with svmla_x and stores the result into a fresh vec<T>.
// Per the ACLE definition of svmla (op1 + op2 * op3), the call below
// evaluates c + a * b lane by lane, i.e. a multiply-accumulate.
// NOTE(review): presumably pg1 is an all-true predicate for element type T;
// confirm against the acle<T> definition, which is not visible here.
template <typename T>
|
||||
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
||||
vec<T> out;
|
||||
// Governing predicate used for every load, the arithmetic and the store.
svbool_t pg1 = acle<T>::pg1();
|
||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
||||
typename acle<T>::vt c_v = svld1(pg1, c.v);
|
||||
// Accumulator comes first: svmla_x(pg, acc, x, y) yields acc + x * y.
// The _x form leaves lanes that are inactive in pg1 with unspecified values.
typename acle<T>::vt r_v = svmla_x(pg1, c_v, a_v, b_v);
|
||||
svst1(pg1, out.v, r_v);
|
||||
|
||||
return out;
|
||||
}
|
||||
template <typename T>
|
||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||
vec<T> out;
|
||||
|
@ -327,8 +327,7 @@ public:
|
||||
|
||||
// FIXME -- alias this to an accelerator_inline MAC struct.
|
||||
|
||||
//#if defined(A64FX) || defined(A64FXFIXEDSIZE)
|
||||
#if 0
|
||||
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
|
||||
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
|
||||
const Grid_simd *__restrict__ a,
|
||||
const Grid_simd *__restrict__ x) {
|
||||
@ -821,8 +820,7 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
|
||||
|
||||
// ---------------- A64FX MAC -------------------
|
||||
// Distinguish between complex types and others
|
||||
//#if defined(A64FX) || defined(A64FXFIXEDSIZE)
|
||||
#if 0
|
||||
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
|
||||
template <class S, class V, IfComplex<S> = 0>
|
||||
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
|
||||
Grid_simd<S, V> ret;
|
||||
@ -834,7 +832,6 @@ accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, G
|
||||
// fxmac overload selected through the IfNotComplex<S> template parameter
// (the non-complex counterpart of the IfComplex<S> overload).
// Passes the underlying vectors a.v, b.v and c.v together with a MultSIMD
// functor to trinary<V> and returns the result wrapped in a Grid_simd<S, V>.
// NOTE(review): presumably trinary forwards all three operands to the
// functor, giving a fused multiply-add; confirm against trinary's definition.
template <class S, class V, IfNotComplex<S> = 0>
|
||||
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
|
||||
Grid_simd<S, V> ret;
|
||||
// NOTE(review): an earlier note here said MultSIMD takes only 2 args and a MultAddReal functor would be needed; the call below passes three operands, so it relies on MultSIMD offering a 3-argument operator() -- confirm.
|
||||
ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
|
||||
return ret;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user