1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-11 14:40:46 +01:00

add 3 op Mult for VLA

This commit is contained in:
nmeyer-ur 2020-06-12 18:49:05 +02:00
parent 87266ce099
commit 92281ec22d
2 changed files with 16 additions and 5 deletions

View File

@ -8,6 +8,8 @@
Author: Nils Meyer <nils.meyer@ur.de> Author: Nils Meyer <nils.meyer@ur.de>
with support from Arm
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
@ -365,6 +367,18 @@ struct Sub{
}; };
struct Mult{ struct Mult{
// Three-operand form: element-wise multiply-accumulate, returning c + a * b.
// All loads, the arithmetic and the store are governed by the predicate
// obtained from acle<T>::pg1().
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
  using sve_vec = typename acle<T>::vt;

  svbool_t active = acle<T>::pg1();

  // Bring the two factors and the addend into SVE vectors.
  sve_vec factor_l = svld1(active, a.v);
  sve_vec factor_r = svld1(active, b.v);
  sve_vec addend   = svld1(active, c.v);

  // svmla takes the accumulator first: addend + factor_l * factor_r.
  vec<T> result;
  svst1(active, result.v, svmla_x(active, addend, factor_l, factor_r));
  return result;
}
template <typename T> template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){ inline vec<T> operator()(vec<T> a, vec<T> b){
vec<T> out; vec<T> out;

View File

@ -327,8 +327,7 @@ public:
// FIXME -- alias this to an accelerator_inline MAC struct. // FIXME -- alias this to an accelerator_inline MAC struct.
//#if defined(A64FX) || defined(A64FXFIXEDSIZE) #if defined(A64FX) || defined(A64FXFIXEDSIZE)
#if 0
friend accelerator_inline void mac(Grid_simd *__restrict__ y, friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a, const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) { const Grid_simd *__restrict__ x) {
@ -821,8 +820,7 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
// ---------------- A64FX MAC ------------------- // ---------------- A64FX MAC -------------------
// Distinguish between complex types and others // Distinguish between complex types and others
//#if defined(A64FX) || defined(A64FXFIXEDSIZE) #if defined(A64FX) || defined(A64FXFIXEDSIZE)
#if 0
template <class S, class V, IfComplex<S> = 0> template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) { accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
Grid_simd<S, V> ret; Grid_simd<S, V> ret;
@ -834,7 +832,6 @@ accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, G
template <class S, class V, IfNotComplex<S> = 0> template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) { accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
Grid_simd<S, V> ret; Grid_simd<S, V> ret;
// MultSIMD takes only 2 args -> need MultAddReal
ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD()); ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
return ret; return ret;
}; };