1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

add 3 op Mult for VLA

This commit is contained in:
nmeyer-ur 2020-06-12 18:49:05 +02:00
parent 87266ce099
commit 92281ec22d
2 changed files with 16 additions and 5 deletions

View File

@ -8,6 +8,8 @@
Author: Nils Meyer <nils.meyer@ur.de>
with support from Arm
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
@ -365,6 +367,18 @@ struct Sub{
};
struct Mult{
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
vec<T> out;
svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt a_v = svld1(pg1, a.v);
typename acle<T>::vt b_v = svld1(pg1, b.v);
typename acle<T>::vt c_v = svld1(pg1, c.v);
typename acle<T>::vt r_v = svmla_x(pg1, c_v, a_v, b_v);
svst1(pg1, out.v, r_v);
return out;
}
template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){
vec<T> out;

View File

@ -327,8 +327,7 @@ public:
// FIXME -- alias this to an accelerator_inline MAC struct.
//#if defined(A64FX) || defined(A64FXFIXEDSIZE)
#if 0
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
friend accelerator_inline void mac(Grid_simd *__restrict__ y,
const Grid_simd *__restrict__ a,
const Grid_simd *__restrict__ x) {
@ -821,8 +820,7 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
// ---------------- A64FX MAC -------------------
// Distinguish between complex types and others
//#if defined(A64FX) || defined(A64FXFIXEDSIZE)
#if 0
#if defined(A64FX) || defined(A64FXFIXEDSIZE)
template <class S, class V, IfComplex<S> = 0>
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
Grid_simd<S, V> ret;
@ -834,7 +832,6 @@ accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, G
template <class S, class V, IfNotComplex<S> = 0>
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
Grid_simd<S, V> ret;
// MultSIMD takes only 2 args -> need MultAddReal
ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
return ret;
};