1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-18 09:45:55 +01:00

clean up, reduction in acle

This commit is contained in:
nmeyer-ur 2020-04-03 19:18:24 +02:00
parent 28d49a3b60
commit 5f8a76d490

View File

@@ -2,18 +2,12 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_a64fx-1.h Source file: Grid_a64fx-2.h
Copyright (C) 2020 Copyright (C) 2020
Author: Nils Meyer <nils.meyer@ur.de> Author: Nils Meyer <nils.meyer@ur.de>
Copyright (C) 2015
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
@@ -47,8 +41,8 @@ static_assert(GEN_SIMD_WIDTH % 64u == 0, "A64FX SIMD vector size is 64 bytes");
#pragma error "Missing SVE feature" #pragma error "Missing SVE feature"
#endif /* __ARM_FEATURE_SVE */ #endif /* __ARM_FEATURE_SVE */
namespace Grid { NAMESPACE_BEGIN(Grid);
namespace Optimization { NAMESPACE_BEGIN(Optimization);
// type traits giving the number of elements for each vector type // type traits giving the number of elements for each vector type
template <typename T> struct W; template <typename T> struct W;
@@ -83,12 +77,12 @@ namespace Optimization {
typedef vec<uint16_t> vech; // half precision comms typedef vec<uint16_t> vech; // half precision comms
typedef vec<Integer> veci; typedef vec<Integer> veci;
}} // Grid::Optimization NAMESPACE_END(Optimization)
NAMESPACE_END(Grid)
// low-level API // low-level API
namespace Grid { NAMESPACE_BEGIN(Grid);
namespace Optimization { NAMESPACE_BEGIN(Optimization);
template <typename T> template <typename T>
struct acle{}; struct acle{};
@@ -249,12 +243,7 @@ struct Vsplat{
svbool_t pg1 = acle<T>::pg1(); svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v); typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v);
// NOTE illegal '&' here causes SIGBUS at runtime, related to CAS-35230-H2H6T1
// svst1(pg1, (typename acle<T>::pt*)&D, a_v);
svst1(pg1, D, a_v); svst1(pg1, D, a_v);
// non temporal version
//svstnt1(pg1, D, a_v);
} }
}; };
@@ -265,7 +254,6 @@ struct Vsplat{
svbool_t pg1 = acle<T>::pg1(); svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt b_v = svld1(pg1, b.v); typename acle<T>::vt b_v = svld1(pg1, b.v);
// FIXME non-temporal store causes compiler crash CAS-35230-H2H6T1
svstnt1(pg1, a, b_v); svstnt1(pg1, a, b_v);
//svst1(pg1, a, b_v); //svst1(pg1, a, b_v);
} }
@@ -780,7 +768,7 @@ struct Rotate{
}; };
// ======================================================================= // =======================================================================
/* SVE ACLE reducedoes not compile, check later // SVE ACLE reduce does not compile, check later
// tree-based reduction // tree-based reduction
#define svred(pg, v)\ #define svred(pg, v)\
@@ -864,11 +852,11 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
} }
#undef svred #undef svred
*/ // */
// ======================================================================= // =======================================================================
/*
#define acc(v, a, off, step, n)\ #define acc(v, a, off, step, n)\
for (unsigned int i = off; i < n; i += step)\ for (unsigned int i = off; i < n; i += step)\
{\ {\
@@ -939,9 +927,9 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
} }
#undef acc // EIGEN compatibility #undef acc // EIGEN compatibility
*/
NAMESPACE_END(Optimization)
} // Optimization
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
@@ -974,4 +962,4 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD; typedef Optimization::TimesI TimesISIMD;
} NAMESPACE_END(Grid)