1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-28 06:35:55 +01:00

clean up, reduction in acle

This commit is contained in:
nmeyer-ur 2020-04-03 19:18:24 +02:00
parent 28d49a3b60
commit 5f8a76d490

View File

@ -2,17 +2,11 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_a64fx-1.h Source file: Grid_a64fx-2.h
Copyright (C) 2020 Copyright (C) 2020
Author: Nils Meyer <nils.meyer@ur.de> Author: Nils Meyer <nils.meyer@ur.de>
Copyright (C) 2015
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -47,8 +41,8 @@ static_assert(GEN_SIMD_WIDTH % 64u == 0, "A64FX SIMD vector size is 64 bytes");
#pragma error "Missing SVE feature" #pragma error "Missing SVE feature"
#endif /* __ARM_FEATURE_SVE */ #endif /* __ARM_FEATURE_SVE */
namespace Grid { NAMESPACE_BEGIN(Grid);
namespace Optimization { NAMESPACE_BEGIN(Optimization);
// type traits giving the number of elements for each vector type // type traits giving the number of elements for each vector type
template <typename T> struct W; template <typename T> struct W;
@ -83,12 +77,12 @@ namespace Optimization {
typedef vec<uint16_t> vech; // half precision comms typedef vec<uint16_t> vech; // half precision comms
typedef vec<Integer> veci; typedef vec<Integer> veci;
}} // Grid::Optimization NAMESPACE_END(Optimization)
NAMESPACE_END(Grid)
// low-level API // low-level API
namespace Grid { NAMESPACE_BEGIN(Grid);
namespace Optimization { NAMESPACE_BEGIN(Optimization);
template <typename T> template <typename T>
struct acle{}; struct acle{};
@ -242,21 +236,16 @@ struct Vsplat{
} }
}; };
struct Vstore{ struct Vstore{
// Real // Real
template <typename T> template <typename T>
inline void operator()(vec<T> a, T *D){ inline void operator()(vec<T> a, T *D){
svbool_t pg1 = acle<T>::pg1(); svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v); typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v);
// NOTE illegal '&' here causes SIGBUS at runtime, related to CAS-35230-H2H6T1
// svst1(pg1, (typename acle<T>::pt*)&D, a_v);
svst1(pg1, D, a_v); svst1(pg1, D, a_v);
// non temporal version
//svstnt1(pg1, D, a_v);
} }
}; };
struct Vstream{ struct Vstream{
// Real // Real
@ -265,7 +254,6 @@ struct Vsplat{
svbool_t pg1 = acle<T>::pg1(); svbool_t pg1 = acle<T>::pg1();
typename acle<T>::vt b_v = svld1(pg1, b.v); typename acle<T>::vt b_v = svld1(pg1, b.v);
// FIXME non-temporal store causes compiler crash CAS-35230-H2H6T1
svstnt1(pg1, a, b_v); svstnt1(pg1, a, b_v);
//svst1(pg1, a, b_v); //svst1(pg1, a, b_v);
} }
@ -297,12 +285,12 @@ struct Vsplat{
} }
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
struct Sum{ struct Sum{
template <typename T> template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){ inline vec<T> operator()(vec<T> a, vec<T> b){
@ -315,9 +303,9 @@ struct Vsplat{
return out; return out;
} }
}; };
struct Sub{ struct Sub{
template <typename T> template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){ inline vec<T> operator()(vec<T> a, vec<T> b){
@ -330,7 +318,7 @@ struct Vsplat{
return out; return out;
} }
}; };
struct Mult{ struct Mult{
@ -440,7 +428,7 @@ struct Conj{
}; };
struct TimesMinusI{ struct TimesMinusI{
// Complex // Complex
template <typename T> template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){ inline vec<T> operator()(vec<T> a, vec<T> b){
@ -458,9 +446,9 @@ struct Conj{
return out; return out;
} }
}; };
struct TimesI{ struct TimesI{
// Complex // Complex
template <typename T> template <typename T>
inline vec<T> operator()(vec<T> a, vec<T> b){ inline vec<T> operator()(vec<T> a, vec<T> b){
@ -478,7 +466,7 @@ struct Conj{
return out; return out;
} }
}; };
struct PrecisionChange { struct PrecisionChange {
@ -587,7 +575,7 @@ struct PrecisionChange {
}; };
struct Exchange{ struct Exchange{
// Exchange0 is valid for arbitrary SVE vector length // Exchange0 is valid for arbitrary SVE vector length
template <typename T> template <typename T>
@ -780,7 +768,7 @@ struct Rotate{
}; };
// ======================================================================= // =======================================================================
/* SVE ACLE reduce does not compile, check later // SVE ACLE reduce does not compile, check later
// tree-based reduction // tree-based reduction
#define svred(pg, v)\ #define svred(pg, v)\
@ -864,11 +852,11 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
} }
#undef svred #undef svred
*/ // */
// ======================================================================= // =======================================================================
/*
#define acc(v, a, off, step, n)\ #define acc(v, a, off, step, n)\
for (unsigned int i = off; i < n; i += step)\ for (unsigned int i = off; i < n; i += step)\
{\ {\
@ -939,39 +927,39 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
} }
#undef acc // EIGEN compatibility #undef acc // EIGEN compatibility
*/
NAMESPACE_END(Optimization)
} // Optimization
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
typedef Optimization::vech SIMD_Htype; // Reduced precision type typedef Optimization::vech SIMD_Htype; // Reduced precision type
typedef Optimization::vecf SIMD_Ftype; // Single precision type typedef Optimization::vecf SIMD_Ftype; // Single precision type
typedef Optimization::vecd SIMD_Dtype; // Double precision type typedef Optimization::vecd SIMD_Dtype; // Double precision type
typedef Optimization::veci SIMD_Itype; // Integer type typedef Optimization::veci SIMD_Itype; // Integer type
// prefetch utilities // prefetch utilities
inline void v_prefetch0(int size, const char *ptr){}; inline void v_prefetch0(int size, const char *ptr){};
inline void prefetch_HINT_T0(const char *ptr){}; inline void prefetch_HINT_T0(const char *ptr){};
// Function name aliases // Function name aliases
typedef Optimization::Vsplat VsplatSIMD; typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD; typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD; typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD; typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>; template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations // Arithmetic operations
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
typedef Optimization::Div DivSIMD; typedef Optimization::Div DivSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::MultRealPart MultRealPartSIMD; typedef Optimization::MultRealPart MultRealPartSIMD;
typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::MaddRealPart MaddRealPartSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD; typedef Optimization::TimesI TimesISIMD;
} NAMESPACE_END(Grid)