mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-28 06:35:55 +01:00
clean up, reduction in acle
This commit is contained in:
parent
28d49a3b60
commit
5f8a76d490
@ -2,17 +2,11 @@
|
|||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/simd/Grid_a64fx-1.h
|
Source file: Grid_a64fx-2.h
|
||||||
|
|
||||||
Copyright (C) 2020
|
Copyright (C) 2020
|
||||||
|
|
||||||
Author: Nils Meyer <nils.meyer@ur.de>
|
Author: Nils Meyer <nils.meyer@ur.de>
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
Copyright (C) 2017
|
|
||||||
|
|
||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
|
||||||
Andrew Lawson <andrew.lawson1991@gmail.com>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -47,8 +41,8 @@ static_assert(GEN_SIMD_WIDTH % 64u == 0, "A64FX SIMD vector size is 64 bytes");
|
|||||||
#pragma error "Missing SVE feature"
|
#pragma error "Missing SVE feature"
|
||||||
#endif /* __ARM_FEATURE_SVE */
|
#endif /* __ARM_FEATURE_SVE */
|
||||||
|
|
||||||
namespace Grid {
|
NAMESPACE_BEGIN(Grid);
|
||||||
namespace Optimization {
|
NAMESPACE_BEGIN(Optimization);
|
||||||
|
|
||||||
// type traits giving the number of elements for each vector type
|
// type traits giving the number of elements for each vector type
|
||||||
template <typename T> struct W;
|
template <typename T> struct W;
|
||||||
@ -83,12 +77,12 @@ namespace Optimization {
|
|||||||
typedef vec<uint16_t> vech; // half precision comms
|
typedef vec<uint16_t> vech; // half precision comms
|
||||||
typedef vec<Integer> veci;
|
typedef vec<Integer> veci;
|
||||||
|
|
||||||
}} // Grid::Optimization
|
NAMESPACE_END(Optimization)
|
||||||
|
NAMESPACE_END(Grid)
|
||||||
|
|
||||||
// low-level API
|
// low-level API
|
||||||
namespace Grid {
|
NAMESPACE_BEGIN(Grid);
|
||||||
namespace Optimization {
|
NAMESPACE_BEGIN(Optimization);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct acle{};
|
struct acle{};
|
||||||
@ -242,21 +236,16 @@ struct Vsplat{
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Vstore{
|
struct Vstore{
|
||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void operator()(vec<T> a, T *D){
|
inline void operator()(vec<T> a, T *D){
|
||||||
|
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v);
|
typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v);
|
||||||
// NOTE illegal '&' here causes SIGBUS at runtime, related to CAS-35230-H2H6T1
|
|
||||||
// svst1(pg1, (typename acle<T>::pt*)&D, a_v);
|
|
||||||
svst1(pg1, D, a_v);
|
svst1(pg1, D, a_v);
|
||||||
|
|
||||||
// non temporal version
|
|
||||||
//svstnt1(pg1, D, a_v);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Vstream{
|
struct Vstream{
|
||||||
// Real
|
// Real
|
||||||
@ -265,7 +254,6 @@ struct Vsplat{
|
|||||||
|
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
||||||
// FIXME non-temporal store causes compiler crash CAS-35230-H2H6T1
|
|
||||||
svstnt1(pg1, a, b_v);
|
svstnt1(pg1, a, b_v);
|
||||||
//svst1(pg1, a, b_v);
|
//svst1(pg1, a, b_v);
|
||||||
}
|
}
|
||||||
@ -297,12 +285,12 @@ struct Vsplat{
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
struct Sum{
|
struct Sum{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
@ -315,9 +303,9 @@ struct Vsplat{
|
|||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Sub{
|
struct Sub{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
@ -330,7 +318,7 @@ struct Vsplat{
|
|||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Mult{
|
struct Mult{
|
||||||
@ -440,7 +428,7 @@ struct Conj{
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct TimesMinusI{
|
struct TimesMinusI{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
@ -458,9 +446,9 @@ struct Conj{
|
|||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TimesI{
|
struct TimesI{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
@ -478,7 +466,7 @@ struct Conj{
|
|||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct PrecisionChange {
|
struct PrecisionChange {
|
||||||
@ -587,7 +575,7 @@ struct PrecisionChange {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Exchange{
|
struct Exchange{
|
||||||
|
|
||||||
// Exchange0 is valid for arbitrary SVE vector length
|
// Exchange0 is valid for arbitrary SVE vector length
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -780,7 +768,7 @@ struct Rotate{
|
|||||||
};
|
};
|
||||||
|
|
||||||
// =======================================================================
|
// =======================================================================
|
||||||
/* SVE ACLE reducedoes not compile, check later
|
// SVE ACLE reduce does not compile, check later
|
||||||
|
|
||||||
// tree-based reduction
|
// tree-based reduction
|
||||||
#define svred(pg, v)\
|
#define svred(pg, v)\
|
||||||
@ -864,11 +852,11 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
|
|||||||
}
|
}
|
||||||
|
|
||||||
#undef svred
|
#undef svred
|
||||||
*/
|
// */
|
||||||
|
|
||||||
// =======================================================================
|
// =======================================================================
|
||||||
|
|
||||||
|
/*
|
||||||
#define acc(v, a, off, step, n)\
|
#define acc(v, a, off, step, n)\
|
||||||
for (unsigned int i = off; i < n; i += step)\
|
for (unsigned int i = off; i < n; i += step)\
|
||||||
{\
|
{\
|
||||||
@ -939,39 +927,39 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
|
|||||||
}
|
}
|
||||||
|
|
||||||
#undef acc // EIGEN compatibility
|
#undef acc // EIGEN compatibility
|
||||||
|
*/
|
||||||
|
|
||||||
|
NAMESPACE_END(Optimization)
|
||||||
} // Optimization
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
|
|
||||||
typedef Optimization::vech SIMD_Htype; // Reduced precision type
|
typedef Optimization::vech SIMD_Htype; // Reduced precision type
|
||||||
typedef Optimization::vecf SIMD_Ftype; // Single precision type
|
typedef Optimization::vecf SIMD_Ftype; // Single precision type
|
||||||
typedef Optimization::vecd SIMD_Dtype; // Double precision type
|
typedef Optimization::vecd SIMD_Dtype; // Double precision type
|
||||||
typedef Optimization::veci SIMD_Itype; // Integer type
|
typedef Optimization::veci SIMD_Itype; // Integer type
|
||||||
|
|
||||||
// prefetch utilities
|
// prefetch utilities
|
||||||
inline void v_prefetch0(int size, const char *ptr){};
|
inline void v_prefetch0(int size, const char *ptr){};
|
||||||
inline void prefetch_HINT_T0(const char *ptr){};
|
inline void prefetch_HINT_T0(const char *ptr){};
|
||||||
|
|
||||||
// Function name aliases
|
// Function name aliases
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
typedef Optimization::Vstore VstoreSIMD;
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
typedef Optimization::Vset VsetSIMD;
|
typedef Optimization::Vset VsetSIMD;
|
||||||
typedef Optimization::Vstream VstreamSIMD;
|
typedef Optimization::Vstream VstreamSIMD;
|
||||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
|
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Sum SumSIMD;
|
||||||
typedef Optimization::Sub SubSIMD;
|
typedef Optimization::Sub SubSIMD;
|
||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
typedef Optimization::MultRealPart MultRealPartSIMD;
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
}
|
NAMESPACE_END(Grid)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user