mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
apply VLA patch for memcpy reduction suggested by Arm, CAS-162542-D6W7Z7
This commit is contained in:
parent
433766ac62
commit
2111052fbe
@ -57,11 +57,39 @@ NAMESPACE_BEGIN(Optimization);
|
|||||||
constexpr static unsigned int r = GEN_SIMD_WIDTH/8u;
|
constexpr static unsigned int r = GEN_SIMD_WIDTH/8u;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef ARMCLANGCOMPAT
|
||||||
|
// SIMD vector immediate types
|
||||||
|
// vec_imm<T>: plain aggregate with a single array member and no constructors,
// so it can be brace-initialised from a list of element values (the tbl_*
// helpers in this file do so with vec_imm<uint64_t>).
template <typename T>
|
||||||
|
struct vec_imm {
|
||||||
|
// W<T>::r elements of T; the array is aligned to GEN_SIMD_WIDTH.
alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
|
||||||
|
};
|
||||||
|
|
||||||
|
// SIMD vector types
|
||||||
|
// vec<T> (ARMCLANGCOMPAT variant): same storage as vec_imm<T>, plus
// user-provided copy operations that move the data with SVE load/store
// intrinsics (svld1 / svst1) instead of a plain array copy.
// NOTE(review): each svld1/svst1 pair transfers one hardware vector under an
// all-true predicate; this presumably relies on the SVE vector length matching
// the size of v -- confirm against the build configuration.
template <typename T>
|
||||||
|
struct vec {
|
||||||
|
// W<T>::r elements of T; the array is aligned to GEN_SIMD_WIDTH.
alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
|
||||||
|
// Default construction leaves v uninitialised.
vec() = default;
|
||||||
|
// Copy construction is routed through operator= below.
vec(const vec &rhs) { this->operator=(rhs); }
|
||||||
|
// Converting constructor from the brace-initialisable vec_imm<T>.
vec(const vec_imm<T> &rhs) {
|
||||||
|
// v = rhs.v
|
||||||
|
// Load from rhs.v and store to the start of *this (v is the only data
// member). The C-style cast (T*)rhs.v drops the const of rhs.
svst1(svptrue_b8(), (T*)this, svld1(svptrue_b8(), (T*)rhs.v));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy assignment: same load/store sequence as the vec_imm constructor.
// There is no self-assignment check; the value is loaded before it is stored.
inline vec &operator=(const vec &rhs) {
|
||||||
|
// v = rhs.v
|
||||||
|
svst1(svptrue_b8(), (T*)this, svld1(svptrue_b8(), (T*)rhs.v));
|
||||||
|
return *this;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#else // ARMCLANGCOMPAT not defined
|
||||||
|
// Without ARMCLANGCOMPAT there is no separate immediate type: the macro makes
// every use of vec_imm expand textually to vec.
#define vec_imm vec
|
||||||
// SIMD vector types
|
// SIMD vector types
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct vec {
|
struct vec {
|
||||||
alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
|
alignas(GEN_SIMD_WIDTH) T v[W<T>::r];
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef vec<float> vecf;
|
typedef vec<float> vecf;
|
||||||
typedef vec<double> vecd;
|
typedef vec<double> vecd;
|
||||||
@ -91,27 +119,33 @@ struct acle<double>{
|
|||||||
static inline svbool_t pg2(){return svptrue_pat_b64(SV_VL4);}
|
static inline svbool_t pg2(){return svptrue_pat_b64(SV_VL4);}
|
||||||
static inline svbool_t pg4(){return svptrue_pat_b64(SV_VL2);}
|
static inline svbool_t pg4(){return svptrue_pat_b64(SV_VL2);}
|
||||||
static inline vec<uint64_t> tbl_swap(){
|
static inline vec<uint64_t> tbl_swap(){
|
||||||
const vec<uint64_t> t = {1, 0, 3, 2, 5, 4, 7, 6};
|
//const vec<uint64_t> t = {1, 0, 3, 2, 5, 4, 7, 6};
|
||||||
|
const vec_imm<uint64_t> t = {1, 0, 3, 2, 5, 4, 7, 6};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl0(){
|
static inline vec<uint64_t> tbl0(){
|
||||||
const vec<uint64_t> t = {4, 5, 6, 7, 0, 1, 2, 3};
|
//const vec<uint64_t> t = {4, 5, 6, 7, 0, 1, 2, 3};
|
||||||
|
const vec_imm<uint64_t> t = {4, 5, 6, 7, 0, 1, 2, 3};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl1(){
|
static inline vec<uint64_t> tbl1(){
|
||||||
const vec<uint64_t> t = {2, 3, 0, 1, 6, 7, 4, 5};
|
//const vec<uint64_t> t = {2, 3, 0, 1, 6, 7, 4, 5};
|
||||||
|
const vec_imm<uint64_t> t = {2, 3, 0, 1, 6, 7, 4, 5};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl_exch1a(){ // Exchange1
|
static inline vec<uint64_t> tbl_exch1a(){ // Exchange1
|
||||||
const vec<uint64_t> t = {0, 1, 4, 5, 2, 3, 6, 7};
|
//const vec<uint64_t> t = {0, 1, 4, 5, 2, 3, 6, 7};
|
||||||
|
const vec_imm<uint64_t> t = {0, 1, 4, 5, 2, 3, 6, 7};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl_exch1b(){ // Exchange1
|
static inline vec<uint64_t> tbl_exch1b(){ // Exchange1
|
||||||
const vec<uint64_t> t = {2, 3, 6, 7, 0, 1, 4, 5};
|
//const vec<uint64_t> t = {2, 3, 6, 7, 0, 1, 4, 5};
|
||||||
|
const vec_imm<uint64_t> t = {2, 3, 6, 7, 0, 1, 4, 5};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl_exch1c(){ // Exchange1
|
static inline vec<uint64_t> tbl_exch1c(){ // Exchange1
|
||||||
const vec<uint64_t> t = {4, 5, 0, 1, 6, 7, 2, 3};
|
//const vec<uint64_t> t = {4, 5, 0, 1, 6, 7, 2, 3};
|
||||||
|
const vec_imm<uint64_t> t = {4, 5, 0, 1, 6, 7, 2, 3};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline svbool_t pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
static inline svbool_t pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
||||||
|
@ -122,7 +122,7 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) {
|
|||||||
#if defined(A64FX) // VLA
|
#if defined(A64FX) // VLA
|
||||||
#pragma message("building for A64FX / SVE ACLE VLA")
|
#pragma message("building for A64FX / SVE ACLE VLA")
|
||||||
#if defined(ARMCLANGCOMPAT)
|
#if defined(ARMCLANGCOMPAT)
|
||||||
#pragma message("applying armclang fix")
|
#pragma message("applying armclang patch")
|
||||||
#endif
|
#endif
|
||||||
#include "Grid_a64fx-2.h"
|
#include "Grid_a64fx-2.h"
|
||||||
#endif
|
#endif
|
||||||
@ -247,21 +247,37 @@ public:
|
|||||||
return sizeof(Vector_type) / sizeof(Scalar_type);
|
return sizeof(Vector_type) / sizeof(Scalar_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ARMCLANGCOMPAT
|
#ifdef ARMCLANGCOMPAT
|
||||||
accelerator_inline Grid_simd &operator=(const Grid_simd &&rhs) {
|
template <class S = Scalar_type>
|
||||||
svint8_t tmp = svld1(svptrue_b8(), (int8_t*)&(rhs.v));
|
accelerator_inline Grid_simd &operator=(const Grid_simd<typename std::enable_if<!is_complex<S>::value, S>::type, Vector_type> &&rhs) {
|
||||||
svst1(svptrue_b8(), (int8_t*)this, tmp);
|
//v = rhs.v;
|
||||||
//v = rhs.v;
|
svst1(svptrue_b8(), (Scalar_type*)this, svld1(svptrue_b8(), (Scalar_type*)&(rhs.v)));
|
||||||
return *this;
|
return *this;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Assignment from a const lvalue Grid_simd, enabled through std::enable_if
// only when is_complex<S>::value is false (S defaults to Scalar_type).
// Copies with an SVE load/store pair through Scalar_type pointers instead of
// the plain member assignment kept in the comment below.
// NOTE(review): a member template is never a copy-assignment operator in C++,
// so the implicitly-declared one still exists unless it is suppressed
// elsewhere in the class -- confirm this overload is the one actually selected.
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd &operator=(const Grid_simd<typename std::enable_if<!is_complex<S>::value, S>::type, Vector_type> &rhs) {
|
||||||
|
//v = rhs.v;
|
||||||
|
// Load from rhs.v, store to the start of *this; the C-style cast drops const.
svst1(svptrue_b8(), (Scalar_type*)this, svld1(svptrue_b8(), (Scalar_type*)&(rhs.v)));
|
||||||
|
return *this;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Assignment from a const rvalue Grid_simd, enabled through std::enable_if
// only when is_complex<S>::value is true (S defaults to Scalar_type).
// The data is copied through int8_t pointers rather than Scalar_type pointers
// (presumably because svld1/svst1 have no overload for the complex scalar
// type -- confirm).
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd &operator=(const Grid_simd<typename std::enable_if<is_complex<S>::value, S>::type, Vector_type> &&rhs) {
|
||||||
|
//v = rhs.v;
|
||||||
|
// Load from rhs.v, store to the start of *this; the C-style cast drops const.
svst1(svptrue_b8(), (int8_t*)this, svld1(svptrue_b8(), (int8_t*)&(rhs.v)));
|
||||||
|
return *this;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Assignment from a const lvalue Grid_simd, enabled through std::enable_if
// only when is_complex<S>::value is true (S defaults to Scalar_type).
// The data is copied through int8_t pointers rather than Scalar_type pointers
// (presumably because svld1/svst1 have no overload for the complex scalar
// type -- confirm).
// NOTE(review): a member template is never a copy-assignment operator in C++,
// so the implicitly-declared one still exists unless it is suppressed
// elsewhere in the class -- confirm this overload is the one actually selected.
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd &operator=(const Grid_simd<typename std::enable_if<is_complex<S>::value, S>::type, Vector_type> &rhs) {
|
||||||
|
//v = rhs.v;
|
||||||
|
// Load from rhs.v, store to the start of *this; the C-style cast drops const.
svst1(svptrue_b8(), (int8_t*)this, svld1(svptrue_b8(), (int8_t*)&(rhs.v)));
|
||||||
|
return *this;
|
||||||
|
};
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
accelerator_inline Grid_simd &operator=(const Grid_simd &rhs) {
|
|
||||||
svint8_t tmp = svld1(svptrue_b8(), (int8_t*)&(rhs.v));
|
|
||||||
svst1(svptrue_b8(), (int8_t*)this, tmp);
|
|
||||||
//v = rhs.v;
|
|
||||||
return *this;
|
|
||||||
};
|
|
||||||
#else
|
|
||||||
accelerator_inline Grid_simd &operator=(const Grid_simd &&rhs) {
|
accelerator_inline Grid_simd &operator=(const Grid_simd &&rhs) {
|
||||||
v = rhs.v;
|
v = rhs.v;
|
||||||
return *this;
|
return *this;
|
||||||
@ -270,11 +286,24 @@ public:
|
|||||||
v = rhs.v;
|
v = rhs.v;
|
||||||
return *this;
|
return *this;
|
||||||
}; // faster than not declaring it and leaving to the compiler
|
}; // faster than not declaring it and leaving to the compiler
|
||||||
#endif
|
|
||||||
|
#endif
|
||||||
|
|
||||||
accelerator Grid_simd() = default;
|
accelerator Grid_simd() = default;
|
||||||
accelerator_inline Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
|
|
||||||
accelerator_inline Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
|
#ifdef ARMCLANGCOMPAT
|
||||||
|
// Construct from a const lvalue Grid_simd; enabled only when
// is_complex<S>::value is false. Delegates the copy to operator=.
// NOTE(review): a constructor template is never a copy constructor in C++, so
// the implicitly-declared copy constructor still exists unless suppressed
// elsewhere in the class -- confirm this overload is actually selected.
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd(const Grid_simd<typename std::enable_if<!is_complex<S>::value, S>::type, Vector_type> &rhs) { this->operator=(rhs); }
|
||||||
|
// Construct from a const rvalue Grid_simd; enabled only when
// is_complex<S>::value is false. Inside the body rhs is a named (lvalue)
// expression, so the call resolves to an lvalue-reference operator= overload.
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd(const Grid_simd<typename std::enable_if<!is_complex<S>::value, S>::type, Vector_type> &&rhs) { this->operator=(rhs); }
|
||||||
|
// Construct from a const lvalue Grid_simd; enabled only when
// is_complex<S>::value is true. Delegates the copy to operator=.
// NOTE(review): a constructor template is never a copy constructor in C++, so
// the implicitly-declared copy constructor still exists unless suppressed
// elsewhere in the class -- confirm this overload is actually selected.
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd(const Grid_simd<typename std::enable_if<is_complex<S>::value, S>::type, Vector_type> &rhs) { this->operator=(rhs); }
|
||||||
|
// Construct from a const rvalue Grid_simd; enabled only when
// is_complex<S>::value is true. Inside the body rhs is a named (lvalue)
// expression, so the call resolves to an lvalue-reference operator= overload.
template <class S = Scalar_type>
|
||||||
|
accelerator_inline Grid_simd(const Grid_simd<typename std::enable_if<is_complex<S>::value, S>::type, Vector_type> &&rhs) { this->operator=(rhs); }
|
||||||
|
#else
|
||||||
|
accelerator_inline Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps
|
||||||
|
accelerator_inline Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
|
||||||
|
#endif
|
||||||
accelerator_inline Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
|
accelerator_inline Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); };
|
||||||
// Enable if complex type
|
// Enable if complex type
|
||||||
template <typename S = Scalar_type> accelerator_inline
|
template <typename S = Scalar_type> accelerator_inline
|
||||||
|
Loading…
Reference in New Issue
Block a user