mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
clean up; Exch1 VLA sp+dp integrate, tested, working
This commit is contained in:
parent
cd27f1005d
commit
a65ce237c1
@ -30,21 +30,8 @@
|
|||||||
// Using SVE ACLE
|
// Using SVE ACLE
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
|
|
||||||
#ifndef GEN_SIMD_WIDTH
|
|
||||||
#define GEN_SIMD_WIDTH 64u
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static_assert(GEN_SIMD_WIDTH % 64u == 0, "A64FX SIMD vector size is 64 bytes");
|
static_assert(GEN_SIMD_WIDTH % 64u == 0, "A64FX SIMD vector size is 64 bytes");
|
||||||
|
|
||||||
#ifdef __ARM_FEATURE_SVE
|
|
||||||
#ifdef __clang__
|
|
||||||
//#pragma message("Using clang compiler")
|
|
||||||
#include <arm_sve.h>
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#pragma error "Missing SVE feature"
|
|
||||||
#endif /* __ARM_FEATURE_SVE */
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
NAMESPACE_BEGIN(Optimization);
|
NAMESPACE_BEGIN(Optimization);
|
||||||
|
|
||||||
@ -104,16 +91,28 @@ struct acle<double>{
|
|||||||
static inline svbool_t pg2(){return svptrue_pat_b64(SV_VL4);}
|
static inline svbool_t pg2(){return svptrue_pat_b64(SV_VL4);}
|
||||||
static inline svbool_t pg4(){return svptrue_pat_b64(SV_VL2);}
|
static inline svbool_t pg4(){return svptrue_pat_b64(SV_VL2);}
|
||||||
static inline vec<uint64_t> tbl_swap(){
|
static inline vec<uint64_t> tbl_swap(){
|
||||||
const vec<uint64_t> t = {1, 0, 3, 2, 5, 4, 7, 6};
|
const vec<uint64_t> t = {1, 0, 3, 2, 5, 4, 7, 6};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl0(){
|
static inline vec<uint64_t> tbl0(){
|
||||||
const vec<uint64_t> t = {4, 5, 6, 7, 0, 1, 2, 3};
|
const vec<uint64_t> t = {4, 5, 6, 7, 0, 1, 2, 3};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl1(){
|
static inline vec<uint64_t> tbl1(){
|
||||||
const vec<uint64_t> t = {2, 3, 0, 1, 6, 7, 4, 5};
|
const vec<uint64_t> t = {2, 3, 0, 1, 6, 7, 4, 5};
|
||||||
return t;
|
return t;
|
||||||
|
}
|
||||||
|
static inline vec<uint64_t> tbl_exch1a(){ // Exchange1
|
||||||
|
const vec<uint64_t> t = {0, 1, 4, 5, 2, 3, 6, 7};
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
static inline vec<uint64_t> tbl_exch1b(){ // Exchange1
|
||||||
|
const vec<uint64_t> t = {2, 3, 6, 7, 0, 1, 4, 5};
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
static inline vec<uint64_t> tbl_exch1c(){ // Exchange1
|
||||||
|
const vec<uint64_t> t = {4, 5, 0, 1, 6, 7, 2, 3};
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
static inline svbool_t pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
static inline svbool_t pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
||||||
static inline svbool_t pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
static inline svbool_t pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
||||||
@ -132,20 +131,32 @@ struct acle<float>{
|
|||||||
static inline svbool_t pg2(){return svptrue_pat_b32(SV_VL8);}
|
static inline svbool_t pg2(){return svptrue_pat_b32(SV_VL8);}
|
||||||
// exchange neighboring elements
|
// exchange neighboring elements
|
||||||
static inline vec<uint32_t> tbl_swap(){
|
static inline vec<uint32_t> tbl_swap(){
|
||||||
const vec<uint32_t> t = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
|
const vec<uint32_t> t = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint32_t> tbl0(){
|
static inline vec<uint32_t> tbl0(){
|
||||||
const vec<uint32_t> t = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
|
const vec<uint32_t> t = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint32_t> tbl1(){
|
static inline vec<uint32_t> tbl1(){
|
||||||
const vec<uint32_t> t = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
|
const vec<uint32_t> t = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
static inline vec<uint32_t> tbl2(){
|
static inline vec<uint32_t> tbl2(){
|
||||||
const vec<uint32_t> t = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
|
const vec<uint32_t> t = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
|
||||||
return t;
|
return t;
|
||||||
|
}
|
||||||
|
static inline vec<uint32_t> tbl_exch1a(){ // Exchange1
|
||||||
|
const vec<uint32_t> t = {0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
static inline vec<uint32_t> tbl_exch1b(){ // Exchange1
|
||||||
|
const vec<uint32_t> t = {4, 5, 6, 7, 12, 13, 14, 15, 0, 1, 2, 3, 8, 9, 10, 11 };
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
static inline vec<uint32_t> tbl_exch1c(){ // Exchange1
|
||||||
|
const vec<uint32_t> t = {8, 9, 10, 11, 0, 1, 2, 3, 12, 13, 14, 15, 4, 5, 6, 7};
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
static inline svbool_t pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
static inline svbool_t pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
||||||
static inline svbool_t pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
static inline svbool_t pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
||||||
@ -186,7 +197,6 @@ struct acle<Integer>{
|
|||||||
struct Vsplat{
|
struct Vsplat{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline vecf operator()(float a, float b){
|
inline vecf operator()(float a, float b){
|
||||||
|
|
||||||
vecf out;
|
vecf out;
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
typename acle<float>::vt a_v = svdup_f32(a);
|
typename acle<float>::vt a_v = svdup_f32(a);
|
||||||
@ -198,7 +208,6 @@ struct Vsplat{
|
|||||||
|
|
||||||
// Real float
|
// Real float
|
||||||
inline vecf operator()(float a){
|
inline vecf operator()(float a){
|
||||||
|
|
||||||
vecf out;
|
vecf out;
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
typename acle<float>::vt r_v = svdup_f32(a);
|
typename acle<float>::vt r_v = svdup_f32(a);
|
||||||
@ -208,7 +217,6 @@ struct Vsplat{
|
|||||||
|
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vecd operator()(double a, double b){
|
inline vecd operator()(double a, double b){
|
||||||
|
|
||||||
vecd out;
|
vecd out;
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
typename acle<double>::vt a_v = svdup_f64(a);
|
typename acle<double>::vt a_v = svdup_f64(a);
|
||||||
@ -220,7 +228,6 @@ struct Vsplat{
|
|||||||
|
|
||||||
// Real double
|
// Real double
|
||||||
inline vecd operator()(double a){
|
inline vecd operator()(double a){
|
||||||
|
|
||||||
vecd out;
|
vecd out;
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
typename acle<double>::vt r_v = svdup_f64(a);
|
typename acle<double>::vt r_v = svdup_f64(a);
|
||||||
@ -230,7 +237,6 @@ struct Vsplat{
|
|||||||
|
|
||||||
// Integer
|
// Integer
|
||||||
inline vec<Integer> operator()(Integer a){
|
inline vec<Integer> operator()(Integer a){
|
||||||
|
|
||||||
vec<Integer> out;
|
vec<Integer> out;
|
||||||
svbool_t pg1 = acle<Integer>::pg1();
|
svbool_t pg1 = acle<Integer>::pg1();
|
||||||
// Add check whether Integer is really a uint32_t???
|
// Add check whether Integer is really a uint32_t???
|
||||||
@ -244,7 +250,6 @@ struct Vstore{
|
|||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void operator()(vec<T> a, T *D){
|
inline void operator()(vec<T> a, T *D){
|
||||||
|
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v);
|
typename acle<T>::vt a_v = svld1(pg1, (typename acle<T>::pt*)&a.v);
|
||||||
svst1(pg1, D, a_v);
|
svst1(pg1, D, a_v);
|
||||||
@ -255,7 +260,6 @@ struct Vstream{
|
|||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void operator()(T * a, vec<T> b){
|
inline void operator()(T * a, vec<T> b){
|
||||||
|
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
||||||
svstnt1(pg1, a, b_v);
|
svstnt1(pg1, a, b_v);
|
||||||
@ -267,7 +271,6 @@ struct Vstream{
|
|||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(std::complex<T> *a){
|
inline vec<T> operator()(std::complex<T> *a){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, (T*)a);
|
typename acle<T>::vt a_v = svld1(pg1, (T*)a);
|
||||||
@ -279,7 +282,6 @@ struct Vstream{
|
|||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(T *a){
|
inline vec<T> operator()(T *a){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a);
|
typename acle<T>::vt a_v = svld1(pg1, a);
|
||||||
@ -296,7 +298,6 @@ struct Vstream{
|
|||||||
struct Sum{
|
struct Sum{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -311,7 +312,6 @@ struct Sum{
|
|||||||
struct Sub{
|
struct Sub{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -326,7 +326,6 @@ struct Sub{
|
|||||||
struct Mult{
|
struct Mult{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -341,7 +340,6 @@ struct Mult{
|
|||||||
struct MultRealPart{
|
struct MultRealPart{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -360,7 +358,6 @@ struct MultRealPart{
|
|||||||
struct MaddRealPart{
|
struct MaddRealPart{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -380,7 +377,6 @@ struct MultComplex{
|
|||||||
// Complex a*b
|
// Complex a*b
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -400,8 +396,7 @@ struct MultComplex{
|
|||||||
struct MultAddComplex{
|
struct MultAddComplex{
|
||||||
// Complex a*b+c
|
// Complex a*b+c
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline mac(const vec<T> &a, const vec<T> b, const vec<T> c){
|
inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -420,7 +415,6 @@ struct Div{
|
|||||||
// Real
|
// Real
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||||
@ -436,7 +430,6 @@ struct Conj{
|
|||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a){
|
inline vec<T> operator()(vec<T> a){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
svbool_t pg_odd = acle<T>::pg_odd();
|
svbool_t pg_odd = acle<T>::pg_odd();
|
||||||
@ -453,7 +446,6 @@ struct TimesMinusI{
|
|||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
const vec<typename acle<T>::uint> tbl_swap = acle<T>::tbl_swap();
|
const vec<typename acle<T>::uint> tbl_swap = acle<T>::tbl_swap();
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
@ -473,7 +465,6 @@ struct TimesI{
|
|||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> operator()(vec<T> a, vec<T> b){
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
const vec<typename acle<T>::uint> tbl_swap = acle<T>::tbl_swap();
|
const vec<typename acle<T>::uint> tbl_swap = acle<T>::tbl_swap();
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
@ -492,7 +483,6 @@ struct TimesI{
|
|||||||
|
|
||||||
struct PrecisionChange {
|
struct PrecisionChange {
|
||||||
static inline vech StoH (const vecf &sa,const vecf &sb) {
|
static inline vech StoH (const vecf &sa,const vecf &sb) {
|
||||||
|
|
||||||
vech ret;
|
vech ret;
|
||||||
svbool_t pg1s = acle<float>::pg1();
|
svbool_t pg1s = acle<float>::pg1();
|
||||||
svbool_t pg1h = acle<uint16_t>::pg1();
|
svbool_t pg1h = acle<uint16_t>::pg1();
|
||||||
@ -502,10 +492,10 @@ struct PrecisionChange {
|
|||||||
typename acle<uint16_t>::vt hb_v = svcvt_f16_x(pg1s, sb_v);
|
typename acle<uint16_t>::vt hb_v = svcvt_f16_x(pg1s, sb_v);
|
||||||
typename acle<uint16_t>::vt r_v = svuzp1(ha_v, hb_v);
|
typename acle<uint16_t>::vt r_v = svuzp1(ha_v, hb_v);
|
||||||
svst1(pg1h, (typename acle<uint16_t>::pt*)&ret.v, r_v);
|
svst1(pg1h, (typename acle<uint16_t>::pt*)&ret.v, r_v);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
static inline void HtoS(vech h,vecf &sa,vecf &sb) {
|
static inline void HtoS(vech h,vecf &sa,vecf &sb) {
|
||||||
|
|
||||||
svbool_t pg1h = acle<uint16_t>::pg1();
|
svbool_t pg1h = acle<uint16_t>::pg1();
|
||||||
svbool_t pg1s = acle<float>::pg1();
|
svbool_t pg1s = acle<float>::pg1();
|
||||||
typename acle<uint16_t>::vt h_v = svld1(pg1h, (typename acle<uint16_t>::pt*)&h.v);
|
typename acle<uint16_t>::vt h_v = svld1(pg1h, (typename acle<uint16_t>::pt*)&h.v);
|
||||||
@ -517,7 +507,6 @@ struct PrecisionChange {
|
|||||||
svst1(pg1s, sb.v, sb_v);
|
svst1(pg1s, sb.v, sb_v);
|
||||||
}
|
}
|
||||||
static inline vecf DtoS (vecd a,vecd b) {
|
static inline vecf DtoS (vecd a,vecd b) {
|
||||||
|
|
||||||
vecf ret;
|
vecf ret;
|
||||||
svbool_t pg1d = acle<double>::pg1();
|
svbool_t pg1d = acle<double>::pg1();
|
||||||
svbool_t pg1s = acle<float>::pg1();
|
svbool_t pg1s = acle<float>::pg1();
|
||||||
@ -527,10 +516,10 @@ struct PrecisionChange {
|
|||||||
typename acle<float>::vt sb_v = svcvt_f32_x(pg1d, b_v);
|
typename acle<float>::vt sb_v = svcvt_f32_x(pg1d, b_v);
|
||||||
typename acle<float>::vt r_v = svuzp1(sa_v, sb_v);
|
typename acle<float>::vt r_v = svuzp1(sa_v, sb_v);
|
||||||
svst1(pg1s, ret.v, r_v);
|
svst1(pg1s, ret.v, r_v);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
static inline void StoD (vecf s,vecd &a,vecd &b) {
|
static inline void StoD (vecf s,vecd &a,vecd &b) {
|
||||||
|
|
||||||
svbool_t pg1s = acle<float>::pg1();
|
svbool_t pg1s = acle<float>::pg1();
|
||||||
svbool_t pg1d = acle<double>::pg1();
|
svbool_t pg1d = acle<double>::pg1();
|
||||||
typename acle<float>::vt s_v = svld1(pg1s, s.v);
|
typename acle<float>::vt s_v = svld1(pg1s, s.v);
|
||||||
@ -542,7 +531,6 @@ struct PrecisionChange {
|
|||||||
svst1(pg1d, b.v, b_v);
|
svst1(pg1d, b.v, b_v);
|
||||||
}
|
}
|
||||||
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
||||||
|
|
||||||
vech ret;
|
vech ret;
|
||||||
svbool_t pg1d = acle<double>::pg1();
|
svbool_t pg1d = acle<double>::pg1();
|
||||||
svbool_t pg1h = acle<uint16_t>::pg1();
|
svbool_t pg1h = acle<uint16_t>::pg1();
|
||||||
@ -568,7 +556,6 @@ struct PrecisionChange {
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
||||||
|
|
||||||
svbool_t pg1h = acle<uint16_t>::pg1();
|
svbool_t pg1h = acle<uint16_t>::pg1();
|
||||||
svbool_t pg1d = acle<double>::pg1();
|
svbool_t pg1d = acle<double>::pg1();
|
||||||
typename acle<uint16_t>::vt h_v = svld1(pg1h, (typename acle<uint16_t>::pt*)&h.v);
|
typename acle<uint16_t>::vt h_v = svld1(pg1h, (typename acle<uint16_t>::pt*)&h.v);
|
||||||
@ -600,7 +587,6 @@ struct Exchange{
|
|||||||
// Exchange0 is valid for arbitrary SVE vector length
|
// Exchange0 is valid for arbitrary SVE vector length
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange0(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
static inline void Exchange0(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
||||||
|
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a1_v = svld1(pg1, in1.v);
|
typename acle<T>::vt a1_v = svld1(pg1, in1.v);
|
||||||
typename acle<T>::vt a2_v = svld1(pg1, in2.v);
|
typename acle<T>::vt a2_v = svld1(pg1, in2.v);
|
||||||
@ -612,55 +598,35 @@ struct Exchange{
|
|||||||
svst1(pg1, out2.v, r2_v);
|
svst1(pg1, out2.v, r2_v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* FIXME use svcreate etc. or switch to table lookup directly
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange1(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
static inline void Exchange1(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
||||||
|
// this one is tricky; svtrn2q* from SVE2 fits best, but it is not available in SVE1
|
||||||
|
// alternative: use 4-el structure; expect translation into ldp + stp -> SFI
|
||||||
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
|
const vec<typename acle<T>::uint> tbl_exch1a = acle<T>::tbl_exch1a();
|
||||||
|
const vec<typename acle<T>::uint> tbl_exch1b = acle<T>::tbl_exch1b();
|
||||||
|
const vec<typename acle<T>::uint> tbl_exch1c = acle<T>::tbl_exch1c();
|
||||||
|
|
||||||
svbool_t pg4 = acle<double>::pg4();
|
typename acle<T>::svuint tbl_exch1a_v = svld1(pg1, tbl_exch1a.v);
|
||||||
typename acle<double>::vt4 in1_v4 = svld4(pg4, (typename acle<double>::pt*)in1.v);
|
typename acle<T>::svuint tbl_exch1b_v = svld1(pg1, tbl_exch1b.v);
|
||||||
typename acle<double>::vt4 in2_v4 = svld4(pg4, (typename acle<double>::pt*)in2.v);
|
typename acle<T>::svuint tbl_exch1c_v = svld1(pg1, tbl_exch1c.v);
|
||||||
typename acle<double>::vt4 out1_v4;
|
|
||||||
typename acle<double>::vt4 out2_v4;
|
typename acle<T>::vt in1_v = svld1(pg1, in1.v);
|
||||||
out1_v4.v0 = in1_v4.v0;
|
typename acle<T>::vt in2_v = svld1(pg1, in2.v);
|
||||||
out1_v4.v1 = in1_v4.v1;
|
|
||||||
out1_v4.v2 = in2_v4.v0;
|
typename acle<T>::vt a1_v = svtbl(in1_v, tbl_exch1a_v);
|
||||||
out1_v4.v3 = in2_v4.v1;
|
typename acle<T>::vt a2_v = svtbl(in2_v, tbl_exch1b_v);
|
||||||
out2_v4.v0 = in1_v4.v2;
|
typename acle<T>::vt b1_v = svext(a2_v, a1_v, (uint64_t)(W<T>::r / 2u));
|
||||||
out2_v4.v1 = in1_v4.v3;
|
typename acle<T>::vt b2_v = svext(a1_v, a2_v, (uint64_t)(W<T>::r / 2u));
|
||||||
out2_v4.v2 = in2_v4.v2;
|
typename acle<T>::vt out1_v = svtbl(b1_v, tbl_exch1c_v);
|
||||||
out2_v4.v3 = in2_v4.v3;
|
typename acle<T>::vt out2_v = svtbl(b2_v, tbl_exch1a_v);
|
||||||
svst4(pg4, (typename acle<double>::pt*)out1.v, out1_v4);
|
|
||||||
svst4(pg4, (typename acle<double>::pt*)out2.v, out2_v4);
|
svst1(pg1, out1.v, out1_v);
|
||||||
|
svst1(pg1, out2.v, out2_v);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
#define VECTOR_FOR(i, w, inc) \
|
|
||||||
for (unsigned int i = 0; i < w; i += inc)
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static inline void Exchange1(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
|
||||||
// FIXME
|
|
||||||
const int n = 1;
|
|
||||||
const int w = W<T>::r;
|
|
||||||
unsigned int mask = w >> (n + 1);
|
|
||||||
// std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
|
|
||||||
VECTOR_FOR(i, w, 1) {
|
|
||||||
int j1 = i&(~mask);
|
|
||||||
if ( (i&mask) == 0 ) { out1.v[i]=in1.v[j1];}
|
|
||||||
else { out1.v[i]=in2.v[j1];}
|
|
||||||
int j2 = i|mask;
|
|
||||||
if ( (i&mask) == 0 ) { out2.v[i]=in1.v[j2];}
|
|
||||||
else { out2.v[i]=in2.v[j2];}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef VECTOR_FOR
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void Exchange2(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
static inline void Exchange2(vec<T> &out1, vec<T> &out2, const vec<T> &in1, const vec<T> &in2){
|
||||||
|
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
typename acle<double>::vt a1_v = svld1(pg1, (typename acle<double>::pt*)in1.v);
|
typename acle<double>::vt a1_v = svld1(pg1, (typename acle<double>::pt*)in1.v);
|
||||||
typename acle<double>::vt a2_v = svld1(pg1, (typename acle<double>::pt*)in2.v);
|
typename acle<double>::vt a2_v = svld1(pg1, (typename acle<double>::pt*)in2.v);
|
||||||
@ -671,7 +637,6 @@ struct Exchange{
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void Exchange3(vecf &out1, vecf &out2, const vecf &in1, const vecf &in2){
|
static inline void Exchange3(vecf &out1, vecf &out2, const vecf &in1, const vecf &in2){
|
||||||
|
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
typename acle<float>::vt a1_v = svld1(pg1, in1.v);
|
typename acle<float>::vt a1_v = svld1(pg1, in1.v);
|
||||||
typename acle<float>::vt a2_v = svld1(pg1, in2.v);
|
typename acle<float>::vt a2_v = svld1(pg1, in2.v);
|
||||||
@ -692,17 +657,16 @@ struct Permute{
|
|||||||
// Permute0 is valid for any SVE vector width
|
// Permute0 is valid for any SVE vector width
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline vec<T> Permute0(vec<T> in) {
|
static inline vec<T> Permute0(vec<T> in) {
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, in.v);
|
typename acle<T>::vt a_v = svld1(pg1, in.v);
|
||||||
typename acle<T>::vt r_v = svext(a_v, a_v, (uint64_t)(W<T>::r / 2u));
|
typename acle<T>::vt r_v = svext(a_v, a_v, (uint64_t)(W<T>::r / 2u));
|
||||||
svst1(pg1, out.v, r_v);
|
svst1(pg1, out.v, r_v);
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline vecd Permute1(vecd in) {
|
static inline vecd Permute1(vecd in) {
|
||||||
|
|
||||||
vecd out;
|
vecd out;
|
||||||
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl1();
|
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl1();
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
@ -715,7 +679,6 @@ struct Permute{
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline vecf Permute1(vecf in) {
|
static inline vecf Permute1(vecf in) {
|
||||||
|
|
||||||
vecf out;
|
vecf out;
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl1();
|
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl1();
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
@ -728,7 +691,6 @@ struct Permute{
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline vecd Permute2(vecd in) {
|
static inline vecd Permute2(vecd in) {
|
||||||
|
|
||||||
vecd out;
|
vecd out;
|
||||||
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl_swap();
|
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl_swap();
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
@ -741,7 +703,6 @@ struct Permute{
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline vecf Permute2(vecf in) {
|
static inline vecf Permute2(vecf in) {
|
||||||
|
|
||||||
vecf out;
|
vecf out;
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl2();
|
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl2();
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
@ -754,7 +715,6 @@ struct Permute{
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline vecf Permute3(vecf in) {
|
static inline vecf Permute3(vecf in) {
|
||||||
|
|
||||||
vecf out;
|
vecf out;
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl_swap();
|
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl_swap();
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
@ -775,7 +735,6 @@ struct Permute{
|
|||||||
struct Rotate{
|
struct Rotate{
|
||||||
|
|
||||||
template <int n, typename T> static inline vec<T> tRotate(vec<T> in){
|
template <int n, typename T> static inline vec<T> tRotate(vec<T> in){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
typename acle<T>::vt a_v = svld1(pg1, in.v);
|
typename acle<T>::vt a_v = svld1(pg1, in.v);
|
||||||
@ -833,7 +792,6 @@ struct Reduce{
|
|||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
||||||
|
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
svbool_t pg_even = acle<float>::pg_even();
|
svbool_t pg_even = acle<float>::pg_even();
|
||||||
svbool_t pg_odd = acle<float>::pg_odd();
|
svbool_t pg_odd = acle<float>::pg_odd();
|
||||||
@ -848,7 +806,6 @@ inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
|||||||
//Real float Reduce
|
//Real float Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
||||||
|
|
||||||
svbool_t pg1 = acle<float>::pg1();
|
svbool_t pg1 = acle<float>::pg1();
|
||||||
typename acle<float>::vt a_v = svld1(pg1, in.v);
|
typename acle<float>::vt a_v = svld1(pg1, in.v);
|
||||||
float a = svred(pg1, a_v);
|
float a = svred(pg1, a_v);
|
||||||
@ -859,7 +816,6 @@ inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
|||||||
//Complex double Reduce
|
//Complex double Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
||||||
|
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
svbool_t pg_even = acle<double>::pg_even();
|
svbool_t pg_even = acle<double>::pg_even();
|
||||||
svbool_t pg_odd = acle<double>::pg_odd();
|
svbool_t pg_odd = acle<double>::pg_odd();
|
||||||
@ -873,7 +829,6 @@ inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
|||||||
//Real double Reduce
|
//Real double Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
||||||
|
|
||||||
svbool_t pg1 = acle<double>::pg1();
|
svbool_t pg1 = acle<double>::pg1();
|
||||||
typename acle<double>::vt a_v = svld1(pg1, in.v);
|
typename acle<double>::vt a_v = svld1(pg1, in.v);
|
||||||
double a = svred(pg1, a_v);
|
double a = svred(pg1, a_v);
|
||||||
@ -884,7 +839,6 @@ inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
|||||||
//Integer Reduce
|
//Integer Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
||||||
|
|
||||||
svbool_t pg1 = acle<Integer>::pg1();
|
svbool_t pg1 = acle<Integer>::pg1();
|
||||||
typename acle<Integer>::vt a_v = svld1(pg1, in.v);
|
typename acle<Integer>::vt a_v = svld1(pg1, in.v);
|
||||||
Integer a = svred(pg1, a_v);
|
Integer a = svred(pg1, a_v);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user