mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
first attempt to introduce tables using fixed-size; still incomplete
This commit is contained in:
parent
acff9d6ed2
commit
2b81cbe2c2
@ -49,6 +49,8 @@ typedef svfloat16_t vech __attribute__((arm_sve_vector_bits(512)));
|
|||||||
typedef svfloat32_t vecf __attribute__((arm_sve_vector_bits(512)));
|
typedef svfloat32_t vecf __attribute__((arm_sve_vector_bits(512)));
|
||||||
typedef svfloat64_t vecd __attribute__((arm_sve_vector_bits(512)));
|
typedef svfloat64_t vecd __attribute__((arm_sve_vector_bits(512)));
|
||||||
typedef svuint32_t veci __attribute__((arm_sve_vector_bits(512)));
|
typedef svuint32_t veci __attribute__((arm_sve_vector_bits(512)));
|
||||||
|
typedef svuint32_t lutf __attribute__((arm_sve_vector_bits(512))); // LUTs for float
|
||||||
|
typedef svuint64_t lutd __attribute__((arm_sve_vector_bits(512))); // LUTs for double
|
||||||
#else
|
#else
|
||||||
#pragma error("Oops. Wrong or undefined SVE vector size?")
|
#pragma error("Oops. Wrong or undefined SVE vector size?")
|
||||||
#endif /* __ARM_FEATURE_SVE_BITS */
|
#endif /* __ARM_FEATURE_SVE_BITS */
|
||||||
@ -109,21 +111,24 @@ struct acle<double>{
|
|||||||
typedef svuint64_t svuint;
|
typedef svuint64_t svuint;
|
||||||
|
|
||||||
static inline pred pg1(){return svptrue_b64();}
|
static inline pred pg1(){return svptrue_b64();}
|
||||||
static inline vec<uint64_t> tbl_swap(){
|
static inline lutd tbl_swap(){
|
||||||
const vec<uint64_t> t = {1, 0, 3, 2, 5, 4, 7, 6};
|
const uint64_t t[8] = {1, 0, 3, 2, 5, 4, 7, 6};
|
||||||
return t;
|
pred pg1 = svptrue_b64();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl0(){
|
static inline lutd tbl0(){
|
||||||
const vec<uint64_t> t = {4, 5, 6, 7, 0, 1, 2, 3};
|
const uint64_t t[8] = {4, 5, 6, 7, 0, 1, 2, 3};
|
||||||
return t;
|
pred pg1 = svptrue_b64();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline vec<uint64_t> tbl1(){
|
static inline lutd tbl1(){
|
||||||
const vec<uint64_t> t = {2, 3, 0, 1, 6, 7, 4, 5};
|
const uint64_t t[8] = {2, 3, 0, 1, 6, 7, 4, 5};
|
||||||
return t;
|
pred pg1 = svptrue_b64();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
||||||
static inline svfloat64_t zero(){return svdup_f64(0.);}
|
static inline vecd zero(){return svdup_f64(0.);}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -136,25 +141,29 @@ struct acle<float>{
|
|||||||
|
|
||||||
static inline pred pg1(){return svptrue_b32();}
|
static inline pred pg1(){return svptrue_b32();}
|
||||||
// exchange neighboring elements
|
// exchange neighboring elements
|
||||||
static inline vec<uint32_t> tbl_swap(){
|
static inline lutf tbl_swap(){
|
||||||
const vec<uint32_t> t = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
|
const uint32_t t[16] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
|
||||||
return t;
|
pred pg1 = svptrue_b32();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline vec<uint32_t> tbl0(){
|
static inline lutf tbl0(){
|
||||||
const vec<uint32_t> t = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
|
const uint32_t t[16] = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
|
||||||
return t;
|
pred pg1 = svptrue_b32();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline vec<uint32_t> tbl1(){
|
static inline vec<uint32_t> tbl1(){
|
||||||
const vec<uint32_t> t = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
|
const lutf = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
|
||||||
return t;
|
pred pg1 = svptrue_b32();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline vec<uint32_t> tbl2(){
|
static inline vec<uint32_t> tbl2(){
|
||||||
const vec<uint32_t> t = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
|
const lutf = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
|
||||||
return t;
|
pred pg1 = svptrue_b32();
|
||||||
|
return svld1(pg1, t);
|
||||||
}
|
}
|
||||||
static inline pred pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
static inline pred pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
||||||
static inline svfloat32_t zero(){return svdup_f32(0.);}
|
static inline vecf zero(){return svdup_f32(0.);}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -167,7 +176,7 @@ struct acle<uint16_t>{
|
|||||||
static inline pred pg1(){return svptrue_b16();}
|
static inline pred pg1(){return svptrue_b16();}
|
||||||
static inline pred pg_even(){return svzip1_b16(svptrue_b16(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b16(svptrue_b16(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b16(svpfalse_b(), svptrue_b16());}
|
static inline pred pg_odd() {return svzip1_b16(svpfalse_b(), svptrue_b16());}
|
||||||
static inline svfloat16_t zero(){return svdup_f16(0.);}
|
static inline vech zero(){return svdup_f16(0.);}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -180,7 +189,6 @@ struct acle<Integer>{
|
|||||||
|
|
||||||
//static inline svbool_t pg1(){return svptrue_b16();}
|
//static inline svbool_t pg1(){return svptrue_b16();}
|
||||||
static inline pred pg1(){return svptrue_b32();}
|
static inline pred pg1(){return svptrue_b32();}
|
||||||
static inline pred pg2(){return svptrue_pat_b32(SV_VL8);}
|
|
||||||
static inline pred pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
static inline pred pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
||||||
};
|
};
|
||||||
@ -416,24 +424,20 @@ struct Conj{
|
|||||||
struct TimesMinusI{
|
struct TimesMinusI{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline vecf operator()(vecf a, vecf b){
|
inline vecf operator()(vecf a, vecf b){
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl_swap();
|
lutf tbl_swap = acle<float>::tbl_swap();
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
pred pg_odd = acle<float>::pg_odd();
|
pred pg_odd = acle<float>::pg_odd();
|
||||||
|
|
||||||
typename acle<float>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
vecf a_v = svtbl(a, tbl_swap);
|
||||||
vecf a_v = svld1(pg1, a.v);
|
|
||||||
a_v = svtbl(a_v, tbl_swap_v);
|
|
||||||
return svneg_x(pg_odd, a_v);
|
return svneg_x(pg_odd, a_v);
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vecd operator()(vecd a, vecd b){
|
inline vecd operator()(vecd a, vecd b){
|
||||||
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl_swap();
|
lutd tbl_swap = acle<double>::tbl_swap();
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
pred pg_odd = acle<double>::pg_odd();
|
pred pg_odd = acle<double>::pg_odd();
|
||||||
|
|
||||||
typename acle<double>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
vecd a_v = svtbl(a, tbl_swap);
|
||||||
vecd a_v = svld1(pg1, a.v);
|
|
||||||
a_v = svtbl(a_v, tbl_swap_v);
|
|
||||||
return svneg_x(pg_odd, a_v);
|
return svneg_x(pg_odd, a_v);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -441,24 +445,20 @@ struct TimesMinusI{
|
|||||||
struct TimesI{
|
struct TimesI{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline vecf operator()(vecf a, vecf b){
|
inline vecf operator()(vecf a, vecf b){
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<T>::tbl_swap();
|
lutf tbl_swap = acle<T>::tbl_swap();
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
pred pg_even = acle<float>::pg_even();
|
pred pg_even = acle<float>::pg_even();
|
||||||
|
|
||||||
typename acle<float>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
vecf a_v = svtbl(a, tbl_swap);
|
||||||
vecf a_v = svld1(pg1, a.v);
|
|
||||||
a_v = svtbl(a_v, tbl_swap_v);
|
|
||||||
return svneg_x(pg_even, a_v);
|
return svneg_x(pg_even, a_v);
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vecd operator()(vecd a, vecd b){
|
inline vecd operator()(vecd a, vecd b){
|
||||||
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl_swap();
|
lutd tbl_swap = acle<double>::tbl_swap();
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
pred pg_even = acle<double>::pg_even();
|
pred pg_even = acle<double>::pg_even();
|
||||||
|
|
||||||
typename acle<double>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
vecd a_v = svtbl(a, tbl_swap);
|
||||||
vecd a_v = svld1(pg1, a.v);
|
|
||||||
a_v = svtbl(a_v, tbl_swap_v);
|
|
||||||
return svneg_x(pg_even, a_v);
|
return svneg_x(pg_even, a_v);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -642,22 +642,16 @@ struct Permute{
|
|||||||
return svext(in, in, (uint64_t)(16u / 2u));
|
return svext(in, in, (uint64_t)(16u / 2u));
|
||||||
}
|
}
|
||||||
static inline vecf Permute1(vecf in) {
|
static inline vecf Permute1(vecf in) {
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl1();
|
lutf tbl_swap = acle<float>::tbl1();
|
||||||
pred pg1 = acle<float>::pg1();
|
return svtbl(in, tbl_swap);
|
||||||
typename acle<float>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
|
||||||
return svtbl(in, tbl_swap_v);
|
|
||||||
}
|
}
|
||||||
static inline vecf Permute2(vecf in) {
|
static inline vecf Permute2(vecf in) {
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl2();
|
lutf tbl_swap = acle<float>::tbl2();
|
||||||
pred pg1 = acle<float>::pg1();
|
return svtbl(in, tbl_swap);
|
||||||
typename acle<float>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
|
||||||
return svtbl(in, tbl_swap_v);
|
|
||||||
}
|
}
|
||||||
static inline vecf Permute3(vecf in) {
|
static inline vecf Permute3(vecf in) {
|
||||||
const vec<typename acle<float>::uint> tbl_swap = acle<float>::tbl_swap();
|
lutf tbl_swap = acle<float>::tbl_swap();
|
||||||
pred pg1 = acle<float>::pg1();
|
return svtbl(in, tbl_swap);
|
||||||
typename acle<float>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
|
||||||
return svtbl(in, tbl_swap_v);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// double
|
// double
|
||||||
@ -665,17 +659,12 @@ struct Permute{
|
|||||||
return svext(in, in, (uint64_t)(8u / 2u));
|
return svext(in, in, (uint64_t)(8u / 2u));
|
||||||
}
|
}
|
||||||
static inline vecd Permute1(vecd in) {
|
static inline vecd Permute1(vecd in) {
|
||||||
|
lutd tbl_swap = acle<double>::tbl1();
|
||||||
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl1();
|
return svtbl(in, tbl_swap);
|
||||||
pred pg1 = acle<double>::pg1();
|
|
||||||
typename acle<double>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
|
||||||
return svtbl(in, tbl_swap_v);
|
|
||||||
}
|
}
|
||||||
static inline vecd Permute2(vecd in) {
|
static inline vecd Permute2(vecd in) {
|
||||||
const vec<typename acle<double>::uint> tbl_swap = acle<double>::tbl_swap();
|
lutd tbl_swap = acle<double>::tbl_swap();
|
||||||
pred pg1 = acle<double>::pg1();
|
return svtbl(in, tbl_swap);
|
||||||
typename acle<double>::svuint tbl_swap_v = svld1(pg1, tbl_swap.v);
|
|
||||||
return svtbl(in, tbl_swap_v);
|
|
||||||
}
|
}
|
||||||
static inline vecd Permute3(vecd in) {
|
static inline vecd Permute3(vecd in) {
|
||||||
return in;
|
return in;
|
||||||
@ -776,7 +765,6 @@ inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
|||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
return svred(pg1, in);
|
return svred(pg1, in);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Integer Reduce
|
//Integer Reduce
|
||||||
template <>
|
template <>
|
||||||
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
||||||
|
Loading…
x
Reference in New Issue
Block a user