mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
clean up; Exch1 dp integrate, tested, working
This commit is contained in:
parent
832485699f
commit
f8c0a59221
@ -400,7 +400,7 @@ struct MultComplex{
|
|||||||
struct MultAddComplex{
|
struct MultAddComplex{
|
||||||
// Complex a*b+c
|
// Complex a*b+c
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline vec<T> mac(const vec<T> &a, const vec<T> b, const vec<T> c){
|
inline mac(const vec<T> &a, const vec<T> b, const vec<T> c){
|
||||||
|
|
||||||
vec<T> out;
|
vec<T> out;
|
||||||
svbool_t pg1 = acle<T>::pg1();
|
svbool_t pg1 = acle<T>::pg1();
|
||||||
|
@ -107,6 +107,18 @@ struct acle<double>{
|
|||||||
const ulutd t = { .s = {2, 3, 0, 1, 6, 7, 4, 5} };
|
const ulutd t = { .s = {2, 3, 0, 1, 6, 7, 4, 5} };
|
||||||
return t.v;
|
return t.v;
|
||||||
}
|
}
|
||||||
|
static inline lutd tbl_exch1a(){ // Exchange1
|
||||||
|
const ulutd t = { .s = {0, 1, 4, 5, 2, 3, 6, 7} };
|
||||||
|
return t.v;
|
||||||
|
}
|
||||||
|
static inline lutd tbl_exch1b(){ // Exchange1
|
||||||
|
const ulutd t = { .s = {2, 3, 6, 7, 0, 1, 4, 5} };
|
||||||
|
return t.v;
|
||||||
|
}
|
||||||
|
static inline lutd tbl_exch1c(){ // Exchange1
|
||||||
|
const ulutd t = { .s = {4, 5, 0, 1, 6, 7, 2, 3} };
|
||||||
|
return t.v;
|
||||||
|
}
|
||||||
static inline pred pg1(){return svptrue_b64();}
|
static inline pred pg1(){return svptrue_b64();}
|
||||||
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
||||||
@ -369,14 +381,14 @@ struct MultComplex{
|
|||||||
struct MultAddComplex{
|
struct MultAddComplex{
|
||||||
// Complex a*b+c
|
// Complex a*b+c
|
||||||
// Complex float
|
// Complex float
|
||||||
inline vecf mac(vecf &a, vecf b, vecf c){
|
inline void mac(vecf &a, vecf b, vecf c){
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
// using FCMLA
|
// using FCMLA
|
||||||
vecf r_v = svcmla_x(pg1, c, a, b, 0);
|
vecf r_v = svcmla_x(pg1, c, a, b, 0);
|
||||||
a = svcmla_x(pg1, r_v, a, b, 90);
|
a = svcmla_x(pg1, r_v, a, b, 90);
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vecd mac(vecd &a, vecd b, vecd c){
|
inline void mac(vecd &a, vecd b, vecd c){
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
// using FCMLA
|
// using FCMLA
|
||||||
vecd r_v = svcmla_x(pg1, c, a, b, 0);
|
vecd r_v = svcmla_x(pg1, c, a, b, 0);
|
||||||
@ -401,13 +413,13 @@ struct Conj{
|
|||||||
// Complex float
|
// Complex float
|
||||||
inline vecf operator()(vecf a){
|
inline vecf operator()(vecf a){
|
||||||
pred pg_odd = acle<float>::pg_odd();
|
pred pg_odd = acle<float>::pg_odd();
|
||||||
//return svneg_x(pg_odd, a); this is unsafe!
|
//return svneg_x(pg_odd, a); this is unsafe
|
||||||
return svneg_m(a, pg_odd, a);
|
return svneg_m(a, pg_odd, a);
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vecd operator()(vecd a){
|
inline vecd operator()(vecd a){
|
||||||
pred pg_odd = acle<double>::pg_odd();
|
pred pg_odd = acle<double>::pg_odd();
|
||||||
//return svneg_x(pg_odd, a); this is unsafe!
|
//return svneg_x(pg_odd, a); this is unsafe
|
||||||
return svneg_m(a, pg_odd, a);
|
return svneg_m(a, pg_odd, a);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -488,17 +500,13 @@ struct PrecisionChange {
|
|||||||
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
|
||||||
pred pg1d = acle<double>::pg1();
|
pred pg1d = acle<double>::pg1();
|
||||||
pred pg1h = acle<uint16_t>::pg1();
|
pred pg1h = acle<uint16_t>::pg1();
|
||||||
vecd a_v = svld1(pg1d, a.v);
|
vech ha_v = svcvt_f16_x(pg1d, a);
|
||||||
vecd b_v = svld1(pg1d, b.v);
|
vech hb_v = svcvt_f16_x(pg1d, b);
|
||||||
vecd c_v = svld1(pg1d, c.v);
|
vech hc_v = svcvt_f16_x(pg1d, c);
|
||||||
vecd d_v = svld1(pg1d, d.v);
|
vech hd_v = svcvt_f16_x(pg1d, d);
|
||||||
vech ha_v = svcvt_f16_x(pg1d, a_v);
|
|
||||||
vech hb_v = svcvt_f16_x(pg1d, b_v);
|
|
||||||
vech hc_v = svcvt_f16_x(pg1d, c_v);
|
|
||||||
vech hd_v = svcvt_f16_x(pg1d, d_v);
|
|
||||||
vech hab_v = svuzp1(ha_v, hb_v);
|
vech hab_v = svuzp1(ha_v, hb_v);
|
||||||
vech hcd_v = svuzp1(hc_v, hd_v);
|
vech hcd_v = svuzp1(hc_v, hd_v);
|
||||||
return r_v = svuzp1(hab_v, hcd_v);
|
return svuzp1(hab_v, hcd_v);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
vecf sa,sb;
|
vecf sa,sb;
|
||||||
@ -510,16 +518,16 @@ struct PrecisionChange {
|
|||||||
static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
|
||||||
pred pg1h = acle<uint16_t>::pg1();
|
pred pg1h = acle<uint16_t>::pg1();
|
||||||
pred pg1d = acle<double>::pg1();
|
pred pg1d = acle<double>::pg1();
|
||||||
vech sa_v = svzip1(h_v, h_v);
|
vech sa_v = svzip1(h, h);
|
||||||
vech sb_v = svzip2(h_v, h_v);
|
vech sb_v = svzip2(h, h);
|
||||||
vech da_v = svzip1(sa_v, sa_v);
|
vech da_v = svzip1(sa_v, sa_v);
|
||||||
vech db_v = svzip2(sa_v, sa_v);
|
vech db_v = svzip2(sa_v, sa_v);
|
||||||
vech dc_v = svzip1(sb_v, sb_v);
|
vech dc_v = svzip1(sb_v, sb_v);
|
||||||
vech dd_v = svzip2(sb_v, sb_v);
|
vech dd_v = svzip2(sb_v, sb_v);
|
||||||
vecd a = svcvt_f64_x(pg1d, da_v);
|
a = svcvt_f64_x(pg1d, da_v);
|
||||||
vecd b = svcvt_f64_x(pg1d, db_v);
|
b = svcvt_f64_x(pg1d, db_v);
|
||||||
vecd c = svcvt_f64_x(pg1d, dc_v);
|
c = svcvt_f64_x(pg1d, dc_v);
|
||||||
vecd d = svcvt_f64_x(pg1d, dd_v);
|
d = svcvt_f64_x(pg1d, dd_v);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
vecf sa,sb;
|
vecf sa,sb;
|
||||||
@ -579,26 +587,17 @@ struct Exchange{
|
|||||||
out2 = svext(in1, r2_v, (uint64_t)4u);
|
out2 = svext(in1, r2_v, (uint64_t)4u);
|
||||||
}
|
}
|
||||||
static inline void Exchange1(vecd &out1, vecd &out2, vecd in1, vecd in2){
|
static inline void Exchange1(vecd &out1, vecd &out2, vecd in1, vecd in2){
|
||||||
// FIXME
|
// this one is tricky; svtrn2q* from SVE2 fits best, but it is not available in SVE1
|
||||||
uvecd v1 = { .v = in1 };
|
lutd tbl_exch1a = acle<double>::tbl_exch1a();
|
||||||
uvecd v2 = { .v = in2 };
|
lutd tbl_exch1b = acle<double>::tbl_exch1b();
|
||||||
uvecd o1, o2;
|
lutd tbl_exch1c = acle<double>::tbl_exch1c();
|
||||||
|
|
||||||
const int n = 1;
|
vecd a1_v = svtbl(in1, tbl_exch1a);
|
||||||
const int w = 8; // w = W<T>::r
|
vecd a2_v = svtbl(in2, tbl_exch1b);
|
||||||
unsigned int mask = w >> (n + 1);
|
vecd b1_v = svext(a2_v, a1_v, (uint64_t)4u);
|
||||||
// std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl;
|
vecd b2_v = svext(a1_v, a2_v, (uint64_t)4u);
|
||||||
VECTOR_FOR(i, w, 1) {
|
out1 = svtbl(b1_v, tbl_exch1c);
|
||||||
int j1 = i&(~mask);
|
out2 = svtbl(b2_v, tbl_exch1a);
|
||||||
if ( (i&mask) == 0 ) { o1.s[i]=v1.s[j1];}
|
|
||||||
else { o1.s[i]=v2.s[j1];}
|
|
||||||
int j2 = i|mask;
|
|
||||||
if ( (i&mask) == 0 ) { o2.s[i]=v1.s[j2];}
|
|
||||||
else { o2.s[i]=v2.s[j2];}
|
|
||||||
}
|
|
||||||
|
|
||||||
out1 = o1.v;
|
|
||||||
out2 = o2.v;
|
|
||||||
}
|
}
|
||||||
static inline void Exchange2(vecd &out1, vecd &out2, vecd in1, vecd in2){
|
static inline void Exchange2(vecd &out1, vecd &out2, vecd in1, vecd in2){
|
||||||
out1 = svtrn1(in1, in2);
|
out1 = svtrn1(in1, in2);
|
||||||
@ -615,7 +614,7 @@ struct Exchange{
|
|||||||
struct Permute{
|
struct Permute{
|
||||||
// float
|
// float
|
||||||
static inline vecf Permute0(vecf in) {
|
static inline vecf Permute0(vecf in) {
|
||||||
return svext(in, in, (uint64_t)(16u / 2u));
|
return svext(in, in, (uint64_t)8u);
|
||||||
}
|
}
|
||||||
static inline vecf Permute1(vecf in) {
|
static inline vecf Permute1(vecf in) {
|
||||||
lutf tbl_swap = acle<float>::tbl1();
|
lutf tbl_swap = acle<float>::tbl1();
|
||||||
@ -713,8 +712,6 @@ struct Reduce{
|
|||||||
};
|
};
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template <>
|
template <>
|
||||||
// inline Grid::ComplexF Reduce<Grid::ComplexF, svfloat32_t>::operator()(svfloat32_t in){
|
|
||||||
//inline Grid::ComplexF Reduce<Grid::ComplexF, __SVFloat32_t>::operator()(__SVFloat32_t in){
|
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
||||||
pred pg_even = acle<float>::pg_even();
|
pred pg_even = acle<float>::pg_even();
|
||||||
pred pg_odd = acle<float>::pg_odd();
|
pred pg_odd = acle<float>::pg_odd();
|
||||||
@ -724,16 +721,12 @@ inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
|
|||||||
}
|
}
|
||||||
//Real float Reduce
|
//Real float Reduce
|
||||||
template <>
|
template <>
|
||||||
//inline Grid::RealF Reduce<Grid::RealF, svfloat32_t>::operator()(svfloat32_t in){
|
|
||||||
//inline Grid::RealF Reduce<Grid::RealF, __SVFloat32_t>::operator()(__SVFloat32_t in){
|
|
||||||
inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
|
||||||
pred pg1 = acle<float>::pg1();
|
pred pg1 = acle<float>::pg1();
|
||||||
return svred(pg1, in);
|
return svred(pg1, in);
|
||||||
}
|
}
|
||||||
//Complex double Reduce
|
//Complex double Reduce
|
||||||
template <>
|
template <>
|
||||||
//inline Grid::ComplexD Reduce<Grid::ComplexD, svfloat64_t>::operator()(svfloat64_t in){
|
|
||||||
//inline Grid::ComplexD Reduce<Grid::ComplexD, __SVFloat64_t>::operator()(__SVFloat64_t in){
|
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
||||||
pred pg_even = acle<double>::pg_even();
|
pred pg_even = acle<double>::pg_even();
|
||||||
pred pg_odd = acle<double>::pg_odd();
|
pred pg_odd = acle<double>::pg_odd();
|
||||||
@ -743,16 +736,12 @@ inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
|
|||||||
}
|
}
|
||||||
//Real double Reduce
|
//Real double Reduce
|
||||||
template <>
|
template <>
|
||||||
//inline Grid::RealD Reduce<Grid::RealD, svfloat64_t>::operator()(svfloat64_t in){
|
|
||||||
//inline Grid::RealD Reduce<Grid::RealD, __SVFloat64_t>::operator()(__SVFloat64_t in){
|
|
||||||
inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
|
||||||
pred pg1 = acle<double>::pg1();
|
pred pg1 = acle<double>::pg1();
|
||||||
return svred(pg1, in);
|
return svred(pg1, in);
|
||||||
}
|
}
|
||||||
//Integer Reduce
|
//Integer Reduce
|
||||||
template <>
|
template <>
|
||||||
//inline Integer Reduce<Integer, svuint32_t>::operator()(svuint32_t in){
|
|
||||||
//inline Integer Reduce<Integer, __SVUint32_t>::operator()(__SVUint32_t in){
|
|
||||||
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
inline Integer Reduce<Integer, veci>::operator()(veci in){
|
||||||
pred pg1 = acle<Integer>::pg1();
|
pred pg1 = acle<Integer>::pg1();
|
||||||
return svred(pg1, in);
|
return svred(pg1, in);
|
||||||
|
Loading…
Reference in New Issue
Block a user