1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Clean up; integrate the double-precision Exchange1 implementation (tested, working)

This commit is contained in:
nmeyer-ur 2020-05-21 02:48:14 +02:00
parent 832485699f
commit f8c0a59221
2 changed files with 39 additions and 50 deletions

View File

@ -400,7 +400,7 @@ struct MultComplex{
struct MultAddComplex{ struct MultAddComplex{
// Complex a*b+c // Complex a*b+c
template <typename T> template <typename T>
inline vec<T> mac(const vec<T> &a, const vec<T> b, const vec<T> c){ inline mac(const vec<T> &a, const vec<T> b, const vec<T> c){
vec<T> out; vec<T> out;
svbool_t pg1 = acle<T>::pg1(); svbool_t pg1 = acle<T>::pg1();

View File

@ -107,6 +107,18 @@ struct acle<double>{
const ulutd t = { .s = {2, 3, 0, 1, 6, 7, 4, 5} }; const ulutd t = { .s = {2, 3, 0, 1, 6, 7, 4, 5} };
return t.v; return t.v;
} }
// Exchange1 index table "a": lane indices {0, 1, 4, 5, 2, 3, 6, 7}.
// Exchange1 passes it to svtbl twice: first to permute in1, and last to
// produce out2. Read as an svtbl index vector, it keeps lanes 0-1 and 6-7
// in place and swaps the lane pair 2-3 with the lane pair 4-5.
// NOTE(review): 8 entries presumably means a 512-bit vector of doubles,
// and ulutd is presumably a union overlaying the index array `s` with the
// vector `v` -- confirm against the ulutd definition.
static inline lutd tbl_exch1a(){
  const ulutd lut = { .s = {0, 1, 4, 5, 2, 3, 6, 7} };
  return lut.v;
}
// Exchange1 index table "b": lane indices {2, 3, 6, 7, 0, 1, 4, 5}.
// Exchange1 passes it to svtbl to permute in2 before the svext steps.
// Read as an svtbl index vector, it reorders the four lane pairs
// (0-1, 2-3, 4-5, 6-7) into the order 2-3, 6-7, 0-1, 4-5.
// NOTE(review): 8 entries presumably means a 512-bit vector of doubles,
// and ulutd is presumably a union overlaying the index array `s` with the
// vector `v` -- confirm against the ulutd definition.
static inline lutd tbl_exch1b(){
  const ulutd lut = { .s = {2, 3, 6, 7, 0, 1, 4, 5} };
  return lut.v;
}
// Exchange1 index table "c": lane indices {4, 5, 0, 1, 6, 7, 2, 3}.
// Exchange1 passes it to svtbl as the final permutation that produces out1.
// Read as an svtbl index vector, it reorders the four lane pairs
// (0-1, 2-3, 4-5, 6-7) into the order 4-5, 0-1, 6-7, 2-3.
// NOTE(review): 8 entries presumably means a 512-bit vector of doubles,
// and ulutd is presumably a union overlaying the index array `s` with the
// vector `v` -- confirm against the ulutd definition.
static inline lutd tbl_exch1c(){
  const ulutd lut = { .s = {4, 5, 0, 1, 6, 7, 2, 3} };
  return lut.v;
}
static inline pred pg1(){return svptrue_b64();} static inline pred pg1(){return svptrue_b64();}
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());} static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());} static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
@ -369,14 +381,14 @@ struct MultComplex{
struct MultAddComplex{ struct MultAddComplex{
// Complex a*b+c // Complex a*b+c
// Complex float // Complex float
inline vecf mac(vecf &a, vecf b, vecf c){ inline void mac(vecf &a, vecf b, vecf c){
pred pg1 = acle<float>::pg1(); pred pg1 = acle<float>::pg1();
// using FCMLA // using FCMLA
vecf r_v = svcmla_x(pg1, c, a, b, 0); vecf r_v = svcmla_x(pg1, c, a, b, 0);
a = svcmla_x(pg1, r_v, a, b, 90); a = svcmla_x(pg1, r_v, a, b, 90);
} }
// Complex double // Complex double
inline vecd mac(vecd &a, vecd b, vecd c){ inline void mac(vecd &a, vecd b, vecd c){
pred pg1 = acle<double>::pg1(); pred pg1 = acle<double>::pg1();
// using FCMLA // using FCMLA
vecd r_v = svcmla_x(pg1, c, a, b, 0); vecd r_v = svcmla_x(pg1, c, a, b, 0);
@ -401,13 +413,13 @@ struct Conj{
// Complex float // Complex float
inline vecf operator()(vecf a){ inline vecf operator()(vecf a){
pred pg_odd = acle<float>::pg_odd(); pred pg_odd = acle<float>::pg_odd();
//return svneg_x(pg_odd, a); this is unsafe! //return svneg_x(pg_odd, a); this is unsafe
return svneg_m(a, pg_odd, a); return svneg_m(a, pg_odd, a);
} }
// Complex double // Complex double
inline vecd operator()(vecd a){ inline vecd operator()(vecd a){
pred pg_odd = acle<double>::pg_odd(); pred pg_odd = acle<double>::pg_odd();
//return svneg_x(pg_odd, a); this is unsafe! //return svneg_x(pg_odd, a); this is unsafe
return svneg_m(a, pg_odd, a); return svneg_m(a, pg_odd, a);
} }
}; };
@ -488,17 +500,13 @@ struct PrecisionChange {
static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) { static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
pred pg1d = acle<double>::pg1(); pred pg1d = acle<double>::pg1();
pred pg1h = acle<uint16_t>::pg1(); pred pg1h = acle<uint16_t>::pg1();
vecd a_v = svld1(pg1d, a.v); vech ha_v = svcvt_f16_x(pg1d, a);
vecd b_v = svld1(pg1d, b.v); vech hb_v = svcvt_f16_x(pg1d, b);
vecd c_v = svld1(pg1d, c.v); vech hc_v = svcvt_f16_x(pg1d, c);
vecd d_v = svld1(pg1d, d.v); vech hd_v = svcvt_f16_x(pg1d, d);
vech ha_v = svcvt_f16_x(pg1d, a_v);
vech hb_v = svcvt_f16_x(pg1d, b_v);
vech hc_v = svcvt_f16_x(pg1d, c_v);
vech hd_v = svcvt_f16_x(pg1d, d_v);
vech hab_v = svuzp1(ha_v, hb_v); vech hab_v = svuzp1(ha_v, hb_v);
vech hcd_v = svuzp1(hc_v, hd_v); vech hcd_v = svuzp1(hc_v, hd_v);
return r_v = svuzp1(hab_v, hcd_v); return svuzp1(hab_v, hcd_v);
/* /*
vecf sa,sb; vecf sa,sb;
@ -510,16 +518,16 @@ struct PrecisionChange {
static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) { static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
pred pg1h = acle<uint16_t>::pg1(); pred pg1h = acle<uint16_t>::pg1();
pred pg1d = acle<double>::pg1(); pred pg1d = acle<double>::pg1();
vech sa_v = svzip1(h_v, h_v); vech sa_v = svzip1(h, h);
vech sb_v = svzip2(h_v, h_v); vech sb_v = svzip2(h, h);
vech da_v = svzip1(sa_v, sa_v); vech da_v = svzip1(sa_v, sa_v);
vech db_v = svzip2(sa_v, sa_v); vech db_v = svzip2(sa_v, sa_v);
vech dc_v = svzip1(sb_v, sb_v); vech dc_v = svzip1(sb_v, sb_v);
vech dd_v = svzip2(sb_v, sb_v); vech dd_v = svzip2(sb_v, sb_v);
vecd a = svcvt_f64_x(pg1d, da_v); a = svcvt_f64_x(pg1d, da_v);
vecd b = svcvt_f64_x(pg1d, db_v); b = svcvt_f64_x(pg1d, db_v);
vecd c = svcvt_f64_x(pg1d, dc_v); c = svcvt_f64_x(pg1d, dc_v);
vecd d = svcvt_f64_x(pg1d, dd_v); d = svcvt_f64_x(pg1d, dd_v);
/* /*
vecf sa,sb; vecf sa,sb;
@ -579,26 +587,17 @@ struct Exchange{
out2 = svext(in1, r2_v, (uint64_t)4u); out2 = svext(in1, r2_v, (uint64_t)4u);
} }
static inline void Exchange1(vecd &out1, vecd &out2, vecd in1, vecd in2){ static inline void Exchange1(vecd &out1, vecd &out2, vecd in1, vecd in2){
// FIXME // this one is tricky; svtrn2q* from SVE2 fits best, but it is not available in SVE1
uvecd v1 = { .v = in1 }; lutd tbl_exch1a = acle<double>::tbl_exch1a();
uvecd v2 = { .v = in2 }; lutd tbl_exch1b = acle<double>::tbl_exch1b();
uvecd o1, o2; lutd tbl_exch1c = acle<double>::tbl_exch1c();
const int n = 1; vecd a1_v = svtbl(in1, tbl_exch1a);
const int w = 8; // w = W<T>::r vecd a2_v = svtbl(in2, tbl_exch1b);
unsigned int mask = w >> (n + 1); vecd b1_v = svext(a2_v, a1_v, (uint64_t)4u);
// std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl; vecd b2_v = svext(a1_v, a2_v, (uint64_t)4u);
VECTOR_FOR(i, w, 1) { out1 = svtbl(b1_v, tbl_exch1c);
int j1 = i&(~mask); out2 = svtbl(b2_v, tbl_exch1a);
if ( (i&mask) == 0 ) { o1.s[i]=v1.s[j1];}
else { o1.s[i]=v2.s[j1];}
int j2 = i|mask;
if ( (i&mask) == 0 ) { o2.s[i]=v1.s[j2];}
else { o2.s[i]=v2.s[j2];}
}
out1 = o1.v;
out2 = o2.v;
} }
static inline void Exchange2(vecd &out1, vecd &out2, vecd in1, vecd in2){ static inline void Exchange2(vecd &out1, vecd &out2, vecd in1, vecd in2){
out1 = svtrn1(in1, in2); out1 = svtrn1(in1, in2);
@ -615,7 +614,7 @@ struct Exchange{
struct Permute{ struct Permute{
// float // float
static inline vecf Permute0(vecf in) { static inline vecf Permute0(vecf in) {
return svext(in, in, (uint64_t)(16u / 2u)); return svext(in, in, (uint64_t)8u);
} }
static inline vecf Permute1(vecf in) { static inline vecf Permute1(vecf in) {
lutf tbl_swap = acle<float>::tbl1(); lutf tbl_swap = acle<float>::tbl1();
@ -713,8 +712,6 @@ struct Reduce{
}; };
//Complex float Reduce //Complex float Reduce
template <> template <>
// inline Grid::ComplexF Reduce<Grid::ComplexF, svfloat32_t>::operator()(svfloat32_t in){
//inline Grid::ComplexF Reduce<Grid::ComplexF, __SVFloat32_t>::operator()(__SVFloat32_t in){
inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){ inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
pred pg_even = acle<float>::pg_even(); pred pg_even = acle<float>::pg_even();
pred pg_odd = acle<float>::pg_odd(); pred pg_odd = acle<float>::pg_odd();
@ -724,16 +721,12 @@ inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
} }
//Real float Reduce //Real float Reduce
template <> template <>
//inline Grid::RealF Reduce<Grid::RealF, svfloat32_t>::operator()(svfloat32_t in){
//inline Grid::RealF Reduce<Grid::RealF, __SVFloat32_t>::operator()(__SVFloat32_t in){
inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){ inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
pred pg1 = acle<float>::pg1(); pred pg1 = acle<float>::pg1();
return svred(pg1, in); return svred(pg1, in);
} }
//Complex double Reduce //Complex double Reduce
template <> template <>
//inline Grid::ComplexD Reduce<Grid::ComplexD, svfloat64_t>::operator()(svfloat64_t in){
//inline Grid::ComplexD Reduce<Grid::ComplexD, __SVFloat64_t>::operator()(__SVFloat64_t in){
inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){ inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
pred pg_even = acle<double>::pg_even(); pred pg_even = acle<double>::pg_even();
pred pg_odd = acle<double>::pg_odd(); pred pg_odd = acle<double>::pg_odd();
@ -743,16 +736,12 @@ inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
} }
//Real double Reduce //Real double Reduce
template <> template <>
//inline Grid::RealD Reduce<Grid::RealD, svfloat64_t>::operator()(svfloat64_t in){
//inline Grid::RealD Reduce<Grid::RealD, __SVFloat64_t>::operator()(__SVFloat64_t in){
inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){ inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
pred pg1 = acle<double>::pg1(); pred pg1 = acle<double>::pg1();
return svred(pg1, in); return svred(pg1, in);
} }
//Integer Reduce //Integer Reduce
template <> template <>
//inline Integer Reduce<Integer, svuint32_t>::operator()(svuint32_t in){
//inline Integer Reduce<Integer, __SVUint32_t>::operator()(__SVUint32_t in){
inline Integer Reduce<Integer, veci>::operator()(veci in){ inline Integer Reduce<Integer, veci>::operator()(veci in){
pred pg1 = acle<Integer>::pg1(); pred pg1 = acle<Integer>::pg1();
return svred(pg1, in); return svred(pg1, in);