From 832485699f239cb77ec2303f6874bf81c8b5ddbc Mon Sep 17 00:00:00 2001
From: nmeyer-ur
Date: Wed, 20 May 2020 23:04:35 +0200
Subject: [PATCH] save some cycles in HtoD and DtoH by direct instead of multi-pass conversion

---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message):

 * DtoH: convert the vecd arguments directly instead of loading them through
   "a.v" (the other operators touched by this patch, e.g. Conj, hand
   vecf/vecd straight to the intrinsics), and return the packed vector
   instead of assigning to the undeclared name "r_v".
 * HtoD: zip the parameter "h" (the name "h_v" was never declared on the new
   side) and assign to the reference parameters a..d instead of declaring
   new locals with the same names, which left the outputs unwritten.
 * Both: drop the unused pg1h predicate.
 * NOTE(review): "acle::..." is spelled exactly as it appears in this copy of
   the patch; if acle is a class template in the target header, the element
   type arguments have to be restored - confirm against the file.
 * NOTE(review): the hunk header and diffstat are recomputed for the changed
   lines; the "index" blob ids are not - regenerate with git format-patch.

 Grid/simd/Grid_a64fx-fixedsize.h | 75 ++++++++++++++------------------
 1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/Grid/simd/Grid_a64fx-fixedsize.h b/Grid/simd/Grid_a64fx-fixedsize.h
index 9e0fc51e..28fafa27 100644
--- a/Grid/simd/Grid_a64fx-fixedsize.h
+++ b/Grid/simd/Grid_a64fx-fixedsize.h
@@ -401,13 +401,13 @@ struct Conj{
   // Complex float
   inline vecf operator()(vecf a){
     pred pg_odd = acle::pg_odd();
-    //return svneg_x(pg_odd, a);
+    //return svneg_x(pg_odd, a); this is unsafe!
     return svneg_m(a, pg_odd, a);
   }
   // Complex double
   inline vecd operator()(vecd a){
     pred pg_odd = acle::pg_odd();
-    //return svneg_x(pg_odd, a);
+    //return svneg_x(pg_odd, a); this is unsafe!
     return svneg_m(a, pg_odd, a);
   }
 };
@@ -420,7 +420,7 @@ struct TimesMinusI{
     pred pg_odd = acle::pg_odd();
 
     vecf a_v = svtbl(a, tbl_swap);
-    //return svneg_x(pg_odd, a_v);
+    //return svneg_x(pg_odd, a_v); this is unsafe
     return svneg_m(a_v, pg_odd, a_v);
   }
   // Complex double
@@ -430,7 +430,7 @@ struct TimesMinusI{
     pred pg_odd = acle::pg_odd();
 
     vecd a_v = svtbl(a, tbl_swap);
-    //return svneg_x(pg_odd, a_v);
+    //return svneg_x(pg_odd, a_v); this is unsafe
     return svneg_m(a_v, pg_odd, a_v);
   }
 };
@@ -443,7 +443,7 @@ struct TimesI{
     pred pg_even = acle::pg_even();
 
     vecf a_v = svtbl(a, tbl_swap);
-    //return svneg_x(pg_even, a_v);
+    //return svneg_x(pg_even, a_v); this is unsafe
     return svneg_m(a_v, pg_even, a_v);
   }
   // Complex double
@@ -453,7 +453,7 @@ struct TimesI{
     pred pg_even = acle::pg_even();
 
     vecd a_v = svtbl(a, tbl_swap);
-    //return svneg_x(pg_even, a_v);
+    //return svneg_x(pg_even, a_v); this is unsafe
     return svneg_m(a_v, pg_even, a_v);
   }
 };
@@ -486,54 +486,45 @@ struct PrecisionChange {
     b = svcvt_f64_x(pg1d, sb_v);
   }
   static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
-/*
-    vech ret;
-    svbool_t pg1d = acle::pg1();
-    svbool_t pg1h = acle::pg1();
-    typename acle::vt a_v = svld1(pg1d, a.v);
-    typename acle::vt b_v = svld1(pg1d, b.v);
-    typename acle::vt c_v = svld1(pg1d, c.v);
-    typename acle::vt d_v = svld1(pg1d, d.v);
-    typename acle::vt ha_v = svcvt_f16_x(pg1d, a_v);
-    typename acle::vt hb_v = svcvt_f16_x(pg1d, b_v);
-    typename acle::vt hc_v = svcvt_f16_x(pg1d, c_v);
-    typename acle::vt hd_v = svcvt_f16_x(pg1d, d_v);
-    typename acle::vt hab_v = svuzp1(ha_v, hb_v);
-    typename acle::vt hcd_v = svuzp1(hc_v, hd_v);
-    typename acle::vt r_v = svuzp1(hab_v, hcd_v);
-    svst1(pg1h, (typename acle::pt*)&ret.v, r_v);
+    // direct double -> half conversion of all four inputs, followed by two
+    // svuzp1 passes that gather the converted values of a,b,c,d into one vector
+    pred pg1d = acle::pg1();
+    vech ha_v = svcvt_f16_x(pg1d, a);
+    vech hb_v = svcvt_f16_x(pg1d, b);
+    vech hc_v = svcvt_f16_x(pg1d, c);
+    vech hd_v = svcvt_f16_x(pg1d, d);
+    vech hab_v = svuzp1(ha_v, hb_v);
+    vech hcd_v = svuzp1(hc_v, hd_v);
+    return svuzp1(hab_v, hcd_v);
 
-    return ret;
-*/
+/*
     vecf sa,sb;
     sa = DtoS(a,b);
     sb = DtoS(c,d);
     return StoH(sa,sb);
+*/
   }
   static inline void HtoD(vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
+    // spread the half-precision elements of h with two svzip passes, then
+    // convert directly to double and write the four reference outputs
+    pred pg1d = acle::pg1();
+    vech sa_v = svzip1(h, h);
+    vech sb_v = svzip2(h, h);
+    vech da_v = svzip1(sa_v, sa_v);
+    vech db_v = svzip2(sa_v, sa_v);
+    vech dc_v = svzip1(sb_v, sb_v);
+    vech dd_v = svzip2(sb_v, sb_v);
+    a = svcvt_f64_x(pg1d, da_v);
+    b = svcvt_f64_x(pg1d, db_v);
+    c = svcvt_f64_x(pg1d, dc_v);
+    d = svcvt_f64_x(pg1d, dd_v);
+
 /*
-    svbool_t pg1h = acle::pg1();
-    svbool_t pg1d = acle::pg1();
-    typename acle::vt h_v = svld1(pg1h, (typename acle::pt*)&h.v);
-    typename acle::vt sa_v = svzip1(h_v, h_v);
-    typename acle::vt sb_v = svzip2(h_v, h_v);
-    typename acle::vt da_v = svzip1(sa_v, sa_v);
-    typename acle::vt db_v = svzip2(sa_v, sa_v);
-    typename acle::vt dc_v = svzip1(sb_v, sb_v);
-    typename acle::vt dd_v = svzip2(sb_v, sb_v);
-    typename acle::vt a_v = svcvt_f64_x(pg1d, da_v);
-    typename acle::vt b_v = svcvt_f64_x(pg1d, db_v);
-    typename acle::vt c_v = svcvt_f64_x(pg1d, dc_v);
-    typename acle::vt d_v = svcvt_f64_x(pg1d, dd_v);
-    svst1(pg1d, a.v, a_v);
-    svst1(pg1d, b.v, b_v);
-    svst1(pg1d, c.v, c_v);
-    svst1(pg1d, d.v, d_v);
-*/
     vecf sa,sb;
     HtoS(h,sa,sb);
     StoD(sa,a,b);
     StoD(sb,c,d);
+*/
   }
 };
 