From bbb657da5c31fdbcfbb2b695533305a85039ed90 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 12 Jan 2018 18:10:11 +0000 Subject: [PATCH] NAMESPACE and formatting --- lib/simd/Grid_generic.h | 724 ++++++++++++++++++++-------------------- 1 file changed, 363 insertions(+), 361 deletions(-) diff --git a/lib/simd/Grid_generic.h b/lib/simd/Grid_generic.h index e1d5f894..d555a672 100644 --- a/lib/simd/Grid_generic.h +++ b/lib/simd/Grid_generic.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,413 +25,413 @@ Author: Antonin Portelli 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include "Grid_generic_types.h" -namespace Grid { -namespace Optimization { +NAMESPACE_BEGIN(Grid); +NAMESPACE_BEGIN(Optimization); - struct Vsplat{ - // Complex - template - inline vec operator()(T a, T b){ - vec out; +struct Vsplat{ + // Complex + template + inline vec operator()(T a, T b){ + vec out; - VECTOR_FOR(i, W::r, 2) + VECTOR_FOR(i, W::r, 2) { out.v[i] = a; out.v[i+1] = b; } - return out; - } + return out; + } - // Real - template - inline vec operator()(T a){ - vec out; + // Real + template + inline vec operator()(T a){ + vec out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a; } - return out; - } - }; + return out; + } +}; - struct Vstore{ - // Real - template - inline void operator()(vec a, T *D){ - *((vec *)D) = a; - } - }; +struct Vstore{ + // Real + template + inline void operator()(vec a, T *D){ + *((vec *)D) = a; + } +}; - struct Vstream{ - // Real - template - inline void operator()(T * a, vec b){ - *((vec *)a) = b; - } - }; +struct Vstream{ + // Real + template + inline void operator()(T * a, vec b){ + *((vec *)a) = b; + } +}; - struct Vset{ - // Complex - template - inline vec operator()(std::complex *a){ - vec out; +struct Vset{ + // Complex + template + inline vec operator()(std::complex *a){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { out.v[2*i] = a[i].real(); out.v[2*i+1] = a[i].imag(); } - return out; - } + return out; + } - // Real - template - inline vec operator()(T *a){ - vec out; + // Real + template + inline vec operator()(T *a){ + vec out; - out = *((vec *)a); + out = *((vec *)a); - return out; - } - }; + return out; + } +}; - ///////////////////////////////////////////////////// - // Arithmetic operations - ///////////////////////////////////////////////////// - struct Sum{ - // Complex/Real - template - inline vec operator()(vec a, vec b){ - vec out; +///////////////////////////////////////////////////// +// Arithmetic operations +///////////////////////////////////////////////////// +struct Sum{ + // Complex/Real + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i] + b.v[i]; } - return out; - } - }; + return out; + } +}; - struct Sub{ - // Complex/Real - template - inline vec operator()(vec a, vec b){ - vec out; +struct Sub{ + // Complex/Real + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i] - b.v[i]; } - return out; - } - }; + return out; + } +}; - struct Mult{ - // Real - template - inline vec operator()(vec a, vec b){ - vec out; +struct Mult{ + // Real + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i]*b.v[i]; } - return out; - } - }; + return out; + } +}; - #define cmul(a, b, c, i)\ - c[i] = a[i]*b[i] - a[i+1]*b[i+1];\ +#define cmul(a, b, c, i) \ + c[i] = a[i]*b[i] - a[i+1]*b[i+1]; \ c[i+1] = a[i]*b[i+1] + a[i+1]*b[i]; - struct MultRealPart{ - template - inline vec operator()(vec a, vec b){ - vec out; +struct MultRealPart{ + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { - out.v[2*i] = a.v[2*i]*b.v[2*i]; - out.v[2*i+1] = a.v[2*i]*b.v[2*i+1]; + out.v[2*i] = a.v[2*i]*b.v[2*i]; + out.v[2*i+1] = a.v[2*i]*b.v[2*i+1]; } - return out; - } - }; + return out; + } +}; - struct MaddRealPart{ - template - inline vec operator()(vec a, vec b, vec c){ - vec out; +struct MaddRealPart{ + template + inline vec operator()(vec a, vec b, vec c){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { - out.v[2*i] = a.v[2*i]*b.v[2*i] + c.v[2*i]; - out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1]; + out.v[2*i] = a.v[2*i]*b.v[2*i] + c.v[2*i]; + out.v[2*i+1] = a.v[2*i]*b.v[2*i+1] + c.v[2*i+1]; } - return out; - } - }; + return out; + } +}; - struct MultComplex{ - // Complex - template - inline vec operator()(vec a, vec b){ - vec out; +struct MultComplex{ + // Complex + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { cmul(a.v, b.v, out.v, 2*i); } - return out; - } - }; + return out; + } +}; - #undef cmul +#undef cmul - struct Div{ - // Real - template - inline vec operator()(vec a, vec b){ - vec out; +struct Div{ + // Real + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i]/b.v[i]; } - return out; - } - }; + return out; + } +}; - #define conj(a, b, i)\ - b[i] = a[i];\ +#define conj(a, b, i) \ + b[i] = a[i]; \ b[i+1] = -a[i+1]; - struct Conj{ - // Complex - template - inline vec operator()(vec a){ - vec out; +struct Conj{ + // Complex + template + inline vec operator()(vec a){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { conj(a.v, out.v, 2*i); } - return out; - } - }; + return out; + } +}; - #undef conj +#undef conj - #define timesmi(a, b, i)\ - b[i] = a[i+1];\ +#define timesmi(a, b, i) \ + b[i] = a[i+1]; \ b[i+1] = -a[i]; - struct TimesMinusI{ - // Complex - template - inline vec operator()(vec a, vec b){ - vec out; +struct TimesMinusI{ + // Complex + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { timesmi(a.v, out.v, 2*i); } - return out; - } - }; + return out; + } +}; - #undef timesmi +#undef timesmi - #define timesi(a, b, i)\ - b[i] = -a[i+1];\ +#define timesi(a, b, i) \ + b[i] = -a[i+1]; \ b[i+1] = a[i]; - struct TimesI{ - // Complex - template - inline vec operator()(vec a, vec b){ - vec out; +struct TimesI{ + // Complex + template + inline vec operator()(vec a, vec b){ + vec out; - VECTOR_FOR(i, W::c, 1) + VECTOR_FOR(i, W::c, 1) { timesi(a.v, out.v, 2*i); } - return out; - } - }; + return out; + } +}; - #undef timesi +#undef timesi - struct PrecisionChange { - static inline vech StoH (const vecf &a,const vecf &b) { - vech ret; +struct PrecisionChange { + static inline vech StoH (const vecf &a,const vecf &b) { + vech ret; #ifdef USE_FP16 - vech *ha = (vech *)&a; - vech *hb = (vech *)&b; - const int nf = W::r; - // VECTOR_FOR(i, nf,1){ ret.v[i] = ( (uint16_t *) &a.v[i])[1] ; } - // VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; } - VECTOR_FOR(i, nf,1){ ret.v[i] = ha->v[2*i+1]; } - VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; } + vech *ha = (vech *)&a; + vech *hb = (vech *)&b; + const int nf = W::r; + // VECTOR_FOR(i, nf,1){ ret.v[i] = ( (uint16_t *) &a.v[i])[1] ; } + // VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; } + VECTOR_FOR(i, nf,1){ ret.v[i] = ha->v[2*i+1]; } + VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; } #else - assert(0); + assert(0); #endif - return ret; - } - static inline void HtoS (vech h,vecf &sa,vecf &sb) { + return ret; + } + static inline void HtoS (vech h,vecf &sa,vecf &sb) { #ifdef USE_FP16 - const int nf = W::r; - const int nh = W::r; - vech *ha = (vech *)&sa; - vech *hb = (vech *)&sb; - VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; } - // VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];} - // VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];} - VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; } - VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; } + const int nf = W::r; + const int nh = W::r; + vech *ha = (vech *)&sa; + vech *hb = (vech *)&sb; + VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; } + // VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];} + // VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];} + VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; } + VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; } #else - assert(0); + assert(0); #endif - } - static inline vecf DtoS (vecd a,vecd b) { - const int nd = W::r; - const int nf = W::r; - vecf ret; - VECTOR_FOR(i, nd,1){ ret.v[i] = a.v[i] ; } - VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; } - return ret; - } - static inline void StoD (vecf s,vecd &a,vecd &b) { - const int nd = W::r; - VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; } - VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; } - } - static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) { - vecf sa,sb; - sa = DtoS(a,b); - sb = DtoS(c,d); - return StoH(sa,sb); - } - static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) { - vecf sa,sb; - HtoS(h,sa,sb); - StoD(sa,a,b); - StoD(sb,c,d); - } - }; + } + static inline vecf DtoS (vecd a,vecd b) { + const int nd = W::r; + const int nf = W::r; + vecf ret; + VECTOR_FOR(i, nd,1){ ret.v[i] = a.v[i] ; } + VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; } + return ret; + } + static inline void StoD (vecf s,vecd &a,vecd &b) { + const int nd = W::r; + VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; } + VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; } + } + static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) { + vecf sa,sb; + sa = DtoS(a,b); + sb = DtoS(c,d); + return StoH(sa,sb); + } + static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) { + vecf sa,sb; + HtoS(h,sa,sb); + StoD(sa,a,b); + StoD(sb,c,d); + } +}; - ////////////////////////////////////////////// - // Exchange support - struct Exchange{ +////////////////////////////////////////////// +// Exchange support +struct Exchange{ - template + template static inline void ExchangeN(vec &out1,vec &out2,vec &in1,vec &in2){ - const int w = W::r; - unsigned int mask = w >> (n + 1); - // std::cout << " Exchange "< - static inline void Exchange0(vec &out1,vec &out2,vec &in1,vec &in2){ - ExchangeN(out1,out2,in1,in2); - }; - template - static inline void Exchange1(vec &out1,vec &out2,vec &in1,vec &in2){ - ExchangeN(out1,out2,in1,in2); - }; - template - static inline void Exchange2(vec &out1,vec &out2,vec &in1,vec &in2){ - ExchangeN(out1,out2,in1,in2); - }; - template - static inline void Exchange3(vec &out1,vec &out2,vec &in1,vec &in2){ - ExchangeN(out1,out2,in1,in2); - }; + const int w = W::r; + unsigned int mask = w >> (n + 1); + // std::cout << " Exchange "< + static inline void Exchange0(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN(out1,out2,in1,in2); }; + template + static inline void Exchange1(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN(out1,out2,in1,in2); + }; + template + static inline void Exchange2(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN(out1,out2,in1,in2); + }; + template + static inline void Exchange3(vec &out1,vec &out2,vec &in1,vec &in2){ + ExchangeN(out1,out2,in1,in2); + }; +}; - ////////////////////////////////////////////// - // Some Template specialization - #define perm(a, b, n, w)\ - unsigned int _mask = w >> (n + 1);\ - VECTOR_FOR(i, w, 1)\ - {\ - b[i] = a[i^_mask];\ +////////////////////////////////////////////// +// Some Template specialization +#define perm(a, b, n, w) \ + unsigned int _mask = w >> (n + 1); \ + VECTOR_FOR(i, w, 1) \ + { \ + b[i] = a[i^_mask]; \ } - #define DECL_PERMUTE_N(n)\ - template \ - static inline vec Permute##n(vec in) {\ - vec out;\ - perm(in.v, out.v, n, W::r);\ - return out;\ +#define DECL_PERMUTE_N(n) \ + template \ + static inline vec Permute##n(vec in) { \ + vec out; \ + perm(in.v, out.v, n, W::r); \ + return out; \ } - struct Permute{ - DECL_PERMUTE_N(0); - DECL_PERMUTE_N(1); - DECL_PERMUTE_N(2); - DECL_PERMUTE_N(3); - }; +struct Permute{ + DECL_PERMUTE_N(0); + DECL_PERMUTE_N(1); + DECL_PERMUTE_N(2); + DECL_PERMUTE_N(3); +}; - #undef perm - #undef DECL_PERMUTE_N +#undef perm +#undef DECL_PERMUTE_N - #define rot(a, b, n, w)\ - VECTOR_FOR(i, w, 1)\ - {\ - b[i] = a[(i + n)%w];\ +#define rot(a, b, n, w) \ + VECTOR_FOR(i, w, 1) \ + { \ + b[i] = a[(i + n)%w]; \ } - struct Rotate{ +struct Rotate{ - template static inline vec tRotate(vec in){ - return rotate(in, n); - } + template static inline vec tRotate(vec in){ + return rotate(in, n); + } - template - static inline vec rotate(vec in, int n){ - vec out; + template + static inline vec rotate(vec in, int n){ + vec out; - rot(in.v, out.v, n, W::r); + rot(in.v, out.v, n, W::r); - return out; - } - }; - - #undef rot - - #define acc(v, a, off, step, n)\ - for (unsigned int i = off; i < n; i += step)\ - {\ - a += v[i];\ + return out; } +}; + +#undef rot - template +#define acc(v, a, off, step, n) \ + for (unsigned int i = off; i < n; i += step) \ + { \ + a += v[i]; \ + } + +template struct Reduce{ //Need templated class to overload output type //General form must generate error if compiled @@ -442,89 +442,91 @@ namespace Optimization { } }; - //Complex float Reduce - template <> - inline Grid::ComplexF Reduce::operator()(vecf in){ - float a = 0.f, b = 0.f; +//Complex float Reduce +template <> +inline Grid::ComplexF Reduce::operator()(vecf in){ + float a = 0.f, b = 0.f; - acc(in.v, a, 0, 2, W::r); - acc(in.v, b, 1, 2, W::r); + acc(in.v, a, 0, 2, W::r); + acc(in.v, b, 1, 2, W::r); - return Grid::ComplexF(a, b); - } - - //Real float Reduce - template<> - inline Grid::RealF Reduce::operator()(vecf in){ - float a = 0.; - - acc(in.v, a, 0, 1, W::r); - - return a; - } - - //Complex double Reduce - template<> - inline Grid::ComplexD Reduce::operator()(vecd in){ - double a = 0., b = 0.; - - acc(in.v, a, 0, 2, W::r); - acc(in.v, b, 1, 2, W::r); - - return Grid::ComplexD(a, b); - } - - //Real double Reduce - template<> - inline Grid::RealD Reduce::operator()(vecd in){ - double a = 0.f; - - acc(in.v, a, 0, 1, W::r); - - return a; - } - - //Integer Reduce - template<> - inline Integer Reduce::operator()(veci in){ - Integer a = 0; - - acc(in.v, a, 0, 1, W::r); - - return a; - } - - #undef acc // EIGEN compatibility + return Grid::ComplexF(a, b); } + +//Real float Reduce +template<> +inline Grid::RealF Reduce::operator()(vecf in){ + float a = 0.; + + acc(in.v, a, 0, 1, W::r); + + return a; +} + +//Complex double Reduce +template<> +inline Grid::ComplexD Reduce::operator()(vecd in){ + double a = 0., b = 0.; + + acc(in.v, a, 0, 2, W::r); + acc(in.v, b, 1, 2, W::r); + + return Grid::ComplexD(a, b); +} + +//Real double Reduce +template<> +inline Grid::RealD Reduce::operator()(vecd in){ + double a = 0.f; + + acc(in.v, a, 0, 1, W::r); + + return a; +} + +//Integer Reduce +template<> +inline Integer Reduce::operator()(veci in){ + Integer a = 0; + + acc(in.v, a, 0, 1, W::r); + + return a; +} + +#undef acc // EIGEN compatibility +NAMESPACE_END(Optimization) ////////////////////////////////////////////////////////////////////////////////////// // Here assign types - typedef Optimization::vech SIMD_Htype; // Reduced precision type - typedef Optimization::vecf SIMD_Ftype; // Single precision type - typedef Optimization::vecd SIMD_Dtype; // Double precision type - typedef Optimization::veci SIMD_Itype; // Integer type +typedef Optimization::vech SIMD_Htype; // Reduced precision type +typedef Optimization::vecf SIMD_Ftype; // Single precision type +typedef Optimization::vecd SIMD_Dtype; // Double precision type +typedef Optimization::veci SIMD_Itype; // Integer type - // prefetch utilities - inline void v_prefetch0(int size, const char *ptr){}; - inline void prefetch_HINT_T0(const char *ptr){}; +// prefetch utilities +inline void v_prefetch0(int size, const char *ptr){}; +inline void prefetch_HINT_T0(const char *ptr){}; - // Function name aliases - typedef Optimization::Vsplat VsplatSIMD; - typedef Optimization::Vstore VstoreSIMD; - typedef Optimization::Vset VsetSIMD; - typedef Optimization::Vstream VstreamSIMD; - template using ReduceSIMD = Optimization::Reduce; +// Function name aliases +typedef Optimization::Vsplat VsplatSIMD; +typedef Optimization::Vstore VstoreSIMD; +typedef Optimization::Vset VsetSIMD; +typedef Optimization::Vstream VstreamSIMD; +template using ReduceSIMD = Optimization::Reduce; + +// Arithmetic operations +typedef Optimization::Sum SumSIMD; +typedef Optimization::Sub SubSIMD; +typedef Optimization::Div DivSIMD; +typedef Optimization::Mult MultSIMD; +typedef Optimization::MultComplex MultComplexSIMD; +typedef Optimization::MultRealPart MultRealPartSIMD; +typedef Optimization::MaddRealPart MaddRealPartSIMD; +typedef Optimization::Conj ConjSIMD; +typedef Optimization::TimesMinusI TimesMinusISIMD; +typedef Optimization::TimesI TimesISIMD; + +NAMESPACE_END(Grid) - // Arithmetic operations - typedef Optimization::Sum SumSIMD; - typedef Optimization::Sub SubSIMD; - typedef Optimization::Div DivSIMD; - typedef Optimization::Mult MultSIMD; - typedef Optimization::MultComplex MultComplexSIMD; - typedef Optimization::MultRealPart MultRealPartSIMD; - typedef Optimization::MaddRealPart MaddRealPartSIMD; - typedef Optimization::Conj ConjSIMD; - typedef Optimization::TimesMinusI TimesMinusISIMD; - typedef Optimization::TimesI TimesISIMD; -}