diff --git a/lib/simd/Grid_qpx.h b/lib/simd/Grid_qpx.h index 8de7bde8..a4efeb91 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -25,14 +25,14 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - ******************************************************************************/ +******************************************************************************/ #ifndef GEN_SIMD_WIDTH #define GEN_SIMD_WIDTH 32u #endif #include "Grid_generic_types.h" // Definitions for simulated integer SIMD. -namespace Grid { +NAMESPACE_BEGIN(Grid); #ifdef QPX #include @@ -41,139 +41,140 @@ namespace Grid { #include #endif -namespace Optimization { - typedef struct - { - float v0,v1,v2,v3; - } vector4float; +NAMESPACE_BEGIN(Optimization); - inline std::ostream & operator<<(std::ostream& stream, const vector4double a) - { - stream << "{"<::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a; } - return out; - } - }; + return out; + } +}; - struct Vstore{ - //Float - inline void operator()(vector4double a, float *f){ - vec_st(a, 0, f); - } +struct Vstore{ + //Float + inline void operator()(vector4double a, float *f){ + vec_st(a, 0, f); + } - inline void operator()(vector4double a, vector4float &f){ - vec_st(a, 0, (float *)(&f)); - } + inline void operator()(vector4double a, vector4float &f){ + vec_st(a, 0, (float *)(&f)); + } - inline void operator()(vector4float a, float *f){ - f[0] = a.v0; - f[1] = a.v1; - f[2] = a.v2; - f[3] = a.v3; - } + inline void operator()(vector4float a, float *f){ + f[0] = a.v0; + f[1] = a.v1; + f[2] = a.v2; + f[3] = a.v3; + } - //Double - inline void operator()(vector4double a, double *d){ - vec_st(a, 0, d); - } + //Double + inline void operator()(vector4double a, double *d){ + vec_st(a, 0, d); + } - //Integer - inline void operator()(veci a, Integer *i){ - *((veci *)i) = a; - } - }; + //Integer + inline void operator()(veci a, Integer *i){ + *((veci *)i) = a; + } +}; - struct Vstream{ - //Float - inline void operator()(float *f, vector4double a){ - vec_st(a, 0, f); - } +struct Vstream{ + //Float + inline void operator()(float *f, vector4double a){ + vec_st(a, 0, f); + } - inline void operator()(vector4float f, vector4double a){ - vec_st(a, 0, (float *)(&f)); - } + inline void operator()(vector4float f, vector4double a){ + vec_st(a, 0, (float *)(&f)); + } - inline void operator()(float *f, vector4float a){ - f[0] = a.v0; - f[1] = a.v1; - f[2] = a.v2; - f[3] = a.v3; - } - //Double - inline void operator()(double *d, vector4double a){ - vec_st(a, 0, d); - } + inline void operator()(float *f, vector4float a){ + f[0] = a.v0; + f[1] = a.v1; + f[2] = a.v2; + f[3] = a.v3; + } + //Double + inline void operator()(double *d, vector4double a){ + vec_st(a, 0, d); + } - }; +}; - struct Vset{ - // Complex float - inline vector4float operator()(Grid::ComplexF *a){ - return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()}; - } - // Complex double - inline vector4double operator()(Grid::ComplexD *a){ - return vec_ld(0, (double *)a); - } +struct Vset{ + // Complex float + inline vector4float operator()(Grid::ComplexF *a){ + return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()}; + } + // Complex double + inline vector4double operator()(Grid::ComplexD *a){ + return vec_ld(0, (double *)a); + } - // Real float - inline vector4float operator()(float *a){ - return (vector4float){a[0], a[1], a[2], a[3]}; - } + // Real float + inline vector4float operator()(float *a){ + return (vector4float){a[0], a[1], a[2], a[3]}; + } - inline vector4double operator()(vector4float a){ - return vec_ld(0, (float *)(&a)); - } + inline vector4double operator()(vector4float a){ + return vec_ld(0, (float *)(&a)); + } - // Real double - inline vector4double operator()(double *a){ - return vec_ld(0, a); - } - // Integer - inline veci operator()(Integer *a){ - veci out; + // Real double + inline vector4double operator()(double *a){ + return vec_ld(0, a); + } + // Integer + inline veci operator()(Integer *a){ + veci out; - out = *((veci *)a); + out = *((veci *)a); - return out; - } - }; + return out; + } +}; - template +template struct Reduce{ //Need templated class to overload output type //General form must generate error if compiled @@ -184,406 +185,407 @@ namespace Optimization { } }; - ///////////////////////////////////////////////////// - // Arithmetic operations - ///////////////////////////////////////////////////// +///////////////////////////////////////////////////// +// Arithmetic operations +///////////////////////////////////////////////////// - #define FLOAT_WRAP_3(fn, pref)\ +#define FLOAT_WRAP_3(fn, pref) \ pref vector4float fn(vector4float a, vector4float b, vector4float c) \ - {\ - vector4double ad, bd, rd, cd; \ - vector4float r;\ - \ - ad = Vset()(a);\ - bd = Vset()(b);\ - cd = Vset()(c);\ - rd = fn(ad, bd, cd); \ - Vstore()(rd, r);\ - \ - return r;\ + { \ + vector4double ad, bd, rd, cd; \ + vector4float r; \ + \ + ad = Vset()(a); \ + bd = Vset()(b); \ + cd = Vset()(c); \ + rd = fn(ad, bd, cd); \ + Vstore()(rd, r); \ + \ + return r; \ } - #define FLOAT_WRAP_2(fn, pref)\ - pref vector4float fn(vector4float a, vector4float b)\ - {\ - vector4double ad, bd, rd;\ - vector4float r;\ - \ - ad = Vset()(a);\ - bd = Vset()(b);\ - rd = fn(ad, bd);\ - Vstore()(rd, r);\ - \ - return r;\ +#define FLOAT_WRAP_2(fn, pref) \ + pref vector4float fn(vector4float a, vector4float b) \ + { \ + vector4double ad, bd, rd; \ + vector4float r; \ + \ + ad = Vset()(a); \ + bd = Vset()(b); \ + rd = fn(ad, bd); \ + Vstore()(rd, r); \ + \ + return r; \ } - #define FLOAT_WRAP_1(fn, pref)\ - pref vector4float fn(vector4float a)\ - {\ - vector4double ad, rd;\ - vector4float r;\ - \ - ad = Vset()(a);\ - rd = fn(ad);\ - Vstore()(rd, r);\ - \ - return r;\ +#define FLOAT_WRAP_1(fn, pref) \ + pref vector4float fn(vector4float a) \ + { \ + vector4double ad, rd; \ + vector4float r; \ + \ + ad = Vset()(a); \ + rd = fn(ad); \ + Vstore()(rd, r); \ + \ + return r; \ } - struct Sum{ - //Complex/Real double - inline vector4double operator()(vector4double a, vector4double b){ - return vec_add(a, b); - } +struct Sum{ + //Complex/Real double + inline vector4double operator()(vector4double a, vector4double b){ + return vec_add(a, b); + } - //Complex/Real float - FLOAT_WRAP_2(operator(), inline) + //Complex/Real float + FLOAT_WRAP_2(operator(), inline) - //Integer - inline veci operator()(veci a, veci b){ - veci out; + //Integer + inline veci operator()(veci a, veci b){ + veci out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i] + b.v[i]; } - return out; - } - }; + return out; + } +}; - struct Sub{ - //Complex/Real double - inline vector4double operator()(vector4double a, vector4double b){ - return vec_sub(a, b); - } +struct Sub{ + //Complex/Real double + inline vector4double operator()(vector4double a, vector4double b){ + return vec_sub(a, b); + } - //Complex/Real float - FLOAT_WRAP_2(operator(), inline) + //Complex/Real float + FLOAT_WRAP_2(operator(), inline) - //Integer - inline veci operator()(veci a, veci b){ - veci out; + //Integer + inline veci operator()(veci a, veci b){ + veci out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i] - b.v[i]; } - return out; - } - }; + return out; + } +}; - struct MultRealPart{ - // Complex double - inline vector4double operator()(vector4double a, vector4double b){ - // return vec_xmul(b, a); - return vec_xmul(a, b); - } - FLOAT_WRAP_2(operator(), inline) - }; - struct MaddRealPart{ - // Complex double - inline vector4double operator()(vector4double a, vector4double b,vector4double c){ - return vec_xmadd(a, b, c); - } - FLOAT_WRAP_3(operator(), inline) - }; - struct MultComplex{ - // Complex double - inline vector4double operator()(vector4double a, vector4double b){ - return vec_xxnpmadd(a, b, vec_xmul(b, a)); - } +struct MultRealPart{ + // Complex double + inline vector4double operator()(vector4double a, vector4double b){ + // return vec_xmul(b, a); + return vec_xmul(a, b); + } + FLOAT_WRAP_2(operator(), inline) +}; +struct MaddRealPart{ + // Complex double + inline vector4double operator()(vector4double a, vector4double b,vector4double c){ + return vec_xmadd(a, b, c); + } + FLOAT_WRAP_3(operator(), inline) +}; +struct MultComplex{ + // Complex double + inline vector4double operator()(vector4double a, vector4double b){ + return vec_xxnpmadd(a, b, vec_xmul(b, a)); + } - // Complex float - FLOAT_WRAP_2(operator(), inline) - }; + // Complex float + FLOAT_WRAP_2(operator(), inline) +}; - struct Mult{ - // Real double - inline vector4double operator()(vector4double a, vector4double b){ - return vec_mul(a, b); - } +struct Mult{ + // Real double + inline vector4double operator()(vector4double a, vector4double b){ + return vec_mul(a, b); + } - // Real float - FLOAT_WRAP_2(operator(), inline) + // Real float + FLOAT_WRAP_2(operator(), inline) - // Integer - inline veci operator()(veci a, veci b){ - veci out; + // Integer + inline veci operator()(veci a, veci b){ + veci out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i]*b.v[i]; } - return out; - } - }; + return out; + } +}; - struct Div{ - // Real double - inline vector4double operator()(vector4double a, vector4double b){ - return vec_swdiv(a, b); - } +struct Div{ + // Real double + inline vector4double operator()(vector4double a, vector4double b){ + return vec_swdiv(a, b); + } - // Real float - FLOAT_WRAP_2(operator(), inline) + // Real float + FLOAT_WRAP_2(operator(), inline) - // Integer - inline veci operator()(veci a, veci b){ - veci out; + // Integer + inline veci operator()(veci a, veci b){ + veci out; - VECTOR_FOR(i, W::r, 1) + VECTOR_FOR(i, W::r, 1) { out.v[i] = a.v[i]/b.v[i]; } - return out; - } - }; + return out; + } +}; - struct Conj{ - // Complex double - inline vector4double operator()(vector4double v){ - return vec_mul(v, (vector4double){1., -1., 1., -1.}); - } +struct Conj{ + // Complex double + inline vector4double operator()(vector4double v){ + return vec_mul(v, (vector4double){1., -1., 1., -1.}); + } - // Complex float - FLOAT_WRAP_1(operator(), inline) - }; + // Complex float + FLOAT_WRAP_1(operator(), inline) +}; - struct TimesMinusI{ - //Complex double - inline vector4double operator()(vector4double v, vector4double ret){ - return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.}, - (vector4double){0., 0., 0., 0.}); - } +struct TimesMinusI{ + //Complex double + inline vector4double operator()(vector4double v, vector4double ret){ + return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.}, + (vector4double){0., 0., 0., 0.}); + } - // Complex float - FLOAT_WRAP_2(operator(), inline) - }; + // Complex float + FLOAT_WRAP_2(operator(), inline) +}; - struct TimesI{ - //Complex double - inline vector4double operator()(vector4double v, vector4double ret){ - return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.}, - (vector4double){0., 0., 0., 0.}); - } +struct TimesI{ + //Complex double + inline vector4double operator()(vector4double v, vector4double ret){ + return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.}, + (vector4double){0., 0., 0., 0.}); + } - // Complex float - FLOAT_WRAP_2(operator(), inline) - }; + // Complex float + FLOAT_WRAP_2(operator(), inline) +}; #define USE_FP16 - struct PrecisionChange { - static inline vech StoH (const vector4float &a, const vector4float &b) { - vech ret; - std::cout << GridLogError << "QPX single to half precision conversion not yet supported." << std::endl; - assert(0); - return ret; - } - static inline void HtoS (vech h, vector4float &sa, vector4float &sb) { - std::cout << GridLogError << "QPX half to single precision conversion not yet supported." << std::endl; - assert(0); - } - static inline vector4float DtoS (vector4double a, vector4double b) { - vector4float ret; - std::cout << GridLogError << "QPX double to single precision conversion not yet supported." << std::endl; - assert(0); - return ret; - } - static inline void StoD (vector4float s, vector4double &a, vector4double &b) { - std::cout << GridLogError << "QPX single to double precision conversion not yet supported." << std::endl; - assert(0); - } - static inline vech DtoH (vector4double a, vector4double b, - vector4double c, vector4double d) { - vech ret; - std::cout << GridLogError << "QPX double to half precision conversion not yet supported." << std::endl; - assert(0); - return ret; - } - static inline void HtoD (vech h, vector4double &a, vector4double &b, - vector4double &c, vector4double &d) { - std::cout << GridLogError << "QPX half to double precision conversion not yet supported." << std::endl; - assert(0); - } - }; +struct PrecisionChange { + static inline vech StoH (const vector4float &a, const vector4float &b) { + vech ret; + std::cout << GridLogError << "QPX single to half precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void HtoS (vech h, vector4float &sa, vector4float &sb) { + std::cout << GridLogError << "QPX half to single precision conversion not yet supported." << std::endl; + assert(0); + } + static inline vector4float DtoS (vector4double a, vector4double b) { + vector4float ret; + std::cout << GridLogError << "QPX double to single precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void StoD (vector4float s, vector4double &a, vector4double &b) { + std::cout << GridLogError << "QPX single to double precision conversion not yet supported." << std::endl; + assert(0); + } + static inline vech DtoH (vector4double a, vector4double b, + vector4double c, vector4double d) { + vech ret; + std::cout << GridLogError << "QPX double to half precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void HtoD (vech h, vector4double &a, vector4double &b, + vector4double &c, vector4double &d) { + std::cout << GridLogError << "QPX half to double precision conversion not yet supported." << std::endl; + assert(0); + } +}; - ////////////////////////////////////////////// - // Exchange support -#define FLOAT_WRAP_EXCHANGE(fn) \ +////////////////////////////////////////////// +// Exchange support +#define FLOAT_WRAP_EXCHANGE(fn) \ static inline void fn(vector4float &out1, vector4float &out2, \ - vector4float in1, vector4float in2) \ - { \ - vector4double out1d, out2d, in1d, in2d; \ - in1d = Vset()(in1); \ - in2d = Vset()(in2); \ - fn(out1d, out2d, in1d, in2d); \ - Vstore()(out1d, out1); \ - Vstore()(out2d, out2); \ + vector4float in1, vector4float in2) \ + { \ + vector4double out1d, out2d, in1d, in2d; \ + in1d = Vset()(in1); \ + in2d = Vset()(in2); \ + fn(out1d, out2d, in1d, in2d); \ + Vstore()(out1d, out1); \ + Vstore()(out2d, out2); \ } - struct Exchange{ +struct Exchange{ - // double precision - static inline void Exchange0(vector4double &out1, vector4double &out2, - vector4double in1, vector4double in2) { - out1 = vec_perm(in1, in2, vec_gpci(0145)); - out2 = vec_perm(in1, in2, vec_gpci(02367)); - } - static inline void Exchange1(vector4double &out1, vector4double &out2, - vector4double in1, vector4double in2) { - out1 = vec_perm(in1, in2, vec_gpci(0426)); - out2 = vec_perm(in1, in2, vec_gpci(01537)); - } - static inline void Exchange2(vector4double &out1, vector4double &out2, - vector4double in1, vector4double in2) { - assert(0); - } - static inline void Exchange3(vector4double &out1, vector4double &out2, - vector4double in1, vector4double in2) { - assert(0); - } + // double precision + static inline void Exchange0(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + out1 = vec_perm(in1, in2, vec_gpci(0145)); + out2 = vec_perm(in1, in2, vec_gpci(02367)); + } + static inline void Exchange1(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + out1 = vec_perm(in1, in2, vec_gpci(0426)); + out2 = vec_perm(in1, in2, vec_gpci(01537)); + } + static inline void Exchange2(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + assert(0); + } + static inline void Exchange3(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + assert(0); + } - // single precision - FLOAT_WRAP_EXCHANGE(Exchange0); - FLOAT_WRAP_EXCHANGE(Exchange1); - FLOAT_WRAP_EXCHANGE(Exchange2); - FLOAT_WRAP_EXCHANGE(Exchange3); + // single precision + FLOAT_WRAP_EXCHANGE(Exchange0); + FLOAT_WRAP_EXCHANGE(Exchange1); + FLOAT_WRAP_EXCHANGE(Exchange2); + FLOAT_WRAP_EXCHANGE(Exchange3); +}; + +struct Permute{ + //Complex double + static inline vector4double Permute0(vector4double v){ //0123 -> 2301 + return vec_perm(v, v, vec_gpci(02301)); + }; + static inline vector4double Permute1(vector4double v){ //0123 -> 1032 + return vec_perm(v, v, vec_gpci(01032)); + }; + static inline vector4double Permute2(vector4double v){ + return v; + }; + static inline vector4double Permute3(vector4double v){ + return v; }; - struct Permute{ - //Complex double - static inline vector4double Permute0(vector4double v){ //0123 -> 2301 - return vec_perm(v, v, vec_gpci(02301)); - }; - static inline vector4double Permute1(vector4double v){ //0123 -> 1032 - return vec_perm(v, v, vec_gpci(01032)); - }; - static inline vector4double Permute2(vector4double v){ - return v; - }; - static inline vector4double Permute3(vector4double v){ - return v; - }; + // Complex float + FLOAT_WRAP_1(Permute0, static inline) + FLOAT_WRAP_1(Permute1, static inline) + FLOAT_WRAP_1(Permute2, static inline) + FLOAT_WRAP_1(Permute3, static inline) +}; + +struct Rotate{ - // Complex float - FLOAT_WRAP_1(Permute0, static inline) - FLOAT_WRAP_1(Permute1, static inline) - FLOAT_WRAP_1(Permute2, static inline) - FLOAT_WRAP_1(Permute3, static inline) + template static inline vector4double tRotate(vector4double v){ + if ( n==1 ) return vec_perm(v, v, vec_gpci(01230)); + if ( n==2 ) return vec_perm(v, v, vec_gpci(02301)); + if ( n==3 ) return vec_perm(v, v, vec_gpci(03012)); + return v; }; - - struct Rotate{ - - template static inline vector4double tRotate(vector4double v){ - if ( n==1 ) return vec_perm(v, v, vec_gpci(01230)); - if ( n==2 ) return vec_perm(v, v, vec_gpci(02301)); - if ( n==3 ) return vec_perm(v, v, vec_gpci(03012)); - return v; - }; - template static inline vector4float tRotate(vector4float a) - { - vector4double ad, rd; - vector4float r; - ad = Vset()(a); - rd = tRotate(ad); - Vstore()(rd, r); - return r; - }; - - static inline vector4double rotate(vector4double v, int n){ - switch(n){ - case 0: - return v; - break; - case 1: - return tRotate<1>(v); - break; - case 2: - return tRotate<2>(v); - break; - case 3: - return tRotate<3>(v); - break; - default: assert(0); - } - } - - static inline vector4float rotate(vector4float v, int n){ - vector4double vd, rd; - vector4float r; - vd = Vset()(v); - rd = rotate(vd, n); - Vstore()(rd, r); - return r; - } + template static inline vector4float tRotate(vector4float a) + { + vector4double ad, rd; + vector4float r; + ad = Vset()(a); + rd = tRotate(ad); + Vstore()(rd, r); + return r; }; - - //Complex float Reduce - template<> - inline Grid::ComplexF - Reduce::operator()(vector4float v) { //2 complex - vector4float v1,v2; - - v1 = Optimization::Permute::Permute0(v); - v1 = Optimization::Sum()(v1, v); - - return Grid::ComplexF(v1.v0, v1.v1); - } - //Real float Reduce - template<> - inline Grid::RealF - Reduce::operator()(vector4float v){ //4 floats - vector4float v1,v2; - - v1 = Optimization::Permute::Permute0(v); - v1 = Optimization::Sum()(v1, v); - v2 = Optimization::Permute::Permute1(v1); - v1 = Optimization::Sum()(v1, v2); - - return v1.v0; - } - - - //Complex double Reduce - template<> - inline Grid::ComplexD - Reduce::operator()(vector4double v){ //2 complex - vector4double v1; - - v1 = Optimization::Permute::Permute0(v); - v1 = vec_add(v1, v); - - return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1)); - } - - //Real double Reduce - template<> - inline Grid::RealD - Reduce::operator()(vector4double v){ //4 doubles - vector4double v1,v2; - - v1 = Optimization::Permute::Permute0(v); - v1 = vec_add(v1, v); - v2 = Optimization::Permute::Permute1(v1); - v1 = vec_add(v1, v2); - return vec_extract(v1, 0); - } - - //Integer Reduce - template<> - inline Integer Reduce::operator()(veci in){ - Integer a = 0; - for (unsigned int i = 0; i < W::r; ++i) - { - a += in.v[i]; + static inline vector4double rotate(vector4double v, int n){ + switch(n){ + case 0: + return v; + break; + case 1: + return tRotate<1>(v); + break; + case 2: + return tRotate<2>(v); + break; + case 3: + return tRotate<3>(v); + break; + default: assert(0); } - return a; } + + static inline vector4float rotate(vector4float v, int n){ + vector4double vd, rd; + vector4float r; + vd = Vset()(v); + rd = rotate(vd, n); + Vstore()(rd, r); + return r; + } +}; + +//Complex float Reduce +template<> +inline Grid::ComplexF +Reduce::operator()(vector4float v) { //2 complex + vector4float v1,v2; + + v1 = Optimization::Permute::Permute0(v); + v1 = Optimization::Sum()(v1, v); + + return Grid::ComplexF(v1.v0, v1.v1); } +//Real float Reduce +template<> +inline Grid::RealF +Reduce::operator()(vector4float v){ //4 floats + vector4float v1,v2; + + v1 = Optimization::Permute::Permute0(v); + v1 = Optimization::Sum()(v1, v); + v2 = Optimization::Permute::Permute1(v1); + v1 = Optimization::Sum()(v1, v2); + + return v1.v0; +} + + +//Complex double Reduce +template<> +inline Grid::ComplexD +Reduce::operator()(vector4double v){ //2 complex + vector4double v1; + + v1 = Optimization::Permute::Permute0(v); + v1 = vec_add(v1, v); + + return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1)); +} + +//Real double Reduce +template<> +inline Grid::RealD +Reduce::operator()(vector4double v){ //4 doubles + vector4double v1,v2; + + v1 = Optimization::Permute::Permute0(v); + v1 = vec_add(v1, v); + v2 = Optimization::Permute::Permute1(v1); + v1 = vec_add(v1, v2); + + return vec_extract(v1, 0); +} + +//Integer Reduce +template<> +inline Integer Reduce::operator()(veci in){ + Integer a = 0; + for (unsigned int i = 0; i < W::r; ++i) + { + a += in.v[i]; + } + return a; +} + +NAMESPACE_END(Optimization); //////////////////////////////////////////////////////////////////////////////// // Here assign types @@ -596,7 +598,6 @@ typedef Optimization::veci SIMD_Itype; // Integer type inline void v_prefetch0(int size, const char *ptr){}; inline void prefetch_HINT_T0(const char *ptr){}; - // Function name aliases typedef Optimization::Vsplat VsplatSIMD; typedef Optimization::Vstore VstoreSIMD; @@ -615,5 +616,5 @@ typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; - -} + +NAMESPACE_END(Grid)