From e3d53194704ea052968379660c1e7589440e5a05 Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Wed, 6 Jul 2016 14:16:03 +0100 Subject: [PATCH] Debugged the real() and imag() functions and added tests to Test_Simd --- lib/Simd.h | 45 +- lib/lattice/Lattice_reduction.h | 2 +- lib/qcd/smearing/StoutSmearing.h | 247 ++++--- lib/simd/Grid_vector_types.h | 1080 +++++++++++++++++------------- lib/simd/Grid_vector_unops.h | 415 ++++++------ tests/Test_simd.cc | 77 ++- 6 files changed, 999 insertions(+), 867 deletions(-) diff --git a/lib/Simd.h b/lib/Simd.h index de49cca7..6a812e5e 100644 --- a/lib/Simd.h +++ b/lib/Simd.h @@ -1,32 +1,33 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/Simd.h +Source file: ./lib/Simd.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: neo Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_SIMD_H #define GRID_SIMD_H @@ -118,6 +119,14 @@ namespace Grid { inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));} inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));} inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));} + + // define projections to real and imaginay parts + inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));} + inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));} + inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));} + inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));} + + // define auxiliary functions for complex computations inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);} inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);} inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);} diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 4599f2f9..2615af48 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -40,7 +40,7 @@ namespace Grid { //////////////////////////////////////////////////////////////////////////////////////////////////// template inline RealD norm2(const Lattice &arg){ ComplexD nrm = innerProduct(arg,arg); - return real(nrm); + return std::real(nrm); } template diff --git a/lib/qcd/smearing/StoutSmearing.h b/lib/qcd/smearing/StoutSmearing.h index 511a5c29..50a09972 100644 --- a/lib/qcd/smearing/StoutSmearing.h +++ b/lib/qcd/smearing/StoutSmearing.h @@ -5,163 +5,156 @@ #ifndef STOUT_SMEAR_ #define STOUT_SMEAR_ - namespace Grid { - namespace QCD { +namespace Grid { +namespace QCD { - /*! @brief Stout smearing of link variable. */ - template - class Smear_Stout: public Smear { - private: - const Smear < Gimpl > * SmearBase; +/*! @brief Stout smearing of link variable. */ +template +class Smear_Stout : public Smear { + private: + const Smear* SmearBase; - public: - INHERIT_GIMPL_TYPES(Gimpl) + public: + INHERIT_GIMPL_TYPES(Gimpl) - Smear_Stout(Smear < Gimpl >* base):SmearBase(base){ - static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3"); - } + Smear_Stout(Smear* base) : SmearBase(base) { + static_assert(Nc == 3, + "Stout smearing currently implemented only for Nc==3"); + } - /*! Default constructor */ - Smear_Stout(double rho = 1.0):SmearBase(new Smear_APE < Gimpl > (rho)){ - static_assert(Nc==3, "Stout smearing currently implemented only for Nc==3"); - } + /*! Default constructor */ + Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE(rho)) { + static_assert(Nc == 3, + "Stout smearing currently implemented only for Nc==3"); + } - ~Smear_Stout(){} //delete SmearBase... + ~Smear_Stout() {} // delete SmearBase... - void smear(GaugeField& u_smr,const GaugeField& U) const{ - GaugeField C(U._grid); - GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid); + void smear(GaugeField& u_smr, const GaugeField& U) const { + GaugeField C(U._grid); + GaugeLinkField tmp(U._grid), iq_mu(U._grid), Umu(U._grid); - std::cout<< GridLogDebug << "Stout smearing started\n"; + std::cout << GridLogDebug << "Stout smearing started\n"; - //Smear the configurations - SmearBase->smear(C, U); + // Smear the configurations + SmearBase->smear(C, U); - for (int mu = 0; muderivative(SigmaTerm, iLambda, Gauge); + }; - void derivative(GaugeField& SigmaTerm, - const GaugeField& iLambda, - const GaugeField& Gauge) const{ - SmearBase->derivative(SigmaTerm, iLambda, Gauge); - }; + void BaseSmear(GaugeField& C, const GaugeField& U) const { + SmearBase->smear(C, U); + }; + void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const { + // Put this outside + // only valid for SU(3) matrices - void BaseSmear(GaugeField& C, - const GaugeField& U) const{ - SmearBase->smear(C, U); - }; + // only one Lorentz direction at a time - void exponentiate_iQ(GaugeLinkField& e_iQ, - const GaugeLinkField& iQ) const{ - // Put this outside - // only valid for SU(3) matrices + // notice that it actually computes + // exp ( input matrix ) + // the i sign is coming from outside + // input matrix is anti-hermitian NOT hermitian - // only one Lorentz direction at a time + GridBase* grid = iQ._grid; + GaugeLinkField unity(grid); + unity = 1.0; - // notice that it actually computes - // exp ( input matrix ) - // the i sign is coming from outside - // input matrix is anti-hermitian NOT hermitian + GaugeLinkField iQ2(grid), iQ3(grid); + LatticeComplex u(grid), w(grid); + LatticeComplex f0(grid), f1(grid), f2(grid); - GridBase *grid = iQ._grid; - GaugeLinkField unity(grid); - unity=1.0; + iQ2 = iQ * iQ; + iQ3 = iQ * iQ2; - GaugeLinkField iQ2(grid), iQ3(grid); - LatticeComplex u(grid), w(grid); - LatticeComplex f0(grid), f1(grid), f2(grid); + set_uw(u, w, iQ2, iQ3); + set_fj(f0, f1, f2, u, w); - iQ2 = iQ * iQ; - iQ3 = iQ * iQ2; + e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2; + }; - set_uw(u, w, iQ2, iQ3); - set_fj(f0, f1, f2, u, w); + void set_uw(LatticeComplex& u, LatticeComplex& w, GaugeLinkField& iQ2, + GaugeLinkField& iQ3) const { + Complex one_over_three = 1.0 / 3.0; + Complex one_over_two = 1.0 / 2.0; - e_iQ = f0*unity + timesMinusI(f1) * iQ - f2 * iQ2; + GridBase* grid = u._grid; + LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid); + // sign in c0 from the conventions on the Ta + c0 = -imag(trace(iQ3)) * one_over_three; + c1 = -real(trace(iQ2)) * one_over_two; - }; + // Cayley Hamilton checks to machine precision, tested + tmp = c1 * one_over_three; + c0max = 2.0 * pow(tmp, 1.5); + theta = acos(c0 / c0max) * + one_over_three; // divide by three here, now leave as it is + u = sqrt(tmp) * cos(theta); + w = sqrt(c1) * sin(theta); + } - void set_uw(LatticeComplex& u, LatticeComplex& w, - GaugeLinkField& iQ2, GaugeLinkField& iQ3) const{ - Complex one_over_three = 1.0/3.0; - Complex one_over_two = 1.0/2.0; + void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2, + const LatticeComplex& u, const LatticeComplex& w) const { + GridBase* grid = u._grid; + LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid); + LatticeComplex fden(grid); + LatticeComplex h0(grid), h1(grid), h2(grid); + LatticeComplex e2iu(grid), emiu(grid), ixi0(grid), qt(grid); + LatticeComplex unity(grid); + unity = 1.0; - GridBase *grid = u._grid; - LatticeComplex c0(grid), c1(grid), tmp(grid), c0max(grid), theta(grid); + xi0 = func_xi0(w); + u2 = u * u; + w2 = w * w; + cosw = cos(w); - // sign in c0 from the conventions on the Ta - c0 = - real(timesMinusI(trace(iQ3))) * one_over_three; //temporary hack - c1 = - real(trace(iQ2)) * one_over_two; + ixi0 = timesI(xi0); + emiu = cos(u) - timesI(sin(u)); + e2iu = cos(2.0 * u) + timesI(sin(2.0 * u)); - //Cayley Hamilton checks to machine precision, tested - tmp = c1 * one_over_three; - c0max = 2.0 * pow(tmp, 1.5); + h0 = e2iu * (u2 - w2) + + emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0)); + h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0); + h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0); - theta = acos(c0/c0max)*one_over_three; // divide by three here, now leave as it is - u = sqrt(tmp) * cos( theta ); - w = sqrt(c1) * sin( theta ); - } + fden = unity / (9.0 * u2 - w2); // reals + f0 = h0 * fden; + f1 = h1 * fden; + f2 = h2 * fden; + } - void set_fj(LatticeComplex& f0, LatticeComplex& f1, LatticeComplex& f2, - const LatticeComplex& u, const LatticeComplex& w) const{ + LatticeComplex func_xi0(const LatticeComplex& w) const { + // Define a function to do the check + // if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small: + // "<< w <<"\n"; + return sin(w) / w; + } - GridBase *grid = u._grid; - LatticeComplex xi0(grid), u2(grid), w2(grid), cosw(grid); - LatticeComplex fden(grid); - LatticeComplex h0(grid), h1(grid), h2(grid); - LatticeComplex e2iu(grid), emiu(grid), ixi0(grid), qt(grid); - LatticeComplex unity(grid); - unity = 1.0; - - xi0 = func_xi0(w); - u2 = u * u; - w2 = w * w; - cosw = cos(w); - - ixi0 = timesI(xi0); - emiu = cos(u) - timesI(sin(u)); - e2iu = cos(2.0*u) + timesI(sin(2.0*u)); - - h0 = e2iu * (u2 - w2) + emiu * ( (8.0*u2*cosw) + (2.0*u*(3.0*u2 + w2)*ixi0)); - h1 = e2iu * (2.0 * u) - emiu * ( (2.0*u*cosw) - (3.0*u2-w2)*ixi0); - h2 = e2iu - emiu * ( cosw + (3.0*u)*ixi0); - - fden = unity/(9.0*u2 - w2);// reals - f0 = h0 * fden; - f1 = h1 * fden; - f2 = h2 * fden; - } - - - - - LatticeComplex func_xi0(const LatticeComplex& w) const{ - // Define a function to do the check - //if( w < 1e-4 ) std::cout << GridLogWarning<< "[Smear_stout] w too small: "<< w <<"\n"; - return sin(w)/w; - } - - LatticeComplex func_xi1(const LatticeComplex& w) const{ - // Define a function to do the check - //if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small: "<< w <<"\n"; - return cos(w)/(w*w) - sin(w)/(w*w*w); - } - - }; - - } + LatticeComplex func_xi1(const LatticeComplex& w) const { + // Define a function to do the check + // if( w < 1e-4 ) std::cout << GridLogWarning << "[Smear_stout] w too small: + // "<< w <<"\n"; + return cos(w) / (w * w) - sin(w) / (w * w * w); + } +}; +} } -#endif +#endif diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index b9f7169e..0b2b6728 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -1,33 +1,34 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/simd/Grid_vector_types.h +Source file: ./lib/simd/Grid_vector_types.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Guido Cossu Author: Peter Boyle Author: neo - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ //--------------------------------------------------------------------------- /*! @file Grid_vector_types.h @brief Defines templated class Grid_simd to deal with inner vector types @@ -43,7 +44,7 @@ Author: neo #ifdef SSE4 #include "Grid_sse4.h" #endif -#if defined (AVX1)|| defined (AVX2) || defined (AVXFMA4) +#if defined(AVX1) || defined(AVX2) || defined(AVXFMA4) #include "Grid_avx.h" #endif #if defined AVX512 @@ -61,518 +62,635 @@ Author: neo namespace Grid { - ////////////////////////////////////// - // To take the floating point type of real/complex type - ////////////////////////////////////// - template struct RealPart { - typedef T type; - }; - template struct RealPart< std::complex >{ - typedef T type; - }; - - ////////////////////////////////////// - // demote a vector to real type - ////////////////////////////////////// +////////////////////////////////////// +// To take the floating point type of real/complex type +////////////////////////////////////// +template +struct RealPart { + typedef T type; +}; +template +struct RealPart > { + typedef T type; +}; - // type alias used to simplify the syntax of std::enable_if - template using Invoke = typename T::type; - template using EnableIf = Invoke >; - template using NotEnableIf= Invoke >; +////////////////////////////////////// +// demote a vector to real type +////////////////////////////////////// +// type alias used to simplify the syntax of std::enable_if +template +using Invoke = typename T::type; +template +using EnableIf = Invoke >; +template +using NotEnableIf = Invoke >; - //////////////////////////////////////////////////////// - // Check for complexity with type traits - template struct is_complex : public std::false_type {}; - template <> struct is_complex >: public std::true_type {}; - template <> struct is_complex > : public std::true_type {}; +//////////////////////////////////////////////////////// +// Check for complexity with type traits +template +struct is_complex : public std::false_type {}; +template <> +struct is_complex > : public std::true_type {}; +template <> +struct is_complex > : public std::true_type {}; - template using IfReal = Invoke::value,int> > ; - template using IfComplex = Invoke::value,int> > ; - template using IfInteger = Invoke::value,int> > ; +template +using IfReal = Invoke::value, int> >; +template +using IfComplex = Invoke::value, int> >; +template +using IfInteger = Invoke::value, int> >; - template using IfNotReal = Invoke::value,int> > ; - template using IfNotComplex = Invoke::value,int> > ; - template using IfNotInteger = Invoke::value,int> > ; +template +using IfNotReal = + Invoke::value, int> >; +template +using IfNotComplex = Invoke::value, int> >; +template +using IfNotInteger = Invoke::value, int> >; - //////////////////////////////////////////////////////// - // Define the operation templates functors - // general forms to allow for vsplat syntax - // need explicit declaration of types when used since - // clang cannot automatically determine the output type sometimes - template < class Out, class Input1, class Input2, class Operation > - Out binary(Input1 src_1, Input2 src_2, Operation op){ - return op(src_1, src_2); - } +//////////////////////////////////////////////////////// +// Define the operation templates functors +// general forms to allow for vsplat syntax +// need explicit declaration of types when used since +// clang cannot automatically determine the output type sometimes +template +Out binary(Input1 src_1, Input2 src_2, Operation op) { + return op(src_1, src_2); +} - template < class Out, class Input, class Operation > - Out unary(Input src, Operation op){ - return op(src); - } - /////////////////////////////////////////////// +template +Out unary(Input src, Operation op) { + return op(src); +} +/////////////////////////////////////////////// +/* + @brief Grid_simd class for the SIMD vector type operations + */ +template +class Grid_simd { + public: + typedef typename RealPart::type Real; + typedef Vector_type vector_type; + typedef Scalar_type scalar_type; - - /* - @brief Grid_simd class for the SIMD vector type operations - */ - template < class Scalar_type, class Vector_type > - class Grid_simd { - - public: - typedef typename RealPart < Scalar_type >::type Real; - typedef Vector_type vector_type; - typedef Scalar_type scalar_type; - - - typedef union conv_t_union { - Vector_type v; - Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)]; - conv_t_union(){}; - } conv_t; - - + typedef union conv_t_union { Vector_type v; - - static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);} - - Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;}; - Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler - Grid_simd()=default; - Grid_simd(const Grid_simd& rhs) :v(rhs.v){}; //compiles in movaps - Grid_simd(const Grid_simd&& rhs):v(rhs.v){}; + Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)]; + conv_t_union(){}; + } conv_t; - ///////////////////////////// - // Constructors - ///////////////////////////// - Grid_simd & operator = ( Zero & z){ - vzero(*this); - return (*this); - } - - //Enable if complex type - template < typename S = Scalar_type > - Grid_simd(const typename std::enable_if< is_complex < S >::value, S>::type a){ - vsplat(*this,a); - }; + Vector_type v; - Grid_simd(const Real a){ - vsplat(*this,Scalar_type(a)); - }; - - /////////////////////////////////////////////// - // mac, mult, sub, add, adj - /////////////////////////////////////////////// - - // FIXME -- alias this to an inline MAC struct. - friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; - - - friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } - friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } - friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } - - friend inline void mac (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; - friend inline void mult(Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } - friend inline void sub (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } - friend inline void add (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } - - friend inline void mac (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ a,const Scalar_type *__restrict__ x){ *y = (*a)*(*x)+(*y); }; - friend inline void mult(Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) * (*r); } - friend inline void sub (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) - (*r); } - friend inline void add (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) + (*r); } - - //////////////////////////////////////////////////////////////////////// - // FIXME: gonna remove these load/store, get, set, prefetch - //////////////////////////////////////////////////////////////////////// - friend inline void vset(Grid_simd &ret, Scalar_type *a){ - ret.v = unary(a, VsetSIMD()); - } - - /////////////////////// - // Vstore - /////////////////////// - friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ - binary(ret.v, (Real*)a, VstoreSIMD()); - } - - /////////////////////// - // Vprefetch - /////////////////////// - friend inline void vprefetch(const Grid_simd &v) - { - prefetch_HINT_T0((const char*)&v.v); - } - - /////////////////////// - // Reduce - /////////////////////// - friend inline Scalar_type Reduce(const Grid_simd & in) - { - return unary(in.v, ReduceSIMD()); - } - - //////////////////////////// - // opreator scalar * simd - //////////////////////////// - friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){ - Grid_simd va; - vsplat(va,a); - return va*b; - } - friend inline Grid_simd operator * (Grid_simd b,const Scalar_type &a){ - return a*b; - } - - /////////////////////// - // Unary negation - /////////////////////// - friend inline Grid_simd operator -(const Grid_simd &r) { - Grid_simd ret; - vzero(ret); - ret = ret - r; - return ret; - } - // *=,+=,-= operators - inline Grid_simd &operator *=(const Grid_simd &r) { - *this = (*this)*r; - return *this; - // return (*this)*r; ? - } - inline Grid_simd &operator +=(const Grid_simd &r) { - *this = *this+r; - return *this; - } - inline Grid_simd &operator -=(const Grid_simd &r) { - *this = *this-r; - return *this; - } - - /////////////////////////////////////// - // Not all functions are supported - // through SIMD and must breakout to - // scalar type and back again. This - // provides support - /////////////////////////////////////// - - template friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) { - Grid_simd ret; - Grid_simd::conv_t conv; - - conv.v = v.v; - for(int i=0;i friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) { - Grid_simd ret; - Grid_simd::conv_t cx; - Grid_simd::conv_t cy; - - cx.v = x.v; - cy.v = y.v; - for(int i=0;i =0> - inline Grid_simd rotate(Grid_simd b,int nrot) - { - nrot = nrot % Grid_simd::Nsimd(); - Grid_simd ret; - // std::cout << "Rotate Real by "< =0> - inline Grid_simd rotate(Grid_simd b,int nrot) - { - nrot = nrot % Grid_simd::Nsimd(); - Grid_simd ret; - // std::cout << "Rotate Complex by "< =0, class ABtype> - inline void vsplat(Grid_simd &ret,ABtype a, ABtype b){ - ret.v = binary(a, b, VsplatSIMD()); - } + Grid_simd &operator=(const Grid_simd &&rhs) { + v = rhs.v; + return *this; + }; + Grid_simd &operator=(const Grid_simd &rhs) { + v = rhs.v; + return *this; + }; // faster than not declaring it and leaving to the compiler + Grid_simd() = default; + Grid_simd(const Grid_simd &rhs) : v(rhs.v){}; // compiles in movaps + Grid_simd(const Grid_simd &&rhs) : v(rhs.v){}; - // overload if complex - template inline void vsplat(Grid_simd &ret, EnableIf, S> c) { - vsplat(ret,real(c),imag(c)); + ///////////////////////////// + // Constructors + ///////////////////////////// + Grid_simd &operator=(Zero &z) { + vzero(*this); + return (*this); } - //if real fill with a, if complex fill with a in the real part (first function above) - template - inline void vsplat(Grid_simd &ret,NotEnableIf,S> a){ - ret.v = unary(a, VsplatSIMD()); - } - ////////////////////////// + // Enable if complex type + template + Grid_simd(const typename std::enable_if::value, S>::type a) { + vsplat(*this, a); + }; + + Grid_simd(const Real a) { vsplat(*this, Scalar_type(a)); }; /////////////////////////////////////////////// - // Initialise to 1,0,i for the correct types + // mac, mult, sub, add, adj /////////////////////////////////////////////// - // For complex types - template = 0 > inline void vone(Grid_simd &ret) { vsplat(ret,S(1.0,0.0)); } - template = 0 > inline void vzero(Grid_simd &ret) { vsplat(ret,S(0.0,0.0)); }// use xor? - template = 0 > inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,S(0.0,1.0));} - template = 0 > inline void visign(Grid_simd &ret){ vsplat(ret,S(1.0,-1.0));} - template = 0 > inline void vrsign(Grid_simd &ret){ vsplat(ret,S(-1.0,1.0));} - - // if not complex overload here - template = 0 > inline void vone (Grid_simd &ret){ vsplat(ret,S(1.0)); } - template = 0 > inline void vzero(Grid_simd &ret){ vsplat(ret,S(0.0)); } - - // For integral types - template = 0 > inline void vone(Grid_simd &ret) {vsplat(ret,1); } - template = 0 > inline void vzero(Grid_simd &ret) {vsplat(ret,0); } - template = 0 > inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} - template = 0 > inline void vfalse(Grid_simd &ret){vsplat(ret,0);} - - template inline void zeroit(Grid_simd &z){ vzero(z);} - - /////////////////////// - // Vstream - /////////////////////// - template = 0 > - inline void vstream(Grid_simd &out,const Grid_simd &in){ - binary((S *)&out.v, in.v, VstreamSIMD()); - } - template = 0 > - inline void vstream(Grid_simd &out,const Grid_simd &in){ - typedef typename S::value_type T; - binary((T *)&out.v, in.v, VstreamSIMD()); - } - - template = 0 > - inline void vstream(Grid_simd &out,const Grid_simd &in){ - out=in; - } - - //////////////////////////////////// - // Arithmetic operator overloads +,-,* - //////////////////////////////////// - template inline Grid_simd operator + (Grid_simd a, Grid_simd b) { - Grid_simd ret; - ret.v = binary(a.v, b.v, SumSIMD()); - return ret; - }; - - template inline Grid_simd operator - (Grid_simd a, Grid_simd b) { - Grid_simd ret; - ret.v = binary(a.v, b.v, SubSIMD()); - return ret; - }; - - // Distinguish between complex types and others - template = 0 > inline Grid_simd operator * (Grid_simd a, Grid_simd b) { - Grid_simd ret; - ret.v = binary(a.v,b.v, MultComplexSIMD()); - return ret; + // FIXME -- alias this to an inline MAC struct. + friend inline void mac(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ a, + const Grid_simd *__restrict__ x) { + *y = (*a) * (*x) + (*y); }; - // Real/Integer types - template = 0 > inline Grid_simd operator * (Grid_simd a, Grid_simd b) { - Grid_simd ret; - ret.v = binary(a.v,b.v, MultSIMD()); - return ret; + friend inline void mult(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ l, + const Grid_simd *__restrict__ r) { + *y = (*l) * (*r); + } + friend inline void sub(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ l, + const Grid_simd *__restrict__ r) { + *y = (*l) - (*r); + } + friend inline void add(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ l, + const Grid_simd *__restrict__ r) { + *y = (*l) + (*r); + } + + friend inline void mac(Grid_simd *__restrict__ y, + const Scalar_type *__restrict__ a, + const Grid_simd *__restrict__ x) { + *y = (*a) * (*x) + (*y); }; - - - /////////////////////// - // Conjugate - /////////////////////// - template = 0 > - inline Grid_simd conjugate(const Grid_simd &in){ - Grid_simd ret ; - ret.v = unary(in.v, ConjSIMD()); - return ret; + friend inline void mult(Grid_simd *__restrict__ y, + const Scalar_type *__restrict__ l, + const Grid_simd *__restrict__ r) { + *y = (*l) * (*r); } - template = 0 > inline Grid_simd conjugate(const Grid_simd &in){ - return in; // for real objects + friend inline void sub(Grid_simd *__restrict__ y, + const Scalar_type *__restrict__ l, + const Grid_simd *__restrict__ r) { + *y = (*l) - (*r); + } + friend inline void add(Grid_simd *__restrict__ y, + const Scalar_type *__restrict__ l, + const Grid_simd *__restrict__ r) { + *y = (*l) + (*r); + } + + friend inline void mac(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ a, + const Scalar_type *__restrict__ x) { + *y = (*a) * (*x) + (*y); + }; + friend inline void mult(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ l, + const Scalar_type *__restrict__ r) { + *y = (*l) * (*r); + } + friend inline void sub(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ l, + const Scalar_type *__restrict__ r) { + *y = (*l) - (*r); + } + friend inline void add(Grid_simd *__restrict__ y, + const Grid_simd *__restrict__ l, + const Scalar_type *__restrict__ r) { + *y = (*l) + (*r); + } + + //////////////////////////////////////////////////////////////////////// + // FIXME: gonna remove these load/store, get, set, prefetch + //////////////////////////////////////////////////////////////////////// + friend inline void vset(Grid_simd &ret, Scalar_type *a) { + ret.v = unary(a, VsetSIMD()); } - //Suppress adj for integer types... // odd; why conjugate above but not adj?? - template < class S, class V, IfNotInteger = 0 > - inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); } - /////////////////////// - // timesMinusI + // Vstore /////////////////////// - template = 0 > - inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){ - ret.v = binary(in.v, ret.v, TimesMinusISIMD()); + friend inline void vstore(const Grid_simd &ret, Scalar_type *a) { + binary(ret.v, (Real *)a, VstoreSIMD()); } - template = 0 > - inline Grid_simd timesMinusI(const Grid_simd &in){ - Grid_simd ret; - timesMinusI(ret,in); + /////////////////////// + // Vprefetch + /////////////////////// + friend inline void vprefetch(const Grid_simd &v) { + prefetch_HINT_T0((const char *)&v.v); + } + + /////////////////////// + // Reduce + /////////////////////// + friend inline Scalar_type Reduce(const Grid_simd &in) { + return unary(in.v, ReduceSIMD()); + } + + //////////////////////////// + // opreator scalar * simd + //////////////////////////// + friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) { + Grid_simd va; + vsplat(va, a); + return va * b; + } + friend inline Grid_simd operator*(Grid_simd b, const Scalar_type &a) { + return a * b; + } + + /////////////////////// + // Unary negation + /////////////////////// + friend inline Grid_simd operator-(const Grid_simd &r) { + Grid_simd ret; + vzero(ret); + ret = ret - r; return ret; } - - template = 0 > - inline Grid_simd timesMinusI(const Grid_simd &in){ - return in; + // *=,+=,-= operators + inline Grid_simd &operator*=(const Grid_simd &r) { + *this = (*this) * r; + return *this; + // return (*this)*r; ? + } + inline Grid_simd &operator+=(const Grid_simd &r) { + *this = *this + r; + return *this; + } + inline Grid_simd &operator-=(const Grid_simd &r) { + *this = *this - r; + return *this; } - /////////////////////// - // timesI - /////////////////////// - template = 0 > - inline void timesI(Grid_simd &ret,const Grid_simd &in){ - ret.v = binary(in.v, ret.v, TimesISIMD()); - } - - template = 0 > - inline Grid_simd timesI(const Grid_simd &in){ - Grid_simd ret; - timesI(ret,in); - return ret; - } + /////////////////////////////////////// + // Not all functions are supported + // through SIMD and must breakout to + // scalar type and back again. This + // provides support + /////////////////////////////////////// - template = 0 > - inline Grid_simd timesI(const Grid_simd &in){ - return in; - } + template + friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) { + Grid_simd ret; + Grid_simd::conv_t conv; - ///////////////////// - // Inner, outer - ///////////////////// - - template - inline Grid_simd< S, V> innerProduct(const Grid_simd< S, V> & l, const Grid_simd< S, V> & r) - { - return conjugate(l)*r; - } - - template - inline Grid_simd< S, V> outerProduct(const Grid_simd< S, V> &l, const Grid_simd< S, V> & r) - { - return l*conjugate(r); - } - - template - inline Grid_simd< S, V> trace(const Grid_simd< S, V> &arg){ - return arg; - } - - - //////////////////////////////////////////////////////////// - // copy/splat complex real parts into real; - // insert real into complex and zero imag; - //////////////////////////////////////////////////////////// - - //real = toReal( complex ) - template = 0> - inline Grid_simd toReal(const Grid_simd,V> &in) - { - typedef Grid_simd simd; - simd ret; - typename simd::conv_t conv; - conv.v = in.v; // copy the vector content (bytewise) - for(int i=0;i = 0 > // must be a real arg - inline Grid_simd,V> toComplex (const Grid_simd &in) - { - typedef Grid_simd Rsimd; - typedef Grid_simd,V> Csimd; - typename Rsimd::conv_t conv;// address as real - - conv.v = in.v; - for(int i=0;i + friend inline Grid_simd SimdApplyBinop(const functor &func, + const Grid_simd &x, + const Grid_simd &y) { + Grid_simd ret; + Grid_simd::conv_t cx; + Grid_simd::conv_t cy; + + cx.v = x.v; + cy.v = y.v; + for (int i = 0; i < Nsimd(); i++) { + cx.s[i] = func(cx.s[i], cy.s[i]); } - Csimd ret; - ret.v = conv.v; + ret.v = cx.v; return ret; } + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// + friend inline void permute0(Grid_simd &y, Grid_simd b) { + y.v = Optimization::Permute::Permute0(b.v); + } + friend inline void permute1(Grid_simd &y, Grid_simd b) { + y.v = Optimization::Permute::Permute1(b.v); + } + friend inline void permute2(Grid_simd &y, Grid_simd b) { + y.v = Optimization::Permute::Permute2(b.v); + } + friend inline void permute3(Grid_simd &y, Grid_simd b) { + y.v = Optimization::Permute::Permute3(b.v); + } + friend inline void permute(Grid_simd &y, Grid_simd b, int perm) { + if (perm & RotateBit) { + int dist = perm & 0xF; + y = rotate(b, dist); + return; + } + switch (perm) { + case 3: + permute3(y, b); + break; + case 2: + permute2(y, b); + break; + case 1: + permute1(y, b); + break; + case 0: + permute0(y, b); + break; + default: + assert(0); + } + } +}; // end of Grid_simd class definition - - /////////////////////////////// - // Define available types - /////////////////////////////// - typedef Grid_simd< float , SIMD_Ftype > vRealF; - typedef Grid_simd< double , SIMD_Dtype > vRealD; - typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF; - typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD; - typedef Grid_simd< Integer , SIMD_Itype > vInteger; +//////////////////////////////////////////////////////////////////// +// General rotate +//////////////////////////////////////////////////////////////////// +template = 0> +inline Grid_simd rotate(Grid_simd b, int nrot) { + nrot = nrot % Grid_simd::Nsimd(); + Grid_simd ret; + // std::cout << "Rotate Real by "< = 0> +inline Grid_simd rotate(Grid_simd b, int nrot) { + nrot = nrot % Grid_simd::Nsimd(); + Grid_simd ret; + // std::cout << "Rotate Complex by "< struct is_simd : public std::false_type{}; - template <> struct is_simd : public std::true_type {}; - template <> struct is_simd : public std::true_type {}; - template <> struct is_simd: public std::true_type {}; - template <> struct is_simd: public std::true_type {}; - template <> struct is_simd : public std::true_type {}; +/////////////////////// +// Splat +/////////////////////// - template using IfSimd = Invoke::value,int> > ; - template using IfNotSimd = Invoke::value,unsigned> > ; +// this is only for the complex version +template = 0, class ABtype> +inline void vsplat(Grid_simd &ret, ABtype a, ABtype b) { + ret.v = binary(a, b, VsplatSIMD()); +} +// overload if complex +template +inline void vsplat(Grid_simd &ret, EnableIf, S> c) { + vsplat(ret, real(c), imag(c)); +} + +// if real fill with a, if complex fill with a in the real part (first function +// above) +template +inline void vsplat(Grid_simd &ret, NotEnableIf, S> a) { + ret.v = unary(a, VsplatSIMD()); +} +////////////////////////// + +/////////////////////////////////////////////// +// Initialise to 1,0,i for the correct types +/////////////////////////////////////////////// +// For complex types +template = 0> +inline void vone(Grid_simd &ret) { + vsplat(ret, S(1.0, 0.0)); +} +template = 0> +inline void vzero(Grid_simd &ret) { + vsplat(ret, S(0.0, 0.0)); +} // use xor? +template = 0> +inline void vcomplex_i(Grid_simd &ret) { + vsplat(ret, S(0.0, 1.0)); +} + +template = 0> +inline void visign(Grid_simd &ret) { + vsplat(ret, S(1.0, -1.0)); +} +template = 0> +inline void vrsign(Grid_simd &ret) { + vsplat(ret, S(-1.0, 1.0)); +} + +// if not complex overload here +template = 0> +inline void vone(Grid_simd &ret) { + vsplat(ret, S(1.0)); +} +template = 0> +inline void vzero(Grid_simd &ret) { + vsplat(ret, S(0.0)); +} + +// For integral types +template = 0> +inline void vone(Grid_simd &ret) { + vsplat(ret, 1); +} +template = 0> +inline void vzero(Grid_simd &ret) { + vsplat(ret, 0); +} +template = 0> +inline void vtrue(Grid_simd &ret) { + vsplat(ret, 0xFFFFFFFF); +} +template = 0> +inline void vfalse(Grid_simd &ret) { + vsplat(ret, 0); +} + +template +inline void zeroit(Grid_simd &z) { + vzero(z); +} + +/////////////////////// +// Vstream +/////////////////////// +template = 0> +inline void vstream(Grid_simd &out, const Grid_simd &in) { + binary((S *)&out.v, in.v, VstreamSIMD()); +} +template = 0> +inline void vstream(Grid_simd &out, const Grid_simd &in) { + typedef typename S::value_type T; + binary((T *)&out.v, in.v, VstreamSIMD()); +} + +template = 0> +inline void vstream(Grid_simd &out, const Grid_simd &in) { + out = in; +} + +//////////////////////////////////// +// Arithmetic operator overloads +,-,* +//////////////////////////////////// +template +inline Grid_simd operator+(Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, SumSIMD()); + return ret; +}; + +template +inline Grid_simd operator-(Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, SubSIMD()); + return ret; +}; + +// Distinguish between complex types and others +template = 0> +inline Grid_simd operator*(Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, MultComplexSIMD()); + return ret; +}; + +// Real/Integer types +template = 0> +inline Grid_simd operator*(Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, MultSIMD()); + return ret; +}; + +/////////////////////// +// Conjugate +/////////////////////// +template = 0> +inline Grid_simd conjugate(const Grid_simd &in) { + Grid_simd ret; + ret.v = unary(in.v, ConjSIMD()); + return ret; +} +template = 0> +inline Grid_simd conjugate(const Grid_simd &in) { + return in; // for real objects +} + +// Suppress adj for integer types... // odd; why conjugate above but not adj?? +template = 0> +inline Grid_simd adj(const Grid_simd &in) { + return conjugate(in); +} + +/////////////////////// +// timesMinusI +/////////////////////// +template = 0> +inline void timesMinusI(Grid_simd &ret, const Grid_simd &in) { + ret.v = binary(in.v, ret.v, TimesMinusISIMD()); +} + +template = 0> +inline Grid_simd timesMinusI(const Grid_simd &in) { + Grid_simd ret; + timesMinusI(ret, in); + return ret; +} + +template = 0> +inline Grid_simd timesMinusI(const Grid_simd &in) { + return in; +} + +/////////////////////// +// timesI +/////////////////////// +template = 0> +inline void timesI(Grid_simd &ret, const Grid_simd &in) { + ret.v = binary(in.v, ret.v, TimesISIMD()); +} + +template = 0> +inline Grid_simd timesI(const Grid_simd &in) { + Grid_simd ret; + timesI(ret, in); + return ret; +} + +template = 0> +inline Grid_simd timesI(const Grid_simd &in) { + return in; +} + +///////////////////// +// Inner, outer +///////////////////// + +template +inline Grid_simd innerProduct(const Grid_simd &l, + const Grid_simd &r) { + return conjugate(l) * r; +} + +template +inline Grid_simd outerProduct(const Grid_simd &l, + const Grid_simd &r) { + return l * conjugate(r); +} + +template +inline Grid_simd trace(const Grid_simd &arg) { + return arg; +} + +//////////////////////////////////////////////////////////// +// copy/splat complex real parts into real; +// insert real into complex and zero imag; +//////////////////////////////////////////////////////////// + +// real = toReal( complex ) +template = 0> +inline Grid_simd toReal(const Grid_simd, V> &in) { + typedef Grid_simd simd; + simd ret; + typename simd::conv_t conv; + conv.v = in.v; // copy the vector content (bytewise) + for (int i = 0; i < simd::Nsimd(); i += 2) { + conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc... + } + ret.v = conv.v; + return ret; +} + +// complex = toComplex( real ) +template = 0> // must be a real arg +inline Grid_simd, V> toComplex(const Grid_simd &in) { + typedef Grid_simd Rsimd; + typedef Grid_simd, V> Csimd; + typename Rsimd::conv_t conv; // address as real + + conv.v = in.v; + for (int i = 0; i < Rsimd::Nsimd(); i += 2) { + assert(conv.s[i + 1] == + conv.s[i]); // trap any cases where real was not duplicated + // indicating the SIMD grids of real and imag assignment did not correctly + // match + conv.s[i + 1] = 0.0; // zero imaginary parts + } + Csimd ret; + ret.v = conv.v; + return ret; +} + +/////////////////////////////// +// Define available types +/////////////////////////////// +typedef Grid_simd vRealF; +typedef Grid_simd vRealD; +typedef Grid_simd, SIMD_Ftype> vComplexF; +typedef Grid_simd, SIMD_Dtype> vComplexD; +typedef Grid_simd vInteger; + +///////////////////////////////////////// +// Some traits to recognise the types +///////////////////////////////////////// +template +struct is_simd : public std::false_type {}; +template <> +struct is_simd : public std::true_type {}; +template <> +struct is_simd : public std::true_type {}; +template <> +struct is_simd : public std::true_type {}; +template <> +struct is_simd : public std::true_type {}; +template <> +struct is_simd : public std::true_type {}; + +template +using IfSimd = Invoke::value, int> >; +template +using IfNotSimd = Invoke::value, unsigned> >; } #endif diff --git a/lib/simd/Grid_vector_unops.h b/lib/simd/Grid_vector_unops.h index 2ce8590d..a67f4e7d 100644 --- a/lib/simd/Grid_vector_unops.h +++ b/lib/simd/Grid_vector_unops.h @@ -1,247 +1,234 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/simd/Grid_vector_unops.h +Source file: ./lib/simd/Grid_vector_unops.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_VECTOR_UNOPS #define GRID_VECTOR_UNOPS #include -namespace Grid { +namespace Grid { - template struct SqrtRealFunctor { - scalar operator()(const scalar &a) const { - return sqrt(real(a)); - } - }; +template +struct SqrtRealFunctor { + scalar operator()(const scalar &a) const { return sqrt(real(a)); } +}; - template struct RSqrtRealFunctor { - scalar operator()(const scalar &a) const { - return scalar(1.0/sqrt(real(a))); - } - }; +template +struct RSqrtRealFunctor { + scalar operator()(const scalar &a) const { + return scalar(1.0 / sqrt(real(a))); + } +}; - template struct CosRealFunctor { - scalar operator()(const scalar &a) const { - return cos(real(a)); - } - }; +template +struct CosRealFunctor { + scalar operator()(const scalar &a) const { return cos(real(a)); } +}; - template struct SinRealFunctor { - scalar operator()(const scalar &a) const { - return sin(real(a)); - } - }; +template +struct SinRealFunctor { + scalar operator()(const scalar &a) const { return sin(real(a)); } +}; - template struct AcosRealFunctor { - scalar operator()(const scalar &a) const { - return acos(real(a)); - } - }; +template +struct AcosRealFunctor { + scalar operator()(const scalar &a) const { return acos(real(a)); } +}; - template struct AsinRealFunctor { - scalar operator()(const scalar &a) const { - return asin(real(a)); - } - }; +template +struct AsinRealFunctor { + scalar operator()(const scalar &a) const { return asin(real(a)); } +}; - template struct LogRealFunctor { - scalar operator()(const scalar &a) const { - return log(real(a)); - } - }; +template +struct LogRealFunctor { + scalar operator()(const scalar &a) const { return log(real(a)); } +}; - template struct ExpRealFunctor { - scalar operator()(const scalar &a) const { - return exp(real(a)); - } - }; - template struct NotFunctor { - scalar operator()(const scalar &a) const { - return (!a); - } - }; - template struct AbsRealFunctor { - scalar operator()(const scalar &a) const { - return std::abs(real(a)); - } - }; +template +struct ExpRealFunctor { + scalar operator()(const scalar &a) const { return exp(real(a)); } +}; +template +struct NotFunctor { + scalar operator()(const scalar &a) const { return (!a); } +}; +template +struct AbsRealFunctor { + scalar operator()(const scalar &a) const { return std::abs(real(a)); } +}; - template struct PowRealFunctor { - double y; - PowRealFunctor(double _y) : y(_y) {}; - scalar operator()(const scalar &a) const { - return pow(real(a),y); - } - }; +template +struct PowRealFunctor { + double y; + PowRealFunctor(double _y) : y(_y){}; + scalar operator()(const scalar &a) const { return pow(real(a), y); } +}; - template struct ModIntFunctor { - Integer y; - ModIntFunctor(Integer _y) : y(_y) {}; - scalar operator()(const scalar &a) const { - return Integer(a)%y; - } - }; +template +struct ModIntFunctor { + Integer y; + ModIntFunctor(Integer _y) : y(_y){}; + scalar operator()(const scalar &a) const { return Integer(a) % y; } +}; - template struct DivIntFunctor { - Integer y; - DivIntFunctor(Integer _y) : y(_y) {}; - scalar operator()(const scalar &a) const { - return Integer(a)/y; - } - }; +template +struct DivIntFunctor { + Integer y; + DivIntFunctor(Integer _y) : y(_y){}; + scalar operator()(const scalar &a) const { return Integer(a) / y; } +}; - template struct RealFunctor { - scalar operator()(const std::complex &a) const { - return real(a); - } - }; - template struct ImagFunctor { - scalar operator()(const std::complex &a) const { - return imag(a); - } - }; - template < class S, class V > - inline Grid_simd real(const Grid_simd &r) { - return SimdApply(RealFunctor(),r); - } - template < class S, class V > - inline Grid_simd imag(const Grid_simd &r) { - return SimdApply(ImagFunctor(),r); - } +template +struct RealFunctor { + scalar operator()(const scalar &a) const { return std::real(a); } +}; +template +struct ImagFunctor { + scalar operator()(const scalar &a) const { return std::imag(a); } +}; +template +inline Grid_simd real(const Grid_simd &r) { + return SimdApply(RealFunctor(), r); +} +template +inline Grid_simd imag(const Grid_simd &r) { + return SimdApply(ImagFunctor(), r); +} +template +inline Grid_simd sqrt(const Grid_simd &r) { + return SimdApply(SqrtRealFunctor(), r); +} +template +inline Grid_simd rsqrt(const Grid_simd &r) { + return SimdApply(RSqrtRealFunctor(), r); +} +template +inline Scalar rsqrt(const Scalar &r) { + return (RSqrtRealFunctor(), r); +} - template < class S, class V > - inline Grid_simd sqrt(const Grid_simd &r) { - return SimdApply(SqrtRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd rsqrt(const Grid_simd &r) { - return SimdApply(RSqrtRealFunctor(),r); - } - template < class Scalar > - inline Scalar rsqrt(const Scalar &r) { - return (RSqrtRealFunctor(),r); - } - - template < class S, class V > - inline Grid_simd cos(const Grid_simd &r) { - return SimdApply(CosRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd sin(const Grid_simd &r) { - return SimdApply(SinRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd acos(const Grid_simd &r) { - return SimdApply(AcosRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd asin(const Grid_simd &r) { - return SimdApply(AsinRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd log(const Grid_simd &r) { - return SimdApply(LogRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd abs(const Grid_simd &r) { - return SimdApply(AbsRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd exp(const Grid_simd &r) { - return SimdApply(ExpRealFunctor(),r); - } - template < class S, class V > - inline Grid_simd Not(const Grid_simd &r) { - return SimdApply(NotFunctor(),r); - } - template < class S, class V > - inline Grid_simd pow(const Grid_simd &r,double y) { - return SimdApply(PowRealFunctor(y),r); - } - template < class S, class V > - inline Grid_simd mod(const Grid_simd &r,Integer y) { - return SimdApply(ModIntFunctor(y),r); - } - template < class S, class V > - inline Grid_simd div(const Grid_simd &r,Integer y) { - return SimdApply(DivIntFunctor(y),r); - } - //////////////////////////////////////////////////////////////////////////// - // Allows us to assign into **conformable** real vectors from complex - //////////////////////////////////////////////////////////////////////////// - // template < class S, class V > - // inline auto ComplexRemove(const Grid_simd &c) -> Grid_simd::Real,V> { - // Grid_simd::Real,V> ret; - // ret.v = c.v; - // return ret; - // } - template struct AndFunctor { - scalar operator()(const scalar &x, const scalar &y) const { - return x & y; - } - }; - template struct OrFunctor { - scalar operator()(const scalar &x, const scalar &y) const { - return x | y; - } - }; - template struct AndAndFunctor { - scalar operator()(const scalar &x, const scalar &y) const { - return x && y; - } - }; - template struct OrOrFunctor { - scalar operator()(const scalar &x, const scalar &y) const { - return x || y; - } - }; - - //////////////////////////////// - // Calls to simd binop functors - //////////////////////////////// - template < class S, class V > - inline Grid_simd operator &(const Grid_simd &x,const Grid_simd &y) { - return SimdApplyBinop(AndFunctor(),x,y); - } - template < class S, class V > - inline Grid_simd operator &&(const Grid_simd &x,const Grid_simd &y) { - return SimdApplyBinop(AndAndFunctor(),x,y); - } - template < class S, class V > - inline Grid_simd operator |(const Grid_simd &x,const Grid_simd &y) { - return SimdApplyBinop(OrFunctor(),x,y); - } - template < class S, class V > - inline Grid_simd operator ||(const Grid_simd &x,const Grid_simd &y) { - return SimdApplyBinop(OrOrFunctor(),x,y); - } +template +inline Grid_simd cos(const Grid_simd &r) { + return SimdApply(CosRealFunctor(), r); +} +template +inline Grid_simd sin(const Grid_simd &r) { + return SimdApply(SinRealFunctor(), r); +} +template +inline Grid_simd acos(const Grid_simd &r) { + return SimdApply(AcosRealFunctor(), r); +} +template +inline Grid_simd asin(const Grid_simd &r) { + return SimdApply(AsinRealFunctor(), r); +} +template +inline Grid_simd log(const Grid_simd &r) { + return SimdApply(LogRealFunctor(), r); +} +template +inline Grid_simd abs(const Grid_simd &r) { + return SimdApply(AbsRealFunctor(), r); +} +template +inline Grid_simd exp(const Grid_simd &r) { + return SimdApply(ExpRealFunctor(), r); +} +template +inline Grid_simd Not(const Grid_simd &r) { + return SimdApply(NotFunctor(), r); +} +template +inline Grid_simd pow(const Grid_simd &r, double y) { + return SimdApply(PowRealFunctor(y), r); +} +template +inline Grid_simd mod(const Grid_simd &r, Integer y) { + return SimdApply(ModIntFunctor(y), r); +} +template +inline Grid_simd div(const Grid_simd &r, Integer y) { + return SimdApply(DivIntFunctor(y), r); +} +//////////////////////////////////////////////////////////////////////////// +// Allows us to assign into **conformable** real vectors from complex +//////////////////////////////////////////////////////////////////////////// +// template < class S, class V > +// inline auto ComplexRemove(const Grid_simd &c) -> +// Grid_simd::Real,V> { +// Grid_simd::Real,V> ret; +// ret.v = c.v; +// return ret; +// } +template +struct AndFunctor { + scalar operator()(const scalar &x, const scalar &y) const { return x & y; } +}; +template +struct OrFunctor { + scalar operator()(const scalar &x, const scalar &y) const { return x | y; } +}; +template +struct AndAndFunctor { + scalar operator()(const scalar &x, const scalar &y) const { return x && y; } +}; +template +struct OrOrFunctor { + scalar operator()(const scalar &x, const scalar &y) const { return x || y; } +}; +//////////////////////////////// +// Calls to simd binop functors +//////////////////////////////// +template +inline Grid_simd operator&(const Grid_simd &x, + const Grid_simd &y) { + return SimdApplyBinop(AndFunctor(), x, y); +} +template +inline Grid_simd operator&&(const Grid_simd &x, + const Grid_simd &y) { + return SimdApplyBinop(AndAndFunctor(), x, y); +} +template +inline Grid_simd operator|(const Grid_simd &x, + const Grid_simd &y) { + return SimdApplyBinop(OrFunctor(), x, y); +} +template +inline Grid_simd operator||(const Grid_simd &x, + const Grid_simd &y) { + return SimdApplyBinop(OrOrFunctor(), x, y); +} } #endif diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index 2255c598..e5957db7 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -1,31 +1,32 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./tests/Test_simd.cc +Source file: ./tests/Test_simd.cc - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: neo - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #include using namespace std; @@ -62,6 +63,18 @@ public: template void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);} std::string name(void) const { return std::string("Adj"); } }; +class funcImag { +public: + funcImag() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = imag(i1);} + std::string name(void) const { return std::string("imag"); } +}; +class funcReal { +public: + funcReal() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = real(i1);} + std::string name(void) const { return std::string("real"); } +}; class funcTimesI { public: @@ -141,7 +154,13 @@ void Tester(const functor &func) } extract(v_result,result); - std::cout<(funcTimes()); Tester(funcConj()); Tester(funcAdj()); + Tester(funcReal()); + Tester(funcImag()); Tester(funcInnerProduct()); ReductionTester(funcReduce()); @@ -421,17 +442,21 @@ int main (int argc, char ** argv) Tester(funcTimes()); Tester(funcConj()); Tester(funcAdj()); - Tester(funcInnerProduct()); - ReductionTester(funcReduce()); + Tester(funcReal()); + Tester(funcImag()); + Tester(funcInnerProduct()); + ReductionTester(funcReduce()); - std::cout<(funcPermute(i)); + for (int i = 0; (1 << i) < vComplexD::Nsimd(); i++) { + PermTester(funcPermute(i)); }