diff --git a/lib/Makefile.am b/lib/Makefile.am index f761b59e..a5b89c0a 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -96,7 +96,7 @@ nobase_include_HEADERS = algorithms/approx/bigfloat.h \ simd/Grid_vector_types.h \ simd/Grid_sse4.h \ simd/Grid_avx.h \ - simd/Grid_knc.h + simd/Grid_avx512.h diff --git a/lib/simd/Grid_knc.h b/lib/simd/Grid_avx512.h similarity index 100% rename from lib/simd/Grid_knc.h rename to lib/simd/Grid_avx512.h diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 0bedf32f..e389f3c2 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -14,7 +14,7 @@ #include "Grid_avx.h" #endif #if defined AVX512 -#include "Grid_knc.h" +#include "Grid_avx512.h" #endif #if defined QPX #include "Grid_qpx.h" @@ -32,13 +32,24 @@ namespace Grid { // type alias used to simplify the syntax of std::enable_if template using Invoke = typename T::type; - template using EnableIf = Invoke>; - template using NotEnableIf= Invoke>; - + template using EnableIf = Invoke >; + template using NotEnableIf= Invoke >; + + //////////////////////////////////////////////////////// // Check for complexity with type traits - template struct is_complex : std::false_type {}; - template < typename T > struct is_complex< std::complex >: std::true_type {}; + template struct is_complex : public std::false_type {}; + template <> struct is_complex >: public std::true_type {}; + template <> struct is_complex > : public std::true_type {}; + + template using IfReal = Invoke::value,int> > ; + template using IfComplex = Invoke::value,int> > ; + template using IfInteger = Invoke::value,int> > ; + + template using IfNotReal = Invoke::value,int> > ; + template using IfNotComplex = Invoke::value,int> > ; + template using IfNotInteger = Invoke::value,int> > ; + //////////////////////////////////////////////////////// // Define the operation templates functors // general forms to allow for vsplat syntax @@ -54,11 +65,9 @@ namespace Grid { Out unary(Input src, Operation op){ return op(src); } - /////////////////////////////////////////////// - /* @brief Grid_simd class for the SIMD vector type operations @@ -73,27 +82,28 @@ namespace Grid { Vector_type v; - static inline int Nsimd(void) { return sizeof(Vector_type)/sizeof(Scalar_type);} - - // Constructors - Grid_simd & operator = ( Zero & z){ - vzero(*this); - return (*this); - } + Grid_simd& operator=(const Grid_simd&& rhs){v=rhs.v;return *this;}; Grid_simd& operator=(const Grid_simd& rhs){v=rhs.v;return *this;}; //faster than not declaring it and leaving to the compiler Grid_simd()=default; - Grid_simd(const Grid_simd& rhs):v(rhs.v){}; //compiles in movaps + Grid_simd(const Grid_simd& rhs) :v(rhs.v){}; //compiles in movaps Grid_simd(const Grid_simd&& rhs):v(rhs.v){}; + + ///////////////////////////// + // Constructors + ///////////////////////////// + Grid_simd & operator = ( Zero & z){ + vzero(*this); + return (*this); + } //Enable if complex type - template < class S = Scalar_type > + template < typename S = Scalar_type > Grid_simd(const typename std::enable_if< is_complex < S >::value, S>::type a){ vsplat(*this,a); }; - Grid_simd(const Real a){ vsplat(*this,Scalar_type(a)); @@ -107,86 +117,16 @@ namespace Grid { friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } - friend inline void mac (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mult(Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } friend inline void sub (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } friend inline void add (Grid_simd *__restrict__ y,const Scalar_type *__restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } + friend inline void mac (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ a,const Scalar_type *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mult(Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) * (*r); } friend inline void sub (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) - (*r); } friend inline void add (Grid_simd *__restrict__ y,const Grid_simd *__restrict__ l,const Scalar_type *__restrict__ r){ *y = (*l) + (*r); } - - - //not for integer types... - template < class S = Scalar_type, NotEnableIf, int> = 0 > - friend inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); } - - /////////////////////////////////////////////// - // Initialise to 1,0,i for the correct types - /////////////////////////////////////////////// - // For complex types - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor? - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} - - // if not complex overload here - template < class S = Scalar_type, EnableIf,int> = 0 > - friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } - template < class S = Scalar_type, EnableIf,int> = 0 > - friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } - - - - // For integral types - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vfalse(Grid_simd &ret){vsplat(ret,0);} - - //////////////////////////////////// - // Arithmetic operator overloads +,-,* - //////////////////////////////////// - friend inline Grid_simd operator + (Grid_simd a, Grid_simd b) - { - Grid_simd ret; - ret.v = binary(a.v, b.v, SumSIMD()); - return ret; - }; - - friend inline Grid_simd operator - (Grid_simd a, Grid_simd b) - { - Grid_simd ret; - ret.v = binary(a.v, b.v, SubSIMD()); - return ret; - }; - - // Distinguish between complex types and others - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) - { - Grid_simd ret; - ret.v = binary(a.v,b.v, MultComplexSIMD()); - return ret; - }; - - // Real/Integer types - template < class S = Scalar_type, NotEnableIf, int> = 0 > - friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) - { - Grid_simd ret; - ret.v = binary(a.v,b.v, MultSIMD()); - return ret; - }; - //////////////////////////////////////////////////////////////////////// // FIXME: gonna remove these load/store, get, set, prefetch //////////////////////////////////////////////////////////////////////// @@ -194,28 +134,6 @@ namespace Grid { ret.v = unary(a, VsetSIMD()); } - /////////////////////// - // Splat - /////////////////////// - // overload if complex - template < class S = Scalar_type > - friend inline void vsplat(Grid_simd &ret, EnableIf, S> c){ - Real a = real(c); - Real b = imag(c); - vsplat(ret,a,b); - } - - // this is only for the complex version - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vsplat(Grid_simd &ret,Real a, Real b){ - ret.v = binary(a, b, VsplatSIMD()); - } - - //if real fill with a, if complex fill with a in the real part (first function above) - friend inline void vsplat(Grid_simd &ret,Real a){ - ret.v = unary(a, VsplatSIMD()); - } - /////////////////////// // Vstore /////////////////////// @@ -223,19 +141,6 @@ namespace Grid { binary(ret.v, (Real*)a, VstoreSIMD()); } - /////////////////////// - // Vstream - /////////////////////// - template < class S = Scalar_type, NotEnableIf, int> = 0 > - friend inline void vstream(Grid_simd &out,const Grid_simd &in){ - binary((Real*)&out.v, in.v, VstreamSIMD()); - } - - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void vstream(Grid_simd &out,const Grid_simd &in){ - out=in; - } - /////////////////////// // Vprefetch /////////////////////// @@ -244,7 +149,6 @@ namespace Grid { _mm_prefetch((const char*)&v.v,_MM_HINT_T0); } - /////////////////////// // Reduce /////////////////////// @@ -265,63 +169,6 @@ namespace Grid { return a*b; } - /////////////////////// - // Conjugate - /////////////////////// - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline Grid_simd conjugate(const Grid_simd &in){ - Grid_simd ret ; - ret.v = unary(in.v, ConjSIMD()); - return ret; - } - template < class S = Scalar_type, NotEnableIf, int> = 0 > - friend inline Grid_simd conjugate(const Grid_simd &in){ - return in; // for real objects - } - - - /////////////////////// - // timesMinusI - /////////////////////// - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){ - ret.v = binary(in.v, ret.v, TimesMinusISIMD()); - } - - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline Grid_simd timesMinusI(const Grid_simd &in){ - Grid_simd ret; - timesMinusI(ret,in); - return ret; - } - - template < class S = Scalar_type, NotEnableIf, int> = 0 > - friend inline Grid_simd timesMinusI(const Grid_simd &in){ - return in; - } - - - /////////////////////// - // timesI - /////////////////////// - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline void timesI(Grid_simd &ret,const Grid_simd &in){ - ret.v = binary(in.v, ret.v, TimesISIMD()); - } - - template < class S = Scalar_type, EnableIf, int> = 0 > - friend inline Grid_simd timesI(const Grid_simd &in){ - Grid_simd ret; - timesI(ret,in); - return ret; - } - - template < class S = Scalar_type, NotEnableIf, int> = 0 > - friend inline Grid_simd timesI(const Grid_simd &in){ - return in; - } - - /////////////////////// // Unary negation /////////////////////// @@ -346,9 +193,6 @@ namespace Grid { return *this; } - - - //////////////////////////////////////////////////////////////////// // General permute; assumes vector length is same across // all subtypes; may not be a good assumption, but could @@ -359,48 +203,183 @@ namespace Grid { Gpermute(y,b,perm); } - };// end of Grid_simd class definition + /////////////////////// + // Splat + /////////////////////// + + // this is only for the complex version + template =0, class ABtype> + inline void vsplat(Grid_simd &ret,ABtype a, ABtype b){ + ret.v = binary(a, b, VsplatSIMD()); + } - template - inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r) + // overload if complex + template inline void vsplat(Grid_simd &ret, EnableIf, S> c) { + Real a = real(c); + Real b = imag(c); + vsplat(ret,a,b); + } + + //if real fill with a, if complex fill with a in the real part (first function above) + template + inline void vsplat(Grid_simd &ret,NotEnableIf,S> a){ + ret.v = unary(a, VsplatSIMD()); + } + ////////////////////////// + + /////////////////////////////////////////////// + // Initialise to 1,0,i for the correct types + /////////////////////////////////////////////// + // For complex types + template = 0 > inline void vone(Grid_simd &ret) { vsplat(ret,S(1.0,0.0)); } + template = 0 > inline void vzero(Grid_simd &ret) { vsplat(ret,S(0.0,0.0)); }// use xor? + template = 0 > inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,S(0.0,1.0));} + + // if not complex overload here + template = 0 > inline void vone (Grid_simd &ret){ vsplat(ret,1.0); } + template = 0 > inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } + + // For integral types + template = 0 > inline void vone(Grid_simd &ret) {vsplat(ret,1); } + template = 0 > inline void vzero(Grid_simd &ret) {vsplat(ret,0); } + template = 0 > inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} + template = 0 > inline void vfalse(Grid_simd &ret){vsplat(ret,0);} + + template inline void zeroit(Grid_simd &z){ vzero(z);} + + /////////////////////// + // Vstream + /////////////////////// + template = 0 > + inline void vstream(Grid_simd &out,const Grid_simd &in){ + binary((Real*)&out.v, in.v, VstreamSIMD()); + } + + template = 0 > + inline void vstream(Grid_simd &out,const Grid_simd &in){ + out=in; + } + + //////////////////////////////////// + // Arithmetic operator overloads +,-,* + //////////////////////////////////// + template inline Grid_simd operator + (Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, SumSIMD()); + return ret; + }; + + template inline Grid_simd operator - (Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v, b.v, SubSIMD()); + return ret; + }; + + // Distinguish between complex types and others + template = 0 > inline Grid_simd operator * (Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v,b.v, MultComplexSIMD()); + return ret; + }; + + // Real/Integer types + template = 0 > inline Grid_simd operator * (Grid_simd a, Grid_simd b) { + Grid_simd ret; + ret.v = binary(a.v,b.v, MultSIMD()); + return ret; + }; + + + /////////////////////// + // Conjugate + /////////////////////// + template = 0 > + inline Grid_simd conjugate(const Grid_simd &in){ + Grid_simd ret ; + ret.v = unary(in.v, ConjSIMD()); + return ret; + } + template = 0 > inline Grid_simd conjugate(const Grid_simd &in){ + return in; // for real objects + } + + //Suppress adj for integer types... // odd; why conjugate above but not adj?? + template < class S, class V, IfNotInteger = 0 > + inline Grid_simd adj(const Grid_simd &in){ return conjugate(in); } + + /////////////////////// + // timesMinusI + /////////////////////// + template = 0 > + inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){ + ret.v = binary(in.v, ret.v, TimesMinusISIMD()); + } + + template = 0 > + inline Grid_simd timesMinusI(const Grid_simd &in){ + Grid_simd ret; + timesMinusI(ret,in); + return ret; + } + + template = 0 > + inline Grid_simd timesMinusI(const Grid_simd &in){ + return in; + } + + /////////////////////// + // timesI + /////////////////////// + template = 0 > + inline void timesI(Grid_simd &ret,const Grid_simd &in){ + ret.v = binary(in.v, ret.v, TimesISIMD()); + } + + template = 0 > + inline Grid_simd timesI(const Grid_simd &in){ + Grid_simd ret; + timesI(ret,in); + return ret; + } + + template = 0 > + inline Grid_simd timesI(const Grid_simd &in){ + return in; + } + + + ///////////////////// + // Inner, outer + ///////////////////// + + template + inline Grid_simd< S, V> innerProduct(const Grid_simd< S, V> & l, const Grid_simd< S, V> & r) { return conjugate(l)*r; } - template - inline void zeroit(Grid_simd< scalar_type, vector_type> &z){ vzero(z);} - - - template - inline Grid_simd< scalar_type, vector_type> outerProduct(const Grid_simd< scalar_type, vector_type> &l, const Grid_simd< scalar_type, vector_type>& r) + template + inline Grid_simd< S, V> outerProduct(const Grid_simd< S, V> &l, const Grid_simd< S, V> & r) { return l*r; } - - template - inline Grid_simd< scalar_type, vector_type> trace(const Grid_simd< scalar_type, vector_type> &arg){ + template + inline Grid_simd< S, V> trace(const Grid_simd< S, V> &arg){ return arg; } - + /////////////////////////////// // Define available types - + /////////////////////////////// typedef Grid_simd< float , SIMD_Ftype > vRealF; typedef Grid_simd< double , SIMD_Dtype > vRealD; typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF; typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD; typedef Grid_simd< Integer , SIMD_Itype > vInteger; - - - - - - - } #endif