diff --git a/TODO b/TODO index 9a428f98..5c7cf35d 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,15 @@ ================================================================ *** Hacks and bug fixes to clean up and Audits ================================================================ -* Base class to share common code between vRealF, VComplexF etc... done - - Performance check on Guido's reimplementation strategy - (GUIDO) tested and no difference was found, merged + +* Extract/merge/set cleanup ; too many variants; rationalise and call simpler ones +* Used #define repetitive sequences to minimise code. +* Rewrite core tensor arithmetic support to be more systematic +* Ensure we ET as much as possible; move unop functions into ET framework. + - tests with expression args to all functions + * FIXME audit - * const audit Insert/Extract diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index 0e74e842..fd7bbb78 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -33,7 +33,7 @@ public: ~alignedAllocator() throw() { } pointer address(reference __x) const { return &__x; } - const_pointer address(const_reference __x) const { return &__x; } + // const_pointer address(const_reference __x) const { return &__x; } size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } diff --git a/lib/Tensors.h b/lib/Tensors.h index e585b116..a2246f4a 100644 --- a/lib/Tensors.h +++ b/lib/Tensors.h @@ -14,5 +14,6 @@ #include #include #include - +#include + #endif diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index 96a40fe7..fe801106 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -183,7 +183,7 @@ namespace Grid { int dir = geom.directions[p]; int disp = geom.displacements[p]; - int block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); + Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); LatticeCoordinate(coor,dir); @@ -204,8 +204,8 @@ namespace Grid { oblock = where(mod(coor,block)==(block-1),Mphi,zz); iblock = where(mod(coor,block)!=(block-1),Mphi,zz); } else if ( disp==-1 ) { - oblock = where(mod(coor,block)==0,Mphi,zz); - iblock = where(mod(coor,block)!=0,Mphi,zz); + oblock = where(mod(coor,block)==(Integer)0,Mphi,zz); + iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz); } else { assert(0); } diff --git a/lib/lattice/Lattice_ET.h b/lib/lattice/Lattice_ET.h index 2a7172ea..caa45e69 100644 --- a/lib/lattice/Lattice_ET.h +++ b/lib/lattice/Lattice_ET.h @@ -9,6 +9,37 @@ namespace Grid { + //////////////////////////////////////////////////// + // Predicated where support + //////////////////////////////////////////////////// + template + inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) { + + typename std::remove_const::type ret; + + typedef typename vobj::scalar_object scalar_object; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + const int Nsimd = vobj::vector_type::Nsimd(); + const int words = sizeof(vobj)/sizeof(vector_type); + + std::vector mask(Nsimd); + std::vector truevals (Nsimd); + std::vector falsevals(Nsimd); + + extract(iftrue ,truevals); + extract(iffalse ,falsevals); + extract(TensorRemove(predicate),mask); + + for(int s=0;s struct name\ }; GridUnopClass(UnarySub,-a); +GridUnopClass(UnaryNot,Not(a)); GridUnopClass(UnaryAdj,adj(a)); GridUnopClass(UnaryConj,conjugate(a)); GridUnopClass(UnaryTrace,trace(a)); GridUnopClass(UnaryTranspose,transpose(a)); +GridUnopClass(UnaryTa,Ta(a)); +GridUnopClass(UnaryReal,real(a)); +GridUnopClass(UnaryImag,imag(a)); +GridUnopClass(UnaryToReal,toReal(a)); +GridUnopClass(UnaryToComplex,toComplex(a)); +GridUnopClass(UnaryAbs,abs(a)); +GridUnopClass(UnarySqrt,sqrt(a)); +GridUnopClass(UnaryRsqrt,rsqrt(a)); +GridUnopClass(UnarySin,sin(a)); +GridUnopClass(UnaryCos,cos(a)); +GridUnopClass(UnaryLog,log(a)); +GridUnopClass(UnaryExp,exp(a)); //////////////////////////////////////////// // Binary operators @@ -163,6 +207,28 @@ GridBinOpClass(BinaryAdd,lhs+rhs); GridBinOpClass(BinarySub,lhs-rhs); GridBinOpClass(BinaryMul,lhs*rhs); +GridBinOpClass(BinaryAnd ,lhs&rhs); +GridBinOpClass(BinaryOr ,lhs|rhs); +GridBinOpClass(BinaryAndAnd,lhs&&rhs); +GridBinOpClass(BinaryOrOr ,lhs||rhs); + +//////////////////////////////////////////////////// +// Trinary conditional op +//////////////////////////////////////////////////// +#define GridTrinOpClass(name,combination)\ +template \ +struct name\ +{\ + static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \ + {\ + return combination;\ + }\ +} + +GridTrinOpClass(TrinaryWhere,(predicatedWhere::type, \ + typename std::remove_reference::type> (pred,lhs,rhs))); + //////////////////////////////////////////// // Operator syntactical glue //////////////////////////////////////////// @@ -218,15 +284,67 @@ template inline auto op(const T1 &pred,con //////////////////////// GRID_DEF_UNOP(operator -,UnarySub); +GRID_DEF_UNOP(Not,UnaryNot); +GRID_DEF_UNOP(operator !,UnaryNot); GRID_DEF_UNOP(adj,UnaryAdj); GRID_DEF_UNOP(conjugate,UnaryConj); GRID_DEF_UNOP(trace,UnaryTrace); GRID_DEF_UNOP(transpose,UnaryTranspose); +GRID_DEF_UNOP(Ta,UnaryTa); +GRID_DEF_UNOP(real,UnaryReal); +GRID_DEF_UNOP(imag,UnaryImag); +GRID_DEF_UNOP(toReal,UnaryToReal); +GRID_DEF_UNOP(toComplex,UnaryToComplex); +GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing +GRID_DEF_UNOP(sqrt ,UnarySqrt); +GRID_DEF_UNOP(rsqrt,UnarySqrt); +GRID_DEF_UNOP(sin ,UnarySin); +GRID_DEF_UNOP(cos ,UnaryCos); +GRID_DEF_UNOP(log ,UnaryLog); +GRID_DEF_UNOP(exp ,UnaryExp); GRID_DEF_BINOP(operator+,BinaryAdd); GRID_DEF_BINOP(operator-,BinarySub); GRID_DEF_BINOP(operator*,BinaryMul); +GRID_DEF_BINOP(operator&,BinaryAnd); +GRID_DEF_BINOP(operator|,BinaryOr); +GRID_DEF_BINOP(operator&&,BinaryAndAnd); +GRID_DEF_BINOP(operator||,BinaryOrOr); + +GRID_DEF_TRINOP(where,TrinaryWhere); + +///////////////////////////////////////////////////////////// +// Closure convenience to force expression to evaluate +///////////////////////////////////////////////////////////// +template + auto closure(const LatticeUnaryExpression & expr) + -> Lattice(expr.second))))> +{ + Lattice(expr.second))))> ret(expr); + return ret; +} +template + auto closure(const LatticeBinaryExpression & expr) + -> Lattice(expr.second)), + eval(0,std::get<1>(expr.second))))> +{ + Lattice(expr.second)), + eval(0,std::get<1>(expr.second))))> ret(expr); + return ret; +} +template + auto closure(const LatticeTrinaryExpression & expr) + -> Lattice(expr.second)), + eval(0,std::get<1>(expr.second)), + eval(0,std::get<2>(expr.second))))> +{ + Lattice(expr.second)), + eval(0,std::get<1>(expr.second)), + eval(0,std::get<2>(expr.second))))> ret(expr); + return ret; +} + #undef GRID_UNOP #undef GRID_BINOP #undef GRID_TRINOP diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 5a17826b..cd2fa855 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -93,7 +93,7 @@ PARALLEL_FOR_LOOP } return *this; } - template strong_inline Lattice & operator=(const LatticeBinaryExpression &expr) + template strong_inline Lattice & operator=(const LatticeBinaryExpression &expr) { GridBase *egrid(nullptr); GridFromExpression(egrid,expr); @@ -131,8 +131,8 @@ PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ #ifdef STREAMING_STORES - vobj tmp = eval(ss,expr); - vstream(_odata[ss] ,tmp); + //vobj tmp = eval(ss,expr); + vstream(_odata[ss] ,eval(ss,expr)); #else _odata[ss] = eval(ss,expr); #endif @@ -196,12 +196,7 @@ PARALLEL_FOR_LOOP _odata.resize(_grid->oSites()); PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = eval(ss,expr); - vstream(_odata[ss] ,tmp); -#else _odata[ss]=eval(ss,expr); -#endif } }; @@ -254,7 +249,7 @@ PARALLEL_FOR_LOOP Lattice ret(lhs._grid); PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = lhs._odata[ss]/rhs._odata[ss]; + ret._odata[ss] = lhs._odata[ss]*pow(rhs._odata[ss],-1.0); } return ret; }; diff --git a/lib/lattice/Lattice_comparison.h b/lib/lattice/Lattice_comparison.h index 431cf911..604ea4b9 100644 --- a/lib/lattice/Lattice_comparison.h +++ b/lib/lattice/Lattice_comparison.h @@ -11,7 +11,11 @@ namespace Grid { //Query supporting bitwise &, |, ^, ! //Query supporting logical &&, ||, ////////////////////////////////////////////////////////////////////////// - template + + ////////////////////////////////////////////////////////////////////////// + // compare lattice to lattice + ////////////////////////////////////////////////////////////////////////// + template inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) { Lattice ret(rhs._grid); @@ -21,6 +25,9 @@ PARALLEL_FOR_LOOP } return ret; } + ////////////////////////////////////////////////////////////////////////// + // compare lattice to scalar + ////////////////////////////////////////////////////////////////////////// template inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) { @@ -31,6 +38,9 @@ PARALLEL_FOR_LOOP } return ret; } + ////////////////////////////////////////////////////////////////////////// + // compare scalar to lattice + ////////////////////////////////////////////////////////////////////////// template inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) { @@ -42,6 +52,9 @@ PARALLEL_FOR_LOOP return ret; } + ////////////////////////////////////////////////////////////////////////// + // Map to functors + ////////////////////////////////////////////////////////////////////////// // Less than template inline Lattice operator < (const Lattice & lhs, const Lattice & rhs) { @@ -99,7 +112,6 @@ PARALLEL_FOR_LOOP return SLComparison(vge(),lhs,rhs); } - // equal template inline Lattice operator == (const Lattice & lhs, const Lattice & rhs) { diff --git a/lib/lattice/Lattice_comparison_utils.h b/lib/lattice/Lattice_comparison_utils.h index d5e3b3e5..d25c49f2 100644 --- a/lib/lattice/Lattice_comparison_utils.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -5,140 +5,197 @@ namespace Grid { ///////////////////////////////////////// // This implementation is a bit poor. - // Only support logical operations (== etc) - // on scalar objects. Strip any tensor structures. + // + // Only support relational logical operations (<, > etc) + // on scalar objects. Therefore can strip any tensor structures. + // // Should guard this with isGridTensor<> enable if? ///////////////////////////////////////// - // Generic list of functors - template class veq { + // + // Generic list of functors + // + template class veq { + public: + vInteger operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) == (rhs); + } + }; + template class vne { + public: + vInteger operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) != (rhs); + } + }; + template class vlt { + public: + vInteger operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) < (rhs); + } + }; + template class vle { + public: + vInteger operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) <= (rhs); + } + }; + template class vgt { + public: + vInteger operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) > (rhs); + } + }; + template class vge { public: - vInteger operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) == TensorRemove(rhs); - } - }; - template class vne { - public: - vInteger operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) != TensorRemove(rhs); - } - }; - template class vlt { - public: - vInteger operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) < TensorRemove(rhs); - } - }; - template class vle { - public: - vInteger operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) <= TensorRemove(rhs); - } - }; - template class vgt { - public: - vInteger operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) > TensorRemove(rhs); - } - }; - template class vge { - public: - vInteger operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) >= TensorRemove(rhs); - } - }; + vInteger operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) >= (rhs); + } + }; + + // Generic list of functors + template class seq { + public: + Integer operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) == (rhs); + } + }; + template class sne { + public: + Integer operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) != (rhs); + } + }; + template class slt { + public: + Integer operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) < (rhs); + } + }; + template class sle { + public: + Integer operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) <= (rhs); + } + }; + template class sgt { + public: + Integer operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) > (rhs); + } + }; + template class sge { + public: + Integer operator()(const lobj &lhs, const robj &rhs) + { + return (lhs) >= (rhs); + } + }; - // Generic list of functors - template class seq { - public: - Integer operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) == TensorRemove(rhs); - } - }; - template class sne { - public: - Integer operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) != TensorRemove(rhs); - } - }; - template class slt { - public: - Integer operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) < TensorRemove(rhs); - } - }; - template class sle { - public: - Integer operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) <= TensorRemove(rhs); - } - }; - template class sgt { - public: - Integer operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) > TensorRemove(rhs); - } - }; - template class sge { - public: - Integer operator()(const lobj &lhs, const robj &rhs) - { - return TensorRemove(lhs) >= TensorRemove(rhs); - } - }; - - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Integer gets extra relational functions. Could also implement these for RealF, RealD etc.. - ////////////////////////////////////////////////////////////////////////////////////////////////////// - template - inline vInteger Comparison(sfunctor sop,const vInteger & lhs, const vInteger & rhs) + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Integer and real get extra relational functions. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + template = 0> + inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs) { - std::vector vlhs(vInteger::Nsimd()); // Use functors to reduce this to single implementation - std::vector vrhs(vInteger::Nsimd()); + typedef typename vsimd::scalar_type scalar; + std::vector vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation + std::vector vrhs(vsimd::Nsimd()); + std::vector vpred(vsimd::Nsimd()); vInteger ret; - extract(lhs,vlhs); - extract(rhs,vrhs); - for(int s=0;s(lhs,vlhs); + extract(rhs,vrhs); + for(int s=0;s(ret,vlhs); + merge(ret,vpred); return ret; } - inline vInteger operator < (const vInteger & lhs, const vInteger & rhs) + + template = 0> + inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs) { - return Comparison(slt(),lhs,rhs); + typedef typename vsimd::scalar_type scalar; + std::vector vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation + std::vector vpred(vsimd::Nsimd()); + vInteger ret; + extract(lhs,vlhs); + for(int s=0;s(ret,vpred); + return ret; } - inline vInteger operator <= (const vInteger & lhs, const vInteger & rhs) + + template = 0> + inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs) { - return Comparison(sle(),lhs,rhs); - } - inline vInteger operator > (const vInteger & lhs, const vInteger & rhs) - { - return Comparison(sgt(),lhs,rhs); - } - inline vInteger operator >= (const vInteger & lhs, const vInteger & rhs) - { - return Comparison(sge(),lhs,rhs); - } - inline vInteger operator == (const vInteger & lhs, const vInteger & rhs) - { - return Comparison(seq(),lhs,rhs); - } - inline vInteger operator != (const vInteger & lhs, const vInteger & rhs) - { - return Comparison(sne(),lhs,rhs); + typedef typename vsimd::scalar_type scalar; + std::vector vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation + std::vector vpred(vsimd::Nsimd()); + vInteger ret; + extract(rhs,vrhs); + for(int s=0;s(ret,vpred); + return ret; } + +#define DECLARE_RELATIONAL(op,functor) \ + template = 0>\ + inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\ + {\ + typedef typename vsimd::scalar_type scalar;\ + return Comparison(functor(),lhs,rhs);\ + }\ + template = 0>\ + inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \ + {\ + typedef typename vsimd::scalar_type scalar;\ + return Comparison(functor(),lhs,rhs);\ + }\ + template = 0>\ + inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \ + {\ + typedef typename vsimd::scalar_type scalar;\ + return Comparison(functor(),lhs,rhs);\ + }\ + template\ + inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ + { \ + return lhs._internal op rhs._internal; \ + } \ + template\ + inline vInteger operator op(const iScalar &lhs,const typename vsimd::scalar_type &rhs) \ + { \ + return lhs._internal op rhs; \ + } \ + template\ + inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar &rhs) \ + { \ + return lhs op rhs._internal; \ + } + + +DECLARE_RELATIONAL(<,slt); +DECLARE_RELATIONAL(<=,sle); +DECLARE_RELATIONAL(>,sgt); +DECLARE_RELATIONAL(>=,sge); +DECLARE_RELATIONAL(==,seq); +DECLARE_RELATIONAL(!=,sne); + +#undef DECLARE_RELATIONAL + } diff --git a/lib/lattice/Lattice_peekpoke.h b/lib/lattice/Lattice_peekpoke.h index 9416226a..5455fdd6 100644 --- a/lib/lattice/Lattice_peekpoke.h +++ b/lib/lattice/Lattice_peekpoke.h @@ -117,7 +117,9 @@ PARALLEL_FOR_LOOP int Nsimd = grid->Nsimd(); assert( l.checkerboard == l._grid->CheckerBoard(site)); - assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + + // FIXME + // assert( sizeof(sobj)*Nsimd == sizeof(vobj)); int rank,odx,idx; grid->GlobalCoorToRankIndex(rank,odx,idx,site); @@ -132,6 +134,7 @@ PARALLEL_FOR_LOOP return; }; + ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// diff --git a/lib/lattice/Lattice_reality.h b/lib/lattice/Lattice_reality.h index 8c8bc17a..29a7976a 100644 --- a/lib/lattice/Lattice_reality.h +++ b/lib/lattice/Lattice_reality.h @@ -27,37 +27,6 @@ PARALLEL_FOR_LOOP return ret; }; - template inline auto real(const Lattice &z) -> Lattice - { - Lattice ret(z._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = real(z._odata[ss]); - } - return ret; - } - - template inline auto imag(const Lattice &z) -> Lattice - { - Lattice ret(z._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = imag(z._odata[ss]); - } - return ret; - } - - - template inline auto Ta(const Lattice &z) -> Lattice - { - Lattice ret(z._grid); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = Ta(z._odata[ss]); - } - return ret; - } - } #endif diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index a6d3e0dc..922821ed 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -51,6 +51,31 @@ PARALLEL_FOR_LOOP return nrm; } + template + inline auto sum(const LatticeUnaryExpression & expr) + ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object + { + return sum(closure(expr)); + } + + template + inline auto sum(const LatticeBinaryExpression & expr) + ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object + { + return sum(closure(expr)); + } + + + template + inline auto sum(const LatticeTrinaryExpression & expr) + ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), + eval(0,std::get<1>(expr.second)), + eval(0,std::get<2>(expr.second)) + ))::scalar_object + { + return sum(closure(expr)); + } + template inline typename vobj::scalar_object sum(const Lattice &arg){ diff --git a/lib/lattice/Lattice_transpose.h b/lib/lattice/Lattice_transpose.h index c8e7433f..73ef1747 100644 --- a/lib/lattice/Lattice_transpose.h +++ b/lib/lattice/Lattice_transpose.h @@ -24,8 +24,7 @@ PARALLEL_FOR_LOOP // Index level dependent transpose //////////////////////////////////////////////////////////////////////////////////////////////////// template - inline auto transposeIndex(const Lattice &lhs) - -> Lattice(lhs._odata[0]))> + inline auto transposeIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> { Lattice(lhs._odata[0]))> ret(lhs._grid); PARALLEL_FOR_LOOP diff --git a/lib/lattice/Lattice_unary.h b/lib/lattice/Lattice_unary.h index fcdcc484..91b39007 100644 --- a/lib/lattice/Lattice_unary.h +++ b/lib/lattice/Lattice_unary.h @@ -3,51 +3,6 @@ namespace Grid { - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // avoid copy back routines for mult, mac, sub, add - ////////////////////////////////////////////////////////////////////////////////////////////////////// - template Lattice sqrt(const Lattice &rhs){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=sqrt(rhs._odata[ss]); - } - return ret; - } - template Lattice rsqrt(const Lattice &rhs){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=rsqrt(rhs._odata[ss]); - } - return ret; - } - template Lattice sin(const Lattice &rhs){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=sin(rhs._odata[ss]); - } - return ret; - } - template Lattice cos(const Lattice &rhs){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=cos(rhs._odata[ss]); - } - return ret; - } - - template Lattice pow(const Lattice &rhs,RealD y){ Lattice ret(rhs._grid); ret.checkerboard = rhs.checkerboard; diff --git a/lib/lattice/Lattice_where.h b/lib/lattice/Lattice_where.h index 84d7471c..c83d80c1 100644 --- a/lib/lattice/Lattice_where.h +++ b/lib/lattice/Lattice_where.h @@ -8,13 +8,14 @@ namespace Grid { // and blow away the tensor structures. // template -inline void where(Lattice &ret,const Lattice &predicate,Lattice &iftrue,Lattice &iffalse) +inline void whereWolf(Lattice &ret,const Lattice &predicate,Lattice &iftrue,Lattice &iffalse) { conformable(iftrue,iffalse); conformable(iftrue,predicate); conformable(iftrue,ret); GridBase *grid=iftrue._grid; + typedef typename vobj::scalar_object scalar_object; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; @@ -43,7 +44,7 @@ PARALLEL_FOR_LOOP } template -inline Lattice where(const Lattice &predicate,Lattice &iftrue,Lattice &iffalse) +inline Lattice whereWolf(const Lattice &predicate,Lattice &iftrue,Lattice &iffalse) { conformable(iftrue,iffalse); conformable(iftrue,predicate); diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index f17c557b..ab180ff6 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -250,6 +250,7 @@ namespace QCD { ////////////////////////////////////////////////////////////////////////////// // Peek and Poke named after physics attributes ////////////////////////////////////////////////////////////////////////////// + //spin template auto peekSpin(const vobj &rhs,int i) -> decltype(peekIndex(rhs,0)) { @@ -289,20 +290,117 @@ namespace QCD { { return peekIndex(rhs,i); } - template auto peekLorentz(const vobj &rhs,int i,int j) -> decltype(peekIndex(rhs,0,0)) - { - return peekIndex(rhs,i,j); - } template auto peekLorentz(const Lattice &rhs,int i) -> decltype(peekIndex(rhs,0)) { return peekIndex(rhs,i); } - template auto peekLorentz(const Lattice &rhs,int i,int j) -> decltype(peekIndex(rhs,0,0)) + + ////////////////////////////////////////////// + // Poke lattice + ////////////////////////////////////////////// + template + void pokeColour(Lattice &lhs, + const Lattice(lhs._odata[0],0))> & rhs, + int i) { - return peekIndex(rhs,i,j); + pokeIndex(lhs,rhs,i); + } + template + void pokeColour(Lattice &lhs, + const Lattice(lhs._odata[0],0,0))> & rhs, + int i,int j) + { + pokeIndex(lhs,rhs,i,j); + } + template + void pokeSpin(Lattice &lhs, + const Lattice(lhs._odata[0],0))> & rhs, + int i) + { + pokeIndex(lhs,rhs,i); + } + template + void pokeSpin(Lattice &lhs, + const Lattice(lhs._odata[0],0,0))> & rhs, + int i,int j) + { + pokeIndex(lhs,rhs,i,j); + } + template + void pokeLorentz(Lattice &lhs, + const Lattice(lhs._odata[0],0))> & rhs, + int i) + { + pokeIndex(lhs,rhs,i); } - // FIXME transpose Colour, transpose Spin, traceColour traceSpin + ////////////////////////////////////////////// + // Poke scalars + ////////////////////////////////////////////// + + template void pokeSpin(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) + { + pokeIndex(lhs,rhs,i); + } + template void pokeSpin(vobj &lhs,const decltype(peekIndex(lhs,0,0)) & rhs,int i,int j) + { + pokeIndex(lhs,rhs,i,j); + } + + template void pokeColour(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) + { + pokeIndex(lhs,rhs,i); + } + template void pokeColour(vobj &lhs,const decltype(peekIndex(lhs,0,0)) & rhs,int i,int j) + { + pokeIndex(lhs,rhs,i,j); + } + + template void pokeLorentz(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) + { + pokeIndex(lhs,rhs,i); + } + + + ////////////////////////////////////////////// + // transpose array and scalar + ////////////////////////////////////////////// + template inline Lattice transposeSpin(const Lattice &lhs){ + return transposeIndex(lhs); + } + template inline Lattice transposeColour(const Lattice &lhs){ + return transposeIndex(lhs); + } + template inline vobj transposeSpin(const vobj &lhs){ + return transposeIndex(lhs); + } + template inline vobj transposeColour(const vobj &lhs){ + return transposeIndex(lhs); + } + + ////////////////////////////////////////// + // Trace lattice and non-lattice + ////////////////////////////////////////// + template + inline auto traceSpin(const Lattice &lhs) -> Lattice(lhs._odata[0]))> + { + return traceIndex(lhs); + } + template + inline auto traceColour(const Lattice &lhs) -> Lattice(lhs._odata[0]))> + { + return traceIndex(lhs); + } + template + inline auto traceSpin(const vobj &lhs) -> Lattice(lhs))> + { + return traceIndex(lhs); + } + template + inline auto traceColour(const vobj &lhs) -> Lattice(lhs))> + { + return traceIndex(lhs); + } } //namespace QCD } // Grid diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 82230aa8..69b86859 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -70,7 +70,7 @@ public: ////////////////////////////////////////////////// // the sum over all staples on each site ////////////////////////////////////////////////// - static void Staple(GaugeMat &staple,GaugeLorentz &Umu,int mu){ + static void Staple(GaugeMat &staple,const GaugeLorentz &Umu,int mu){ std::vector U(4,Umu._grid); for(int d=0;d &U, const int mu, typedef WilsonLoops ColourWilsonLoops; + typedef WilsonLoops U1WilsonLoops; + typedef WilsonLoops SU2WilsonLoops; + typedef WilsonLoops SU3WilsonLoops; }} - - - - - #endif diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index c670f531..034d5314 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -28,13 +28,19 @@ namespace Grid { + ////////////////////////////////////// // To take the floating point type of real/complex type + ////////////////////////////////////// template struct RealPart { typedef T type; }; template struct RealPart< std::complex >{ typedef T type; }; + + ////////////////////////////////////// + // demote a vector to real type + ////////////////////////////////////// // type alias used to simplify the syntax of std::enable_if template using Invoke = typename T::type; @@ -90,7 +96,7 @@ namespace Grid { Vector_type v; Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)]; conv_t_union(){}; - } conv_t; + } conv_t; Vector_type v; @@ -205,7 +211,6 @@ namespace Grid { return *this; } - /////////////////////////////////////// // Not all functions are supported // through SIMD and must breakout to @@ -214,7 +219,6 @@ namespace Grid { /////////////////////////////////////// template friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) { - Grid_simd ret; Grid_simd::conv_t conv; @@ -225,6 +229,19 @@ namespace Grid { ret.v = conv.v; return ret; } + template friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) { + Grid_simd ret; + Grid_simd::conv_t cx; + Grid_simd::conv_t cy; + + cx.v = x.v; + cy.v = y.v; + for(int i=0;i(y,b,perm); } + };// end of Grid_simd class definition @@ -383,7 +401,6 @@ namespace Grid { return in; } - ///////////////////// // Inner, outer ///////////////////// @@ -405,6 +422,46 @@ namespace Grid { return arg; } + + //////////////////////////////////////////////////////////// + // copy/splat complex real parts into real; + // insert real into complex and zero imag; + //////////////////////////////////////////////////////////// + + //real = toReal( complex ) + template = 0> + inline Grid_simd toReal(const Grid_simd,V> &in) + { + typedef Grid_simd simd; + simd ret; + typename simd::conv_t conv; + conv.v = in.v; + for(int i=0;i = 0 > // must be a real arg + inline Grid_simd,V> toComplex (const Grid_simd &in) + { + typedef Grid_simd Rsimd; + typedef Grid_simd,V> Csimd; + typename Rsimd::conv_t conv;// address as real + + conv.v = in.v; + for(int i=0;i , SIMD_Ftype > vComplexF; typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD; typedef Grid_simd< Integer , SIMD_Itype > vInteger; + + ///////////////////////////////////////// + // Some traits to recognise the types + ///////////////////////////////////////// + template struct is_simd : public std::false_type{}; + template <> struct is_simd : public std::true_type {}; + template <> struct is_simd : public std::true_type {}; + template <> struct is_simd: public std::true_type {}; + template <> struct is_simd: public std::true_type {}; + template <> struct is_simd : public std::true_type {}; + + template using IfSimd = Invoke::value,int> > ; + template using IfNotSimd = Invoke::value,unsigned> > ; + } #endif diff --git a/lib/simd/Grid_vector_unops.h b/lib/simd/Grid_vector_unops.h index 8e2b6956..3ba77d82 100644 --- a/lib/simd/Grid_vector_unops.h +++ b/lib/simd/Grid_vector_unops.h @@ -1,6 +1,8 @@ #ifndef GRID_VECTOR_UNOPS #define GRID_VECTOR_UNOPS +#include + namespace Grid { template struct SqrtRealFunctor { @@ -27,6 +29,28 @@ namespace Grid { } }; + template struct LogRealFunctor { + scalar operator()(const scalar &a) const { + return log(real(a)); + } + }; + + template struct ExpRealFunctor { + scalar operator()(const scalar &a) const { + return exp(real(a)); + } + }; + template struct NotFunctor { + scalar operator()(const scalar &a) const { + return (!a); + } + }; + template struct AbsRealFunctor { + scalar operator()(const scalar &a) const { + return std::abs(real(a)); + } + }; + template struct PowRealFunctor { double y; PowRealFunctor(double _y) : y(_y) {}; @@ -43,6 +67,25 @@ namespace Grid { } }; + template struct RealFunctor { + scalar operator()(const scalar &a) const { + return real(a); + } + }; + template struct ImagFunctor { + scalar operator()(const scalar &a) const { + return imag(a); + } + }; + template < class S, class V > + inline Grid_simd real(const Grid_simd &r) { + return SimdApply(RealFunctor(),r); + } + template < class S, class V > + inline Grid_simd imag(const Grid_simd &r) { + return SimdApply(ImagFunctor(),r); + } + template < class S, class V > inline Grid_simd sqrt(const Grid_simd &r) { return SimdApply(SqrtRealFunctor(),r); @@ -60,6 +103,22 @@ namespace Grid { return SimdApply(CosRealFunctor(),r); } template < class S, class V > + inline Grid_simd log(const Grid_simd &r) { + return SimdApply(LogRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd abs(const Grid_simd &r) { + return SimdApply(AbsRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd exp(const Grid_simd &r) { + return SimdApply(ExpRealFunctor(),r); + } + template < class S, class V > + inline Grid_simd Not(const Grid_simd &r) { + return SimdApply(NotFunctor(),r); + } + template < class S, class V > inline Grid_simd pow(const Grid_simd &r,double y) { return SimdApply(PowRealFunctor(y),r); } @@ -67,6 +126,55 @@ namespace Grid { inline Grid_simd mod(const Grid_simd &r,Integer y) { return SimdApply(ModIntFunctor(y),r); } + //////////////////////////////////////////////////////////////////////////// + // Allows us to assign into **conformable** real vectors from complex + //////////////////////////////////////////////////////////////////////////// + // template < class S, class V > + // inline auto ComplexRemove(const Grid_simd &c) -> Grid_simd::Real,V> { + // Grid_simd::Real,V> ret; + // ret.v = c.v; + // return ret; + // } + template struct AndFunctor { + scalar operator()(const scalar &x, const scalar &y) const { + return x & y; + } + }; + template struct OrFunctor { + scalar operator()(const scalar &x, const scalar &y) const { + return x | y; + } + }; + template struct AndAndFunctor { + scalar operator()(const scalar &x, const scalar &y) const { + return x && y; + } + }; + template struct OrOrFunctor { + scalar operator()(const scalar &x, const scalar &y) const { + return x || y; + } + }; + + //////////////////////////////// + // Calls to simd binop functors + //////////////////////////////// + template < class S, class V > + inline Grid_simd operator &(const Grid_simd &x,const Grid_simd &y) { + return SimdApplyBinop(AndFunctor(),x,y); + } + template < class S, class V > + inline Grid_simd operator &&(const Grid_simd &x,const Grid_simd &y) { + return SimdApplyBinop(AndAndFunctor(),x,y); + } + template < class S, class V > + inline Grid_simd operator |(const Grid_simd &x,const Grid_simd &y) { + return SimdApplyBinop(OrFunctor(),x,y); + } + template < class S, class V > + inline Grid_simd operator ||(const Grid_simd &x,const Grid_simd &y) { + return SimdApplyBinop(OrOrFunctor(),x,y); + } } #endif diff --git a/lib/tensors/Tensor_arith_scalar.h b/lib/tensors/Tensor_arith_scalar.h index c6418638..5999f935 100644 --- a/lib/tensors/Tensor_arith_scalar.h +++ b/lib/tensors/Tensor_arith_scalar.h @@ -9,12 +9,12 @@ namespace Grid { ////////////////////////////////////////////////////////////////////////////////////////// // multiplication by fundamental scalar type -template strong_inline iScalar operator * (const iScalar& lhs,const typename iScalar::scalar_type rhs) +template strong_inline iScalar operator * (const iScalar& lhs,const typename iScalar::scalar_type rhs) { typename iScalar::tensor_reduced srhs; srhs=rhs; return lhs*srhs; } -template strong_inline iScalar operator * (const typename iScalar::scalar_type lhs,const iScalar& rhs) { return rhs*lhs; } +template strong_inline iScalar operator * (const typename iScalar::scalar_type lhs,const iScalar& rhs) { return rhs*lhs; } template strong_inline iVector operator * (const iVector& lhs,const typename iScalar::scalar_type rhs) { @@ -118,12 +118,12 @@ template strong_inline iMatrix operator * (Integer lhs,const /////////////////////////////////////////////////////////////////////////////////////////////// // addition by fundamental scalar type applies to matrix(down diag) and scalar /////////////////////////////////////////////////////////////////////////////////////////////// -template strong_inline iScalar operator + (const iScalar& lhs,const typename iScalar::scalar_type rhs) +template strong_inline iScalar operator + (const iScalar& lhs,const typename iScalar::scalar_type rhs) { typename iScalar::tensor_reduced srhs; srhs=rhs; return lhs+srhs; } -template strong_inline iScalar operator + (const typename iScalar::scalar_type lhs,const iScalar& rhs) { return rhs+lhs; } +template strong_inline iScalar operator + (const typename iScalar::scalar_type lhs,const iScalar& rhs) { return rhs+lhs; } template strong_inline iMatrix operator + (const iMatrix& lhs,const typename iScalar::scalar_type rhs) { @@ -176,12 +176,12 @@ template strong_inline iMatrix operator + (Integer lhs,const /////////////////////////////////////////////////////////////////////////////////////////////// // subtraction of fundamental scalar type applies to matrix(down diag) and scalar /////////////////////////////////////////////////////////////////////////////////////////////// -template strong_inline iScalar operator - (const iScalar& lhs,const typename iScalar::scalar_type rhs) +template strong_inline iScalar operator - (const iScalar& lhs,const typename iScalar::scalar_type rhs) { typename iScalar::tensor_reduced srhs; srhs=rhs; return lhs-srhs; } -template strong_inline iScalar operator - (const typename iScalar::scalar_type lhs,const iScalar& rhs) +template strong_inline iScalar operator - (const typename iScalar::scalar_type lhs,const iScalar& rhs) { typename iScalar::tensor_reduced slhs;slhs=lhs; return slhs-rhs; diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index 019e6fb1..7962517c 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -23,13 +23,17 @@ template class iScalar public: vtype _internal; - typedef typename GridTypeMapper::scalar_type scalar_type; + typedef typename GridTypeMapper::scalar_type scalar_type; typedef typename GridTypeMapper::vector_type vector_type; typedef typename GridTypeMapper::tensor_reduced tensor_reduced_v; typedef iScalar tensor_reduced; typedef typename GridTypeMapper::scalar_object recurse_scalar_object; typedef iScalar scalar_object; + // substitutes a real or complex version with same tensor structure + typedef iScalar::Complexified > Complexified; + typedef iScalar::Realified > Realified; + enum { TensorLevel = GridTypeMapper::TensorLevel + 1}; // Scalar no action @@ -86,9 +90,19 @@ public: strong_inline const vtype & operator ()(void) const { return _internal; } - - operator ComplexD () const { return(TensorRemove(_internal)); }; - operator RealD () const { return(real(TensorRemove(_internal))); } + + // Type casts meta programmed + template = 0,IfNotSimd = 0> + operator ComplexF () const { return(TensorRemove(_internal)); }; + template = 0,IfNotSimd = 0> + operator ComplexD () const { return(TensorRemove(_internal)); }; + template = 0,IfNotSimd = 0> + operator RealD () const { return(real(TensorRemove(_internal))); } + template = 0,IfNotSimd = 0> + operator RealD () const { return TensorRemove(_internal); } + template = 0,IfNotSimd = 0> + operator Integer () const { return Integer(TensorRemove(_internal)); } + // convert from a something to a scalar via constructor of something arg template::value, T>::type* = nullptr > strong_inline iScalar operator = (T arg) @@ -123,6 +137,10 @@ public: typedef iScalar tensor_reduced; typedef iVector scalar_object; + // substitutes a real or complex version with same tensor structure + typedef iVector::Complexified,N > Complexified; + typedef iVector::Realified,N > Realified; + template::value, T>::type* = nullptr > strong_inline auto operator = (T arg) -> iVector { zeroit(*this); @@ -211,6 +229,12 @@ public: typedef typename GridTypeMapper::vector_type vector_type; typedef typename GridTypeMapper::tensor_reduced tensor_reduced_v; typedef typename GridTypeMapper::scalar_object recurse_scalar_object; + + // substitutes a real or complex version with same tensor structure + typedef iMatrix::Complexified,N > Complexified; + typedef iMatrix::Realified,N > Realified; + + // Tensure removal typedef iScalar tensor_reduced; typedef iMatrix scalar_object; diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index 75868463..cea9bb64 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -31,18 +31,17 @@ inline void merge(typename std::enable_if::value, vsimd >:: std::vector &extracted,int offset){ int Nextr=extracted.size(); int Nsimd=vsimd::Nsimd(); - int s=Nsimd/Nextr; - + int s=Nsimd/Nextr; // can have sparse occupation of simd vector if simd_layout does not fill it + // replicate n-fold. Use to allow Integer masks to + // predicate floating point of various width assignments and maintain conformable. scalar *buf =(scalar *) y; for(int i=0;i::value, const v scalar *buf = (scalar *)&y; for(int i=0;i::value, vsimd >:: for(int i=0;i -inline void AmergeA(typename std::enable_if::value, vsimd >::type &y,std::vector &extracted){ - int Nextr=extracted.size(); - int Nsimd=vsimd::Nsimd(); - int s=Nsimd/Nextr; - - scalar *buf = (scalar *)&y; - for(int i=0;i inline void extract(const vobj &vec,std::vector pointers(Nsimd); - for(int i=0;i pointers(Nextr); + for(int i=0;i &extrac const int words=sizeof(vobj)/sizeof(vector_type); const int Nsimd=vobj::vector_type::Nsimd(); - - assert(extracted.size()==Nsimd); + int Nextr=extracted.size(); + int s = Nsimd/Nextr; std::vector pointers(Nsimd); - for(int i=0;i &extracted) const int Nsimd=vobj::vector_type::Nsimd(); const int words=sizeof(vobj)/sizeof(vector_type); - assert(extracted.size()==Nsimd); + int Nextr = extracted.size(); + int splat=Nsimd/Nextr; - std::vector pointers(Nsimd); - for(int i=0;i pointers(Nextr); + for(int i=0;i &extracted,int const int Nsimd=vobj::vector_type::Nsimd(); const int words=sizeof(vobj)/sizeof(vector_type); - assert(extracted.size()==Nsimd); + int Nextr=extracted.size(); - std::vector pointers(Nsimd); - for(int i=0;i pointers(Nextr); + for(int i=0;i(&vp[w],pointers,w); } diff --git a/lib/tensors/Tensor_inner.h b/lib/tensors/Tensor_inner.h index bbb35605..c77adff1 100644 --- a/lib/tensors/Tensor_inner.h +++ b/lib/tensors/Tensor_inner.h @@ -10,7 +10,8 @@ namespace Grid { typedef typename sobj::scalar_type scalar; decltype(innerProduct(arg,arg)) nrm; nrm = innerProduct(arg,arg); - return real(nrm); + RealD ret = real(nrm); + return ret; } template inline diff --git a/lib/tensors/Tensor_logical.h b/lib/tensors/Tensor_logical.h new file mode 100644 index 00000000..5ded9f9c --- /dev/null +++ b/lib/tensors/Tensor_logical.h @@ -0,0 +1,32 @@ +#ifndef GRID_TENSOR_LOGICAL_H +#define GRID_TENSOR_LOGICAL_H + +namespace Grid { + +#define LOGICAL_BINOP(Op)\ +template strong_inline iScalar operator Op (const iScalar& lhs,const iScalar& rhs) \ +{\ + iScalar ret;\ + ret._internal = lhs._internal Op rhs._internal ;\ + return ret;\ +}\ +template strong_inline iScalar operator Op (const iScalar& lhs,Integer rhs) \ +{\ + typename iScalar::scalar_type t; t=rhs;\ + typename iScalar::tensor_reduced srhs; srhs=t;\ + return lhs Op srhs;\ +}\ +template strong_inline iScalar operator Op (Integer lhs,const iScalar& rhs) \ +{\ + typename iScalar::scalar_type t;t=lhs;\ + typename iScalar::tensor_reduced slhs;slhs=t;\ + return slhs Op rhs;\ +} + +LOGICAL_BINOP(|); +LOGICAL_BINOP(&); +LOGICAL_BINOP(||); +LOGICAL_BINOP(&&); + +} +#endif diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h index b5715c93..703a2ff5 100644 --- a/lib/tensors/Tensor_traits.h +++ b/lib/tensors/Tensor_traits.h @@ -26,6 +26,8 @@ namespace Grid { typedef typename T::vector_type vector_type; typedef typename T::tensor_reduced tensor_reduced; typedef typename T::scalar_object scalar_object; + typedef typename T::Complexified Complexified; + typedef typename T::Realified Realified; enum { TensorLevel = T::TensorLevel }; }; @@ -38,6 +40,8 @@ namespace Grid { typedef RealF vector_type; typedef RealF tensor_reduced ; typedef RealF scalar_object; + typedef ComplexF Complexified; + typedef RealF Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -46,6 +50,8 @@ namespace Grid { typedef RealD vector_type; typedef RealD tensor_reduced; typedef RealD scalar_object; + typedef ComplexD Complexified; + typedef RealD Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -54,6 +60,8 @@ namespace Grid { typedef ComplexF vector_type; typedef ComplexF tensor_reduced; typedef ComplexF scalar_object; + typedef ComplexF Complexified; + typedef RealF Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -62,6 +70,8 @@ namespace Grid { typedef ComplexD vector_type; typedef ComplexD tensor_reduced; typedef ComplexD scalar_object; + typedef ComplexD Complexified; + typedef RealD Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -70,6 +80,8 @@ namespace Grid { typedef Integer vector_type; typedef Integer tensor_reduced; typedef Integer scalar_object; + typedef void Complexified; + typedef void Realified; enum { TensorLevel = 0 }; }; @@ -79,6 +91,8 @@ namespace Grid { typedef vRealF vector_type; typedef vRealF tensor_reduced; typedef RealF scalar_object; + typedef vComplexF Complexified; + typedef vRealF Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -87,6 +101,8 @@ namespace Grid { typedef vRealD vector_type; typedef vRealD tensor_reduced; typedef RealD scalar_object; + typedef vComplexD Complexified; + typedef vRealD Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -95,6 +111,8 @@ namespace Grid { typedef vComplexF vector_type; typedef vComplexF tensor_reduced; typedef ComplexF scalar_object; + typedef vComplexF Complexified; + typedef vRealF Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -103,6 +121,8 @@ namespace Grid { typedef vComplexD vector_type; typedef vComplexD tensor_reduced; typedef ComplexD scalar_object; + typedef vComplexD Complexified; + typedef vRealD Realified; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -111,6 +131,8 @@ namespace Grid { typedef vInteger vector_type; typedef vInteger tensor_reduced; typedef Integer scalar_object; + typedef void Complexified; + typedef void Realified; enum { TensorLevel = 0 }; }; diff --git a/lib/tensors/Tensor_unary.h b/lib/tensors/Tensor_unary.h index 2a546701..045097a3 100644 --- a/lib/tensors/Tensor_unary.h +++ b/lib/tensors/Tensor_unary.h @@ -2,7 +2,7 @@ #define GRID_TENSOR_UNARY_H namespace Grid { -#define UNARY_REAL(func)\ +#define UNARY(func)\ template inline auto func(const iScalar &z) -> iScalar\ {\ iScalar ret;\ @@ -53,14 +53,71 @@ template inline iScalar func(const iScalar &z,scal y) \ return ret;\ } -UNARY_REAL(sqrt); -UNARY_REAL(rsqrt); -UNARY_REAL(sin); -UNARY_REAL(cos); +UNARY(sqrt); +UNARY(rsqrt); +UNARY(sin); +UNARY(cos); +UNARY(log); +UNARY(exp); +UNARY(abs); +UNARY(Not); + + +template inline auto toReal(const iScalar &z) -> typename iScalar::Realified +{ + typename iScalar::Realified ret; + ret._internal = toReal(z._internal); + return ret; +} + template inline auto toReal(const iVector &z) -> typename iVector::Realified +{ + typename iVector::Realified ret; + for(int c1=0;c1 inline auto toReal(const iMatrix &z) -> typename iMatrix::Realified +{ + typename iMatrix::Realified ret; + for(int c1=0;c1 inline auto toComplex(const iScalar &z) -> typename iScalar::Complexified +{ + typename iScalar::Complexified ret; + ret._internal = toComplex(z._internal); + return ret; +} + template inline auto toComplex(const iVector &z) -> typename iVector::Complexified +{ + typename iVector::Complexified ret; + for(int c1=0;c1 inline auto toComplex(const iMatrix &z) -> typename iMatrix::Complexified +{ + typename iMatrix::Complexified ret; + for(int c1=0;c1 peer(4); - int index=real(cm); + Complex tmp =cm; + Integer index=real(tmp); Fine.CoorFromIndex(peer,index,latt_size); if (nrm > 0){ diff --git a/tests/Test_cshift_red_black.cc b/tests/Test_cshift_red_black.cc index 9ffa66b1..5cafd04f 100644 --- a/tests/Test_cshift_red_black.cc +++ b/tests/Test_cshift_red_black.cc @@ -100,7 +100,8 @@ int main (int argc, char ** argv) double nrm = abs(scm-cm()()()); std::vector peer(4); - int index=real(cm); + Complex ctmp = cm; + Integer index=real(ctmp); Fine.CoorFromIndex(peer,index,latt_size); if (nrm > 0){ @@ -138,7 +139,8 @@ int main (int argc, char ** argv) nrm = abs(scm-cm()()()); std::vector peer(4); - int index=real(cm); + Complex ctmp=cm; + Integer index=real(ctmp); Fine.CoorFromIndex(peer,index,latt_size); if (nrm > 0){ diff --git a/tests/Test_gamma.cc b/tests/Test_gamma.cc index bae77ecc..118362e4 100644 --- a/tests/Test_gamma.cc +++ b/tests/Test_gamma.cc @@ -95,7 +95,7 @@ int main (int argc, char ** argv) for(int mu=0;mu<6;mu++){ result = Gamma(g[mu])* ident * Gamma(g[mu]); result = result - ident; - double mag = TensorRemove(norm2(result)); + RealD mag = norm2(result); std::cout << list[mu]<<" " << mag< #include +#include using namespace std; using namespace Grid; using namespace Grid::QCD; -class suN { -public: - - static int generators(int ncolour) { return ncolour*ncolour-1; } - static int su2subgroups(int ncolour) { return (ncolour*(ncolour-1))/2; } - - template using suNmatrix = iScalar > > ; - - //////////////////////////////////////////////////////////////////////// - // There are N^2-1 generators for SU(N). - // - // We take a traceless hermitian generator basis as follows - // - // * Normalisation: trace ta tb = 1/2 delta_ab - // - // * Off diagonal - // - pairs of rows i1,i2 behaving like pauli matrices signma_x, sigma_y - // - // - there are (Nc-1-i1) slots for i2 on each row [ x 0 x ] - // direct count off each row - // - // - Sum of all pairs is Nc(Nc-1)/2: proof arithmetic series - // - // (Nc-1) + (Nc-2)+... 1 ==> Nc*(Nc-1)/2 - // 1+ 2+ + + Nc-1 - // - // - There are 2 x Nc (Nc-1)/ 2 of these = Nc^2 - Nc - // - // - We enumerate the row-col pairs. - // - for each row col pair there is a (sigma_x) and a (sigma_y) like generator - // - // - // t^a_ij = { in 0.. Nc(Nc-1)/2 -1} => delta_{i,i1} delta_{j,i2} + delta_{i,i1} delta_{j,i2} - // t^a_ij = { in Nc(Nc-1)/2 ... Nc^(Nc-1) -1} => i delta_{i,i1} delta_{j,i2} - i delta_{i,i1} delta_{j,i2} - // - // * Diagonal; must be traceless and normalised - // - Sequence is - // N (1,-1,0,0...) - // N (1, 1,-2,0...) - // N (1, 1, 1,-3,0...) - // N (1, 1, 1, 1,-4,0...) - // - // where 1/2 = N^2 (1+.. m^2)etc.... for the m-th diagonal generator - // NB this gives the famous SU3 result for su2 index 8 - // - // N= sqrt(1/2 . 1/6 ) = 1/2 . 1/sqrt(3) - // - // ( 1 ) - // ( 1 ) / sqrt(3) /2 = 1/2 lambda_8 - // ( -2) - //////////////////////////////////////////////////////////////////////// - template - static void suNgenerator(int lieIndex,suNmatrix &ta){ - // map lie index to which type of generator - int diagIndex; - int su2Index; - int sigxy; - int NNm1 = Ncolour*(Ncolour-1); - if ( lieIndex>= NNm1 ) { - diagIndex = lieIndex -NNm1; - suNgeneratorDiagonal(diagIndex,ta); - return; - } - sigxy = lieIndex&0x1; - su2Index= lieIndex>>1; - if ( sigxy ) suNgeneratorSigmaY(su2Index,ta); - else suNgeneratorSigmaX(su2Index,ta); - } - template - static void suNgeneratorSigmaX(int su2Index,suNmatrix &ta){ - ta=zero; - int i1,i2; - su2SubGroupIndex(i1,i2,su2Index); - ta()()(i1,i2)=1.0; - ta()()(i2,i1)=1.0; - ta= ta *0.5; - } - template - static void suNgeneratorSigmaY(int su2Index,suNmatrix &ta){ - ta=zero; - Complex i(0.0,1.0); - int i1,i2; - su2SubGroupIndex(i1,i2,su2Index); - ta()()(i1,i2)=-i; - ta()()(i2,i1)= i; - ta= ta *0.5; - } - template - static void suNgeneratorDiagonal(int diagIndex,suNmatrix &ta){ - ta=zero; - int trsq=0; - int last=diagIndex+1; - for(int i=0;i<=diagIndex;i++){ - ta()()(i,i) = 1.0; - trsq++; - } - ta()()(last,last) = -last; - trsq+=last*last; - RealD nrm = 1.0/std::sqrt(2.0*trsq); - ta = ta *nrm; - } - //////////////////////////////////////////////////////////////////////// - // Map a - // - //////////////////////////////////////////////////////////////////////// - template - static void su2SubGroupIndex(int &i1,int &i2,int su2_index){ - - assert( (su2_index>=0) && (su2_index< (Ncolour*(Ncolour-1))/2) ); - - int spare=su2_index; - for(i1=0;spare >= (Ncolour-1-i1);i1++ ){ - spare = spare - (Ncolour-1-i1); // remove the Nc-1-i1 terms - } - i2=i1+1+spare; - } - template - static void su2Extract(std::vector &r,const Lattice > &source, int su2_index) - { - assert(r.size() == 4); // store in 4 real parts - - for(int i=0;i<4;i++){ - conformable(r[i],source); - } - - int i1,i2; - su2SubGroupIndex(i1,i2,su2_index); - - /* Compute the b(k) of A_SU(2) = b0 + i sum_k bk sigma_k */ - r[0] = real(source()()(i1,i1) + source()()(i2,i2)); - r[1] = imag(source()()(i1,i2) + source()()(i2,i1)); - r[2] = real(source()()(i1,i2) - source()()(i2,i1)); - r[3] = imag(source()()(i1,i1) - source()()(i2,i2)); - } - - - template static void printGenerators(void) - { - for(int gen=0;gen ta; - suN::suNgenerator(gen,ta); - std::cout<< "Nc = "< static void testGenerators(void){ - suNmatrix ta; - suNmatrix tb; - std::cout<<"Checking trace ta tb is 0.5 delta_ab"< simd_layout = GridDefaultSimd(4,vComplexF::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); - std::vector latt_size ({4,4,4,4}); - - GridCartesian Fine(latt_size,simd_layout,mpi_layout); - - LatticeGaugeField Umu(&Fine); + std::vector latt({4,4,4,8}); + GridCartesian * grid = SpaceTimeGrid::makeFourDimGrid(latt, + GridDefaultSimd(Nd,vComplexF::Nsimd()), + GridDefaultMpi()); + + GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); std::cout<<"*********************************************"<(); - suN::testGenerators<2>(); + SU2::printGenerators(); + SU2::testGenerators(); + std::cout<<"*********************************************"<(); - suN::testGenerators<3>(); - std::cout<<"*********************************************"<(); - suN::testGenerators<4>(); - std::cout<<"*********************************************"<(); - suN::testGenerators<5>(); + SU3::printGenerators(); + SU3::testGenerators(); + + // std::cout<<"*********************************************"< pseeds({1,2,3,4,5}); // once I caught a fish alive + std::vector sseeds({6,7,8,9,10});// then i let it go again + GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds); + GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds); + + // SU3 colour operatoions + LatticeColourMatrix link(grid); + LatticeColourMatrix staple(grid); + int mu=0; + + // Get Staple + ColourWilsonLoops::Staple(staple,Umu,mu); + // Get Link + link = peekIndex(Umu,mu); + + // Apply heatbath to the link + RealD beta=6.0; + int subgroup=0; + int nhb=1; + int trials=0; + int fails=0; + + LatticeInteger one(rbGrid); one = 1; // fill with ones + LatticeInteger mask(grid); mask= zero; + one.checkerboard=Even; + setCheckerboard(mask,one); + + // update Even checkerboard + + SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup, + nhb,trials,fails,mask); + + Grid_finalize(); } diff --git a/tests/Test_main.cc b/tests/Test_main.cc index 88d69192..daa6c3aa 100644 --- a/tests/Test_main.cc +++ b/tests/Test_main.cc @@ -242,7 +242,7 @@ int main (int argc, char ** argv) { // Peek-ology and Poke-ology, with a little app-ology - TComplex c; + Complex c; ColourMatrix c_m; SpinMatrix s_m; SpinColourMatrix sc_m; @@ -299,7 +299,7 @@ int main (int argc, char ** argv) } Bar = zero; - Bar = where(lex<10,Foo,Bar); + Bar = where(lex coor(4); for(coor[3]=0;coor[3] 0 ) cout<<"Shift real trace fail "<