From 9f0e990b4054ee825b4eb4ab171748e5ad2f362e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 16 May 2015 04:36:22 +0100 Subject: [PATCH] Optimisation and syntax pretty --- lib/lattice/Grid_lattice_base.h | 20 ++++++++++---------- lib/lattice/Grid_lattice_trace.h | 2 +- lib/math/Grid_math_arith_mac.h | 5 +++-- lib/math/Grid_math_arith_mul.h | 17 ++++++++++++----- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/lib/lattice/Grid_lattice_base.h b/lib/lattice/Grid_lattice_base.h index 64103c5d..ae606ae7 100644 --- a/lib/lattice/Grid_lattice_base.h +++ b/lib/lattice/Grid_lattice_base.h @@ -62,7 +62,7 @@ public: //////////////////////////////////////////////////////////////////////////////// // Expression Template closure support //////////////////////////////////////////////////////////////////////////////// - template inline Lattice & operator=(const LatticeUnaryExpression &expr) + template strong_inline Lattice & operator=(const LatticeUnaryExpression &expr) { PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ @@ -71,7 +71,7 @@ PARALLEL_FOR_LOOP } return *this; } - template inline Lattice & operator=(const LatticeBinaryExpression &expr) + template strong_inline Lattice & operator=(const LatticeBinaryExpression &expr) { PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ @@ -80,7 +80,7 @@ PARALLEL_FOR_LOOP } return *this; } - template inline Lattice & operator=(const LatticeTrinaryExpression &expr) + template strong_inline Lattice & operator=(const LatticeTrinaryExpression &expr) { PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ @@ -132,14 +132,14 @@ PARALLEL_FOR_LOOP checkerboard=0; } - template inline Lattice & operator = (const sobj & r){ + template strong_inline Lattice & operator = (const sobj & r){ PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ this->_odata[ss]=r; } return *this; } - template inline Lattice & operator = (const Lattice & r){ + template strong_inline Lattice & operator = (const Lattice & r){ conformable(*this,r); std::cout<<"Lattice operator ="< inline Lattice &operator *=(const T &r) { + template strong_inline Lattice &operator *=(const T &r) { *this = (*this)*r; return *this; } - template inline Lattice &operator -=(const T &r) { + template strong_inline Lattice &operator -=(const T &r) { *this = (*this)-r; return *this; } - template inline Lattice &operator +=(const T &r) { + template strong_inline Lattice &operator +=(const T &r) { *this = (*this)+r; return *this; } - inline friend Lattice operator / (const Lattice &lhs,const Lattice &rhs){ + strong_inline friend Lattice operator / (const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); Lattice ret(lhs._grid); PARALLEL_FOR_LOOP @@ -176,7 +176,7 @@ PARALLEL_FOR_LOOP }; // class Lattice - template inline std::ostream& operator<< (std::ostream& stream, const Lattice &o){ + template std::ostream& operator<< (std::ostream& stream, const Lattice &o){ std::vector gcoor; typedef typename vobj::scalar_object sobj; sobj ss; diff --git a/lib/lattice/Grid_lattice_trace.h b/lib/lattice/Grid_lattice_trace.h index 75cc5b87..4ce26170 100644 --- a/lib/lattice/Grid_lattice_trace.h +++ b/lib/lattice/Grid_lattice_trace.h @@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP // Trace Index level dependent operation //////////////////////////////////////////////////////////////////////////////////////////////////// template - inline auto traceIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> + inline auto latTraceIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> { Lattice(lhs._odata[0]))> ret(lhs._grid); PARALLEL_FOR_LOOP diff --git a/lib/math/Grid_math_arith_mac.h b/lib/math/Grid_math_arith_mac.h index 68b0acf1..06b1661d 100644 --- a/lib/math/Grid_math_arith_mac.h +++ b/lib/math/Grid_math_arith_mac.h @@ -27,13 +27,14 @@ strong_inline void mac(iScalar * __restrict__ ret,const iScalar * } template strong_inline void mac(iMatrix * __restrict__ ret,const iMatrix * __restrict__ lhs,const iMatrix * __restrict__ rhs){ - for(int c2=0;c2_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]); }}} return; } + template strong_inline void mac(iMatrix * __restrict__ ret,const iMatrix * __restrict__ lhs,const iScalar * __restrict__ rhs){ for(int c1=0;c1 * __restrict__ ret,const iScalar * template strong_inline void mult(iMatrix * __restrict__ ret,const iMatrix * __restrict__ lhs,const iMatrix * __restrict__ rhs){ + for(int c1=0;c1_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]); + } + } + for(int c3=1;c3_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]); - for(int c3=1;c3_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]); - } - }} + for(int c2=0;c2_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]); + } + } + } return; } + template strong_inline void mult(iMatrix * __restrict__ ret,const iMatrix * __restrict__ lhs,const iScalar * __restrict__ rhs){ for(int c2=0;c2