diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index b39a475d..aebc093a 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -36,6 +36,7 @@ NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("mult"); ret.Checkerboard() = lhs.Checkerboard(); autoView( ret_v , ret, AcceleratorWrite); autoView( lhs_v , lhs, AcceleratorRead); @@ -53,6 +54,7 @@ void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ template inline void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("mac"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); @@ -70,6 +72,7 @@ void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ template inline void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("sub"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); @@ -86,6 +89,7 @@ void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ } template inline void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + GRID_TRACE("add"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); @@ -106,6 +110,7 @@ void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("mult"); ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); autoView( ret_v , ret, AcceleratorWrite); @@ -119,6 +124,7 @@ void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ template inline void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("mac"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); autoView( ret_v , ret, AcceleratorWrite); @@ -133,6 +139,7 @@ void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ template inline void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("sub"); ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); autoView( ret_v , ret, AcceleratorWrite); @@ -146,6 +153,7 @@ void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ } template inline void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + GRID_TRACE("add"); ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); autoView( ret_v , ret, AcceleratorWrite); @@ -163,6 +171,7 @@ void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("mult"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -177,6 +186,7 @@ void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ template inline void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("mac"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -191,6 +201,7 @@ void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ template inline void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("sub"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -204,6 +215,7 @@ void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ } template inline void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + GRID_TRACE("add"); ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); autoView( ret_v , ret, AcceleratorWrite); @@ -218,6 +230,7 @@ void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ template inline void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice &y){ + GRID_TRACE("axpy"); ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); @@ -231,6 +244,7 @@ void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice & } template inline void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y){ + GRID_TRACE("axpby"); ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); @@ -246,11 +260,13 @@ void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice template inline RealD axpy_norm(Lattice &ret,sobj a,const Lattice &x,const Lattice &y) { + GRID_TRACE("axpy_norm"); return axpy_norm_fast(ret,a,x,y); } template inline RealD axpby_norm(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y) { + GRID_TRACE("axpby_norm"); return axpby_norm_fast(ret,a,b,x,y); } diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 9c3d723f..49c0a100 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -117,6 +117,7 @@ public: //////////////////////////////////////////////////////////////////////////////// template inline Lattice & operator=(const LatticeUnaryExpression &expr) { + GRID_TRACE("ExpressionTemplateEval"); GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); @@ -140,6 +141,7 @@ public: } template inline Lattice & operator=(const LatticeBinaryExpression &expr) { + GRID_TRACE("ExpressionTemplateEval"); GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); @@ -163,6 +165,7 @@ public: } template inline Lattice & operator=(const LatticeTrinaryExpression &expr) { + GRID_TRACE("ExpressionTemplateEval"); GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 16feb856..bcd09c04 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -488,6 +488,14 @@ template inline void sliceSum(const Lattice &Data,std::vector< int words = fd*sizeof(sobj)/sizeof(scalar_type); grid->GlobalSumVector(ptr, words); } +template inline +std::vector +sliceSum(const Lattice &Data,int orthogdim) +{ + std::vector result; + sliceSum(Data,result,orthogdim); + return result; +} template static void sliceInnerProductVector( std::vector & result, const Lattice &lhs,const Lattice &rhs,int orthogdim)