diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 3a076038..3c5b03e5 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -317,116 +317,6 @@ template inline void sliceSum(const Lattice &Data,std::vector< } } -template -static void mySliceInnerProductVector( std::vector & result, const Lattice &lhs,const Lattice &rhs,int orthogdim) -{ - // std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl; - - typedef typename vobj::scalar_type scalar_type; - std::vector lsSum; - localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim); - globalSliceInnerProductVector(result, lhs, lsSum, orthogdim); - // std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl; -} - -template -static void localSliceInnerProductVector(std::vector &result, const Lattice &lhs, const Lattice &rhs, std::vector &lsSum, int orthogdim) -{ - // std::cout << GridLogMessage << "Start prep" << std::endl; - typedef typename vobj::vector_type vector_type; - typedef typename vobj::scalar_type scalar_type; - GridBase *grid = lhs.Grid(); - assert(grid!=NULL); - conformable(grid,rhs.Grid()); - - const int Nd = grid->_ndimension; - const int Nsimd = grid->Nsimd(); - - assert(orthogdim >= 0); - assert(orthogdim < Nd); - - int fd=grid->_fdimensions[orthogdim]; - int ld=grid->_ldimensions[orthogdim]; - int rd=grid->_rdimensions[orthogdim]; - // std::cout << GridLogMessage << "Start alloc" << std::endl; - - Vector lvSum(rd); // will locally sum vectors first - lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars - ExtractBuffer > extracted(Nsimd); // splitting the SIMD - // std::cout << GridLogMessage << "End alloc" << std::endl; - - result.resize(fd); // And then global sum to return the same vector to every node for IO to file - for(int r=0;r_slice_nblock[orthogdim]; - int e2= grid->_slice_block [orthogdim]; - int stride=grid->_slice_stride[orthogdim]; - // std::cout << GridLogMessage << "End prep" << std::endl; - // std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl; - vector_type vv; - auto l_v=lhs.View(); - auto r_v=rhs.View(); - thread_for( r,rd,{ - - int so=r*grid->_ostride[orthogdim]; // base offset for start of plane - - for(int n=0;n temp; - temp._internal = lvSum[rt]; - extract(temp,extracted); - - for(int idx=0;idxiCoorFromIindex(icoor,idx); - - int ldx =rt+icoor[orthogdim]*rd; - - lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal; - - } - } - // std::cout << GridLogMessage << "End sum over simd lanes" << std::endl; -} -template -static void globalSliceInnerProductVector(std::vector &result, const Lattice &lhs, std::vector &lsSum, int orthogdim) -{ - typedef typename vobj::scalar_type scalar_type; - GridBase *grid = lhs.Grid(); - int fd = result.size(); - int ld = lsSum.size(); - // sum over nodes. - std::vector gsum; - gsum.resize(fd, scalar_type(0.0)); - // std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl; - for(int t=0;t_processor_coor[orthogdim] ) { - gsum[t]=lsSum[lt]; - } - } - // std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl; - // std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl; - grid->GlobalSumVector(&gsum[0], fd); - // std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl; - - result = gsum; -} template static void sliceInnerProductVector( std::vector & result, const Lattice &lhs,const Lattice &rhs,int orthogdim) {