#ifndef GRID_LATTICE_REDUCTION_H #define GRID_LATTICE_REDUCTION_H namespace Grid { #ifdef GRID_WARN_SUBOPTIMAL #warning "Optimisation alert all these reduction loops are NOT threaded " #endif //////////////////////////////////////////////////////////////////////////////////////////////////// // Reduction operations //////////////////////////////////////////////////////////////////////////////////////////////////// template inline RealD norm2(const Lattice &arg){ typedef typename vobj::scalar_type scalar; typedef typename vobj::vector_type vector; decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm; scalar nrm; //FIXME make this loop parallelisable vnrm=zero; for(int ss=0;ssoSites(); ss++){ vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]); } vector vvnrm =TensorRemove(vnrm) ; nrm = Reduce(vvnrm); arg._grid->GlobalSum(nrm); return real(nrm); } template inline ComplexD innerProduct(const Lattice &left,const Lattice &right) // inline auto innerProduct(const Lattice &left,const Lattice &right) //->decltype(innerProduct(left._odata[0],right._odata[0])) { typedef typename vobj::scalar_type scalar; decltype(innerProduct(left._odata[0],right._odata[0])) vnrm; scalar nrm; //FIXME make this loop parallelisable vnrm=zero; for(int ss=0;ssoSites(); ss++){ vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); } nrm = Reduce(vnrm); right._grid->GlobalSum(nrm); return nrm; } template inline typename vobj::scalar_object sum(const Lattice &arg){ GridBase *grid=arg._grid; int Nsimd = grid->Nsimd(); typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; vobj vsum; sobj ssum; vsum=zero; ssum=zero; //FIXME make this loop parallelisable for(int ss=0;ssoSites(); ss++){ vsum = vsum + arg._odata[ss]; } std::vector buf(Nsimd); extract(vsum,buf); for(int i=0;iGlobalSum(ssum); return ssum; } template inline void sliceSum(const Lattice &Data,std::vector &result,int orthogdim) { typedef typename vobj::scalar_object sobj; GridBase *grid = Data._grid; const int Nd = grid->_ndimension; const int Nsimd = grid->Nsimd(); assert(orthogdim >= 0); assert(orthogdim < Nd); int fd=grid->_fdimensions[orthogdim]; int ld=grid->_ldimensions[orthogdim]; int rd=grid->_rdimensions[orthogdim]; sobj szero; szero=zero; std::vector > lvSum(rd); // will locally sum vectors first std::vector lsSum(ld,szero); // sum across these down to scalars std::vector extracted(Nsimd); // splitting the SIMD result.resize(fd); // And then global sum to return the same vector to every node for IO to file for(int r=0;r coor(Nd); // sum over reduced dimension planes, breaking out orthog dir for(int ss=0;ssoSites();ss++){ GridBase::CoorFromIndex(coor,ss,grid->_rdimensions); int r = coor[orthogdim]; lvSum[r]=lvSum[r]+Data._odata[ss]; } // Sum across simd lanes in the plane, breaking out orthog dir. std::vector icoor(Nd); for(int rt=0;rtiCoorFromIindex(icoor,idx); int ldx =rt+icoor[orthogdim]*rd; lsSum[ldx]=lsSum[ldx]+extracted[idx]; } } // sum over nodes. sobj gsum; for(int t=0;t_processor_coor[orthogdim] ) { gsum=lsSum[lt]; } else { gsum=zero; } grid->GlobalSum(gsum); result[t]=gsum; } } } #endif