From 733f8ff0b2de7210763a341ca454a870dc10ebeb Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 24 Jan 2018 13:38:13 +0000 Subject: [PATCH] Still using parallel_for -- don't know how to implement reduction on GPU yet. Look at some sample code is best. --- lib/lattice/Lattice_reduction.h | 205 +------------------------------- 1 file changed, 4 insertions(+), 201 deletions(-) diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 3e0a36fd..c6c44706 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -22,8 +22,6 @@ Author: paboyle #ifndef GRID_LATTICE_REDUCTION_H #define GRID_LATTICE_REDUCTION_H -#include - #ifdef GRID_WARN_SUBOPTIMAL #warning "Optimisation alert all these reduction loops are NOT threaded " #endif @@ -35,7 +33,7 @@ NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////////////////////////////////// template inline RealD norm2(const Lattice &arg){ ComplexD nrm = innerProduct(arg,arg); - return std::real(nrm); + return real(nrm); } // Double inner product @@ -51,7 +49,7 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ std::vector > sumarray(grid->SumArraySize()); parallel_for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; + int mywork, myoff; GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation @@ -107,7 +105,7 @@ inline typename vobj::scalar_object sum(const Lattice &arg) } parallel_for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; + int mywork, myoff; GridThread::GetWork(grid->oSites(),thr,mywork,myoff); vobj vvsum=zero; @@ -123,7 +121,7 @@ inline typename vobj::scalar_object sum(const Lattice &arg) } typedef typename vobj::scalar_object sobj; - sobj ssum=zero; + sobj ssum; zeroit(ssum); std::vector buf(Nsimd); extract(vsum,buf); @@ -370,201 +368,6 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice } }; -/* - inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) - { - int NN = BlockSolverGrid->_ndimension; - int nsimd = BlockSolverGrid->Nsimd(); - - std::vector latt_phys(0); - std::vector simd_phys(0); - std::vector mpi_phys(0); - - for(int d=0;d_fdimensions[d]); - simd_phys.push_back(BlockSolverGrid->_simd_layout[d]); - mpi_phys.push_back(BlockSolverGrid->_processors[d]); - } - } - return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys); - } -*/ - -template -static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) -{ - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nblock = X._grid->GlobalDimensions()[Orthog]; - - GridBase *FullGrid = X._grid; - // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - // Lattice Xslice(SliceGrid); - // Lattice Rslice(SliceGrid); - - assert( FullGrid->_simd_layout[Orthog]==1); - int nh = FullGrid->_ndimension; - // int nl = SliceGrid->_ndimension; - int nl = nh-1; - - //FIXME package in a convenient iterator - //Should loop over a plane orthogonal to direction "Orthog" - int stride=FullGrid->_slice_stride[Orthog]; - int block =FullGrid->_slice_block [Orthog]; - int nblock=FullGrid->_slice_nblock[Orthog]; - int ostride=FullGrid->_ostride[Orthog]; -#pragma omp parallel - { - std::vector s_x(Nblock); - -#pragma omp for collapse(2) - for(int n=0;n -static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) -{ - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nblock = X._grid->GlobalDimensions()[Orthog]; - - GridBase *FullGrid = X._grid; - // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - // Lattice Xslice(SliceGrid); - // Lattice Rslice(SliceGrid); - - assert( FullGrid->_simd_layout[Orthog]==1); - int nh = FullGrid->_ndimension; - // int nl = SliceGrid->_ndimension; - int nl=1; - - //FIXME package in a convenient iterator - //Should loop over a plane orthogonal to direction "Orthog" - int stride=FullGrid->_slice_stride[Orthog]; - int block =FullGrid->_slice_block [Orthog]; - int nblock=FullGrid->_slice_nblock[Orthog]; - int ostride=FullGrid->_ostride[Orthog]; -#pragma omp parallel - { - std::vector s_x(Nblock); - -#pragma omp for collapse(2) - for(int n=0;n -static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) -{ - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - GridBase *FullGrid = lhs._grid; - // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - int Nblock = FullGrid->GlobalDimensions()[Orthog]; - - // Lattice Lslice(SliceGrid); - // Lattice Rslice(SliceGrid); - - mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); - - assert( FullGrid->_simd_layout[Orthog]==1); - int nh = FullGrid->_ndimension; - // int nl = SliceGrid->_ndimension; - int nl = nh-1; - - //FIXME package in a convenient iterator - //Should loop over a plane orthogonal to direction "Orthog" - int stride=FullGrid->_slice_stride[Orthog]; - int block =FullGrid->_slice_block [Orthog]; - int nblock=FullGrid->_slice_nblock[Orthog]; - int ostride=FullGrid->_ostride[Orthog]; - - typedef typename vobj::vector_typeD vector_typeD; - -#pragma omp parallel - { - std::vector Left(Nblock); - std::vector Right(Nblock); - Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); - -#pragma omp for collapse(2) - for(int n=0;nGlobalSum(sum); - mat(i,j)=sum; - }} - - return; -} - NAMESPACE_END(Grid); #endif