From d66b2423cbbc0e9d9a55437f18293a7b17d97dbe Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 27 Aug 2024 11:28:47 -0400 Subject: [PATCH] Move slice operations to GPU for BlockCG --- Grid/lattice/Lattice_reduction.h | 242 ++++++++++--------------------- 1 file changed, 74 insertions(+), 168 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 6ce70232..e72f3b5c 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -536,7 +536,20 @@ sliceSum(const Lattice &Data,int orthogdim) return result; } +/* +Reimplement +1) +template +static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) + +2) +template +static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) + +3) +-- Make Slice Mul Matrix call sliceMaddMatrix + */ template static void sliceInnerProductVector( std::vector & result, const Lattice &lhs,const Lattice &rhs,int orthogdim) { @@ -687,203 +700,96 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice } }; -/* inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) { int NN = BlockSolverGrid->_ndimension; int nsimd = BlockSolverGrid->Nsimd(); - std::vector latt_phys(0); - std::vector simd_phys(0); - std::vector mpi_phys(0); - + std::vector latt_phys(NN-1); + Coordinate simd_phys; + std::vector mpi_phys(NN-1); + Coordinate checker_dim_mask(NN-1); + int checker_dim=-1; + + int dd; for(int d=0;d_fdimensions[d]); - simd_phys.push_back(BlockSolverGrid->_simd_layout[d]); - mpi_phys.push_back(BlockSolverGrid->_processors[d]); + latt_phys[dd]=BlockSolverGrid->_fdimensions[d]; + mpi_phys[dd] =BlockSolverGrid->_processors[d]; + checker_dim_mask[dd] = BlockSolverGrid->_checker_dim_mask[d]; + if ( d == BlockSolverGrid->_checker_dim ) checker_dim = dd; + dd++; } } - return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys); + simd_phys=GridDefaultSimd(latt_phys.size(),nsimd); + GridCartesian *tmp = new GridCartesian(latt_phys,simd_phys,mpi_phys); + if(BlockSolverGrid->_isCheckerBoarded) { + GridRedBlackCartesian *ret = new GridRedBlackCartesian(tmp,checker_dim_mask,checker_dim); + delete tmp; + return (GridBase *) ret; + } else { + return (GridBase *) tmp; + } } -*/ template static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) { + GridBase *FullGrid = X.Grid(); + GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + + Lattice Ys(SliceGrid); + Lattice Rs(SliceGrid); + Lattice Xs(SliceGrid); + Lattice RR(FullGrid); + + RR = R; // Copies checkerboard for insert + typedef typename vobj::scalar_object sobj; typedef typename vobj::vector_type vector_type; - - int Nblock = X.Grid()->GlobalDimensions()[Orthog]; - - GridBase *FullGrid = X.Grid(); - // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - // Lattice Xslice(SliceGrid); - // Lattice Rslice(SliceGrid); - - assert( FullGrid->_simd_layout[Orthog]==1); - // int nh = FullGrid->_ndimension; - // int nl = SliceGrid->_ndimension; - // int nl = nh-1; - - //FIXME package in a convenient iterator - //Should loop over a plane orthogonal to direction "Orthog" - int stride=FullGrid->_slice_stride[Orthog]; - int block =FullGrid->_slice_block [Orthog]; - int nblock=FullGrid->_slice_nblock[Orthog]; - int ostride=FullGrid->_ostride[Orthog]; - - autoView( X_v, X, CpuRead); - autoView( Y_v, Y, CpuRead); - autoView( R_v, R, CpuWrite); - thread_region - { - Vector s_x(Nblock); - - thread_for_collapse_in_region(2, n,nblock, { - for(int b=0;bGlobalDimensions()[Orthog]; + for(int i=0;i -static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) -{ - typedef typename vobj::scalar_object sobj; - typedef typename vobj::vector_type vector_type; - - int Nblock = X.Grid()->GlobalDimensions()[Orthog]; - - GridBase *FullGrid = X.Grid(); - // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - // Lattice Xslice(SliceGrid); - // Lattice Rslice(SliceGrid); - - assert( FullGrid->_simd_layout[Orthog]==1); - // int nh = FullGrid->_ndimension; - // int nl = SliceGrid->_ndimension; - // int nl=1; - - //FIXME package in a convenient iterator - // thread_for2d_in_region - //Should loop over a plane orthogonal to direction "Orthog" - int stride=FullGrid->_slice_stride[Orthog]; - int block =FullGrid->_slice_block [Orthog]; - int nblock=FullGrid->_slice_nblock[Orthog]; - int ostride=FullGrid->_ostride[Orthog]; - autoView( R_v, R, CpuWrite); - autoView( X_v, X, CpuRead); - thread_region - { - std::vector s_x(Nblock); - - - thread_for_collapse_in_region( 2 ,n,nblock,{ - for(int b=0;b &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) +{ + R=Zero(); + sliceMaddMatrix(R,aa,X,R,Orthog,scale); }; template static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) { + GridBase *SliceGrid = makeSubSliceGrid(lhs.Grid(),Orthog); + + Lattice ls(SliceGrid); + Lattice rs(SliceGrid); + typedef typename vobj::scalar_object sobj; typedef typename vobj::vector_type vector_type; - - GridBase *FullGrid = lhs.Grid(); - // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - int Nblock = FullGrid->GlobalDimensions()[Orthog]; - - // Lattice Lslice(SliceGrid); - // Lattice Rslice(SliceGrid); - - mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); - - assert( FullGrid->_simd_layout[Orthog]==1); - // int nh = FullGrid->_ndimension; - // int nl = SliceGrid->_ndimension; - // int nl = nh-1; - - //FIXME package in a convenient iterator - //Should loop over a plane orthogonal to direction "Orthog" - int stride=FullGrid->_slice_stride[Orthog]; - int block =FullGrid->_slice_block [Orthog]; - int nblock=FullGrid->_slice_nblock[Orthog]; - int ostride=FullGrid->_ostride[Orthog]; - - typedef typename vobj::vector_typeD vector_typeD; - - autoView( lhs_v, lhs, CpuRead); - autoView( rhs_v, rhs, CpuRead); - thread_region - { - std::vector Left(Nblock); - std::vector Right(Nblock); - Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); - - thread_for_collapse_in_region( 2, n,nblock,{ - for(int b=0;b(real(red),imag(red)); - }} - }}); - thread_critical - { - mat += mat_thread; - } + int Nslice = lhs.Grid()->GlobalDimensions()[Orthog]; + mat = Eigen::MatrixXcd::Zero(Nslice,Nslice); + for(int s=0;sGlobalSum(sum); - mat(i,j)=sum; - }} - - return; + delete SliceGrid; } NAMESPACE_END(Grid);