From 80471bf76224408de8cee544b272beb8b954500e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 18 Oct 2023 22:37:14 -0400 Subject: [PATCH] Alternate implementation involving face operations --- Grid/lattice/PaddedCell.h | 336 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 324 insertions(+), 12 deletions(-) diff --git a/Grid/lattice/PaddedCell.h b/Grid/lattice/PaddedCell.h index e8db707a..b994dd11 100644 --- a/Grid/lattice/PaddedCell.h +++ b/Grid/lattice/PaddedCell.h @@ -45,6 +45,139 @@ struct CshiftImplGauge: public CshiftImplBase inline void ScatterSlice(const cshiftVector &buf, + Lattice &lat, + int x, + int dim, + int offset=0) +{ + typedef typename vobj::scalar_object sobj; + + autoView(lat_v, lat, AcceleratorRead); + + GridBase *grid = lat.Grid(); + Coordinate simd = grid->_simd_layout; + int Nd = grid->Nd(); + int block = grid->_slice_block[dim]; + int stride = grid->_slice_stride[dim]; + int nblock = grid->_slice_nblock[dim]; + int rd = grid->_rdimensions[dim]; + + int ox = x%rd; + int ix = x/rd; + + int isites = 1; for(int d=0;d inline void GatherSlice(cshiftVector &buf, + const Lattice &lat, + int x, + int dim, + int offset=0) +{ + typedef typename vobj::scalar_object sobj; + + autoView(lat_v, lat, AcceleratorRead); + + GridBase *grid = lat.Grid(); + Coordinate simd = grid->_simd_layout; + int Nd = grid->Nd(); + int block = grid->_slice_block[dim]; + int stride = grid->_slice_stride[dim]; + int nblock = grid->_slice_nblock[dim]; + int rd = grid->_rdimensions[dim]; + + int ox = x%rd; + int ix = x/rd; + + int isites = 1; for(int d=0;d_processors; Coordinate plocal =unpadded_grid->LocalDimensions(); Coordinate global(dims); - + GridCartesian *old_grid = unpadded_grid; // expand up one dim at a time for(int d=0;d 1 ) { plocal[d] += 2*depth; - } - for(int d=0;d @@ -125,6 +259,17 @@ public: } return tmp; } + template + inline Lattice ExchangeTest(const Lattice &in, const CshiftImplBase &cshift = CshiftImplDefault()) const + { + GridBase *old_grid = in.Grid(); + int dims = old_grid->Nd(); + Lattice tmp = in; + for(int d=0;d inline Lattice Expand(int dim, const Lattice &in, const CshiftImplBase &cshift = CshiftImplDefault()) const @@ -147,14 +292,23 @@ public: if ( processors[dim] == 1 ) islocal = 1; if ( islocal ) { - + + // replace with a copy and maybe grid swizzle double t = usecond(); - for(int x=0;x + inline Lattice ExpandTest(int dim, const Lattice &in, const CshiftImplBase &cshift = CshiftImplDefault()) const + { + Coordinate processors=unpadded_grid->_processors; + GridBase *old_grid = in.Grid(); + GridCartesian *new_grid = grids[dim];//These are new grids + Lattice padded(new_grid); + Lattice shifted(old_grid); + Coordinate local =old_grid->LocalDimensions(); + Coordinate plocal =new_grid->LocalDimensions(); + if(dim==0) conformable(old_grid,unpadded_grid); + else conformable(old_grid,grids[dim-1]); + + // std::cout << " dim "< + void Face_exchange(const Lattice &from, + Lattice &to, + int dimension,int depth) const + { + typedef typename vobj::vector_type vector_type; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::scalar_object sobj; + + RealD t_gather=0.0; + RealD t_scatter=0.0; + RealD t_comms=0.0; + RealD t_copy=0.0; + + // std::cout << GridLogMessage << "dimension " <_ldimensions; + Coordinate nlds= to.Grid()->_ldimensions; + Coordinate simd= from.Grid()->_simd_layout; + int ld = lds[dimension]; + int nld = to.Grid()->_ldimensions[dimension]; + + + assert(depth<=lds[dimension]); // A must be on neighbouring node + assert(depth>0); // A caller bug if zero + assert(ld+2*depth==nld); + //////////////////////////////////////////////////////////////////////////// + // Face size and byte calculations + //////////////////////////////////////////////////////////////////////////// + int buffer_size = 1; + for(int d=0;d_slice_nblock[dimension]*from.Grid()->_slice_block[dimension] *rNsimd); + + static cshiftVector send_buf; + static cshiftVector recv_buf; + send_buf.resize(buffer_size*2*depth); + recv_buf.resize(buffer_size*2*depth); + + int words = buffer_size; + int bytes = words * sizeof(sobj); + //////////////////////////////////////////////////////////////////////////// + // Gather all surface terms up to depth "d" + //////////////////////////////////////////////////////////////////////////// + RealD t=usecond(); + int plane=0; + for ( int d=0;d < depth ; d ++ ) { + GatherSlice(send_buf,from,d,dimension,plane*buffer_size); plane++; + } + for ( int d=0;d < depth ; d ++ ) { + GatherSlice(send_buf,from,ld-depth+d,dimension,plane*buffer_size); plane++; + } + t_gather= usecond() - t; + + //////////////////////////////////////////////////////////////////////////// + // Communicate + //////////////////////////////////////////////////////////////////////////// + int comm_proc = 1; + int xmit_to_rank; + int recv_from_rank; + grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); + + t=usecond(); + for(int d = 0; dSendToRecvFrom((void *)&send_buf[d*buffer_size], xmit_to_rank, + (void *)&recv_buf[(d+depth)*buffer_size], recv_from_rank, bytes); + + grid->SendToRecvFrom((void *)&send_buf[(d+depth)*buffer_size], recv_from_rank, + (void *)&recv_buf[d*buffer_size], xmit_to_rank, bytes); + } + t_comms= usecond() - t; + + //////////////////////////////////////////////////////////////////////////// + // Copy interior -- overlap this with comms + //////////////////////////////////////////////////////////////////////////// + int Nd = new_grid->Nd(); + Coordinate LL(Nd,0); + Coordinate sz = grid->_ldimensions; + Coordinate toLL(Nd,0); + toLL[dimension]=depth; + t=usecond(); + localCopyRegion(from,to,LL,toLL,sz); + t_copy= usecond() - t; + + //////////////////////////////////////////////////////////////////////////// + // Scatter all faces + //////////////////////////////////////////////////////////////////////////// + // DumpSliceNorm(std::string("Face_exchange to before scatter"),to,dimension); + plane=0; + t=usecond(); + for ( int d=0;d < depth ; d ++ ) { + ScatterSlice(recv_buf,to,d,dimension,plane*buffer_size); plane++; + } + // DumpSliceNorm(std::string("Face_exchange to scatter 1st "),to,dimension); + for ( int d=0;d < depth ; d ++ ) { + ScatterSlice(recv_buf,to,nld-depth+d,dimension,plane*buffer_size); plane++; + } + t_scatter= usecond() - t; + std::cout << GridLogPerformance << "PaddedCell::Expand new timings: gather :" << t_gather/1000 << "ms"<