From addc63885602c108cc7d9d767259241b8ac58be4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 22 Jan 2024 17:40:38 -0500 Subject: [PATCH] Fast localCopyRegion, blockProjectFast --- Grid/lattice/Lattice_transfer.h | 142 +++++++++++++------------------- 1 file changed, 58 insertions(+), 84 deletions(-) diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 39fcb376..a817fa42 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -744,7 +744,11 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; - static const int words=sizeof(vobj)/sizeof(vector_type); + const int words=sizeof(vobj)/sizeof(vector_type); + + ////////////////////////////////////////////////////////////////////////////////////////// + // checks should guarantee that the operations are local + ////////////////////////////////////////////////////////////////////////////////////////// GridBase *Fg = From.Grid(); GridBase *Tg = To.Grid(); @@ -759,52 +763,39 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro for(int d=0;d_processors[d] == Tg->_processors[d]); } - // the above should guarantee that the operations are local - -#if 1 + + /////////////////////////////////////////////////////////// + // do the index calc on the GPU + /////////////////////////////////////////////////////////// + Coordinate f_ostride = Fg->_ostride; + Coordinate f_istride = Fg->_istride; + Coordinate f_rdimensions = Fg->_rdimensions; + Coordinate t_ostride = Tg->_ostride; + Coordinate t_istride = Tg->_istride; + Coordinate t_rdimensions = Tg->_rdimensions; size_t nsite = 1; for(int i=0;ioIndex(from_coor); - int fiidx = Fg->iIndex(from_coor); - int toidx = Tg->oIndex(to_coor); - int tiidx = Tg->iIndex(to_coor); - int* tt = table + 4*idx; - tt[0] = foidx; - tt[1] = fiidx; - tt[2] = toidx; - tt[3] = tiidx; - }); - - int* table_d = (int*)acceleratorAllocDevice(tbytes); - acceleratorCopyToDevice(table,table_d,tbytes); typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; autoView(from_v,From,AcceleratorRead); autoView(to_v,To,AcceleratorWrite); - + + accelerator_for(idx,nsite,1,{ - static const int words=sizeof(vobj)/sizeof(vector_type); - int* tt = table_d + 4*idx; - int from_oidx = *tt++; - int from_lane = *tt++; - int to_oidx = *tt++; - int to_lane = *tt; + + Coordinate from_coor, to_coor, base; + Lexicographic::CoorFromIndex(base,idx,RegionSize); + for(int i=0;i &From,Lattice & To,Coordinate Fro stmp = getlane(from[w], from_lane); putlane(to[w], stmp, to_lane); } - }); - - acceleratorFreeDevice(table_d); - free(table); - - -#else - Coordinate ldf = Fg->_ldimensions; - Coordinate rdf = Fg->_rdimensions; - Coordinate isf = Fg->_istride; - Coordinate osf = Fg->_ostride; - Coordinate rdt = Tg->_rdimensions; - Coordinate ist = Tg->_istride; - Coordinate ost = Tg->_ostride; - - autoView( t_v , To, CpuWrite); - autoView( f_v , From, CpuRead); - thread_for(idx,Fg->lSites(),{ - sobj s; - Coordinate Fcoor(nd); - Coordinate Tcoor(nd); - Lexicographic::CoorFromIndex(Fcoor,idx,ldf); - int in_region=1; - for(int d=0;d=FromLowerLeft[d]+RegionSize[d]) ){ - in_region=0; - } - Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d]; - } - if (in_region) { -#if 0 - Integer idx_f = 0; for(int d=0;d > & full,Lattice & split) } } +////////////////////////////////////////////////////// +// Faster but less accurate blockProject +////////////////////////////////////////////////////// +template +inline void blockProjectFast(Lattice > &coarseData, + const Lattice &fineData, + const VLattice &Basis) +{ + GridBase * fine = fineData.Grid(); + GridBase * coarse= coarseData.Grid(); + + Lattice > ip(coarse); + + autoView( coarseData_ , coarseData, AcceleratorWrite); + autoView( ip_ , ip, AcceleratorWrite); + RealD t_IP=0; + RealD t_co=0; + for(int v=0;voSites(), vobj::Nsimd(), { + convertType(coarseData_[sc](v),ip_[sc]); + }); + t_co+=usecond(); + } +} + + NAMESPACE_END(Grid);