mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Faster copy region
This commit is contained in:
parent
a65bc64f10
commit
6cdb09c884
@ -443,9 +443,16 @@ template<class vobj>
|
|||||||
void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize)
|
void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize)
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
|
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
GridBase *Fg = From.Grid();
|
GridBase *Fg = From.Grid();
|
||||||
GridBase *Tg = To.Grid();
|
GridBase *Tg = To.Grid();
|
||||||
|
assert(!Fg->_isCheckerBoarded);
|
||||||
|
assert(!Tg->_isCheckerBoarded);
|
||||||
|
int Nsimd = Fg->Nsimd();
|
||||||
int nF = Fg->_ndimension;
|
int nF = Fg->_ndimension;
|
||||||
int nT = Tg->_ndimension;
|
int nT = Tg->_ndimension;
|
||||||
int nd = nF;
|
int nd = nF;
|
||||||
@ -453,17 +460,24 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
|||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
assert(Fg->_processors[d] == Tg->_processors[d]);
|
assert(Fg->_processors[d] == Tg->_processors[d]);
|
||||||
// assert(Fg->_ldimensions[d] == Tg->_ldimensions[d]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
thread_for(idx,Fg->lSites(),{
|
Coordinate ldf = Fg->_ldimensions;
|
||||||
|
Coordinate rdf = Fg->_rdimensions;
|
||||||
|
Coordinate isf = Fg->_istride;
|
||||||
|
Coordinate osf = Fg->_ostride;
|
||||||
|
Coordinate rdt = Tg->_rdimensions;
|
||||||
|
Coordinate ist = Tg->_istride;
|
||||||
|
Coordinate ost = Tg->_ostride;
|
||||||
|
auto t_v = To.View();
|
||||||
|
auto f_v = From.View();
|
||||||
|
accelerator_for(idx,Fg->lSites(),1,{
|
||||||
sobj s;
|
sobj s;
|
||||||
Coordinate Fcoor(nd);
|
Coordinate Fcoor(nd);
|
||||||
Coordinate Tcoor(nd);
|
Coordinate Tcoor(nd);
|
||||||
Fg->LocalIndexToLocalCoor(idx,Fcoor);
|
Lexicographic::CoorFromIndex(Fcoor,idx,ldf);
|
||||||
int in_region=1;
|
int in_region=1;
|
||||||
// can now repromote to threaded loop
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
if ( (Fcoor[d] < FromLowerLeft[d]) || (Fcoor[d]>=FromLowerLeft[d]+RegionSize[d]) ){
|
if ( (Fcoor[d] < FromLowerLeft[d]) || (Fcoor[d]>=FromLowerLeft[d]+RegionSize[d]) ){
|
||||||
in_region=0;
|
in_region=0;
|
||||||
@ -471,8 +485,17 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
|
|||||||
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
|
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
|
||||||
}
|
}
|
||||||
if (in_region) {
|
if (in_region) {
|
||||||
peekLocalSite(s,From,Fcoor);
|
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
|
||||||
pokeLocalSite(s,To ,Tcoor);
|
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
|
||||||
|
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
|
||||||
|
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
|
||||||
|
scalar_type * fp = (scalar_type *)&f_v[odx_f];
|
||||||
|
scalar_type * tp = (scalar_type *)&t_v[odx_t];
|
||||||
|
for(int w=0;w<words;w++){
|
||||||
|
tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd]; // FIXME IF RRII layout, type pun no worke
|
||||||
|
}
|
||||||
|
// peekLocalSite(s,From,Fcoor);
|
||||||
|
// pokeLocalSite(s,To ,Tcoor);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user