1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Only need to pad non-local dimensions

This commit is contained in:
Peter Boyle 2023-10-05 16:55:48 -04:00
parent dd557af84b
commit 7b41b92d99

View File

@ -86,7 +86,9 @@ public:
// expand up one dim at a time // expand up one dim at a time
for(int d=0;d<dims;d++){ for(int d=0;d<dims;d++){
plocal[d] += 2*depth; if ( processors[d] > 1 ) {
plocal[d] += 2*depth;
}
for(int d=0;d<dims;d++){ for(int d=0;d<dims;d++){
global[d] = plocal[d]*processors[d]; global[d] = plocal[d]*processors[d];
@ -98,11 +100,17 @@ public:
// Extract: builds a lattice on unpadded_grid from `in` by calling localCopyRegion
// with corner fll for `in`, corner tll for `out`, and the unpadded grid's local
// dimensions as the region size. fll starts at `depth` in every dimension and is
// reset to 0 wherever processors[d]==1; tll is 0 in every dimension.
// NOTE(review): presumably `in` lives on the padded grid and fll/tll are the
// lower-left corners of the source/destination regions -- confirm against
// localCopyRegion.
template<class vobj> template<class vobj>
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
{ {
Coordinate processors=unpadded_grid->_processors;
Lattice<vobj> out(unpadded_grid); Lattice<vobj> out(unpadded_grid);
Coordinate local =unpadded_grid->LocalDimensions(); Coordinate local =unpadded_grid->LocalDimensions();
Coordinate fll(dims,depth); // depends on the MPI spread // depends on the MPI spread
Coordinate fll(dims,depth);
Coordinate tll(dims,0); // depends on the MPI spread Coordinate tll(dims,0); // depends on the MPI spread
// A dimension with a single processor gets no source offset.
// NOTE(review): presumably because such dimensions are not padded -- confirm
// against where the padded grids are built.
for(int d=0;d<dims;d++){
if( processors[d]==1 ) fll[d]=0;
}
localCopyRegion(in,out,fll,tll,local); localCopyRegion(in,out,fll,tll,local);
return out; return out;
} }
@ -121,6 +129,7 @@ public:
template<class vobj> template<class vobj>
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{ {
Coordinate processors=unpadded_grid->_processors;
GridBase *old_grid = in.Grid(); GridBase *old_grid = in.Grid();
GridCartesian *new_grid = grids[dim];//These are new grids GridCartesian *new_grid = grids[dim];//These are new grids
Lattice<vobj> padded(new_grid); Lattice<vobj> padded(new_grid);
@ -134,35 +143,47 @@ public:
double tins=0, tshift=0; double tins=0, tshift=0;
// Middle bit int islocal = 0 ;
double t = usecond(); if ( processors[dim] == 1 ) islocal = 1;
for(int x=0;x<local[dim];x++){
InsertSliceLocal(in,padded,x,depth+x,dim); if ( islocal ) {
double t = usecond();
for(int x=0;x<local[dim];x++){
InsertSliceLocal(in,padded,x,x,dim);
}
tins += usecond() - t;
} else {
// Middle bit
double t = usecond();
for(int x=0;x<local[dim];x++){
InsertSliceLocal(in,padded,x,depth+x,dim);
}
tins += usecond() - t;
// High bit
t = usecond();
shifted = cshift.Cshift(in,dim,depth);
tshift += usecond() - t;
t=usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
}
tins += usecond() - t;
// Low bit
t = usecond();
shifted = cshift.Cshift(in,dim,-depth);
tshift += usecond() - t;
t = usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,x,x,dim);
}
tins += usecond() - t;
} }
tins += usecond() - t;
// High bit
t = usecond();
shifted = cshift.Cshift(in,dim,depth);
tshift += usecond() - t;
t=usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
}
tins += usecond() - t;
// Low bit
t = usecond();
shifted = cshift.Cshift(in,dim,-depth);
tshift += usecond() - t;
t = usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,x,x,dim);
}
tins += usecond() - t;
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl; std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
return padded; return padded;