mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Faster grid/blas layout change.
Halo exchange is now the only slow part; revisit it.
This commit is contained in:
parent
22c611bd1a
commit
66a1b63aa9
@ -175,25 +175,117 @@ public:
|
||||
assert(j==unpadded_sites);
|
||||
CopyMatrix();
|
||||
}
|
||||
template<class vobj> void GridtoBLAS(const Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &out)
|
||||
template<class vobj> void GridtoBLAS(const Lattice<vobj> &from,deviceVector<typename vobj::scalar_object> &to)
|
||||
{
|
||||
#if 0
|
||||
std::vector<typename vobj::scalar_object> tmp;
|
||||
unvectorizeToLexOrdArray(tmp,grid);
|
||||
// std::cout << "GridtoBLAS volume " <<tmp.size()<<" " << grid.Grid()->lSites()<<" "<<out.size()<<std::endl;
|
||||
// std::cout << "GridtoBLAS site 0 " <<tmp[0]<<std::endl;
|
||||
assert(tmp.size()==grid.Grid()->lSites());
|
||||
assert(tmp.size()==out.size());
|
||||
out.resize(tmp.size());
|
||||
acceleratorCopyToDevice(&tmp[0],&out[0],sizeof(typename vobj::scalar_object)*tmp.size());
|
||||
unvectorizeToLexOrdArray(tmp,from);
|
||||
assert(tmp.size()==from.Grid()->lSites());
|
||||
assert(tmp.size()==to.size());
|
||||
to.resize(tmp.size());
|
||||
acceleratorCopyToDevice(&tmp[0],&to[0],sizeof(typename vobj::scalar_object)*tmp.size());
|
||||
#else
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *Fg = from.Grid();
|
||||
assert(!Fg->_isCheckerBoarded);
|
||||
int nd = Fg->_ndimension;
|
||||
|
||||
to.resize(Fg->lSites());
|
||||
|
||||
Coordinate LocalLatt = Fg->LocalDimensions();
|
||||
size_t nsite = 1;
|
||||
for(int i=0;i<nd;i++) nsite *= LocalLatt[i];
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// do the index calc on the GPU
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Coordinate f_ostride = Fg->_ostride;
|
||||
Coordinate f_istride = Fg->_istride;
|
||||
Coordinate f_rdimensions = Fg->_rdimensions;
|
||||
|
||||
autoView(from_v,from,AcceleratorRead);
|
||||
auto to_v = &to[0];
|
||||
|
||||
const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
accelerator_for(idx,nsite,1,{
|
||||
|
||||
Coordinate from_coor, base;
|
||||
Lexicographic::CoorFromIndex(base,idx,LocalLatt);
|
||||
for(int i=0;i<nd;i++){
|
||||
from_coor[i] = base[i];
|
||||
}
|
||||
int from_oidx = 0; for(int d=0;d<nd;d++) from_oidx+=f_ostride[d]*(from_coor[d]%f_rdimensions[d]);
|
||||
int from_lane = 0; for(int d=0;d<nd;d++) from_lane+=f_istride[d]*(from_coor[d]/f_rdimensions[d]);
|
||||
|
||||
const vector_type* from = (const vector_type *)&from_v[from_oidx];
|
||||
scalar_type* to = (scalar_type *)&to_v[idx];
|
||||
|
||||
scalar_type stmp;
|
||||
for(int w=0;w<words;w++){
|
||||
stmp = getlane(from[w], from_lane);
|
||||
to[w] = stmp;
|
||||
}
|
||||
});
|
||||
#endif
|
||||
}
|
||||
/// Scatter a flat array of scalar objects back into a Grid lattice.
///
/// @param grid  Destination lattice. Its grid must not be checkerboarded (asserted).
/// @param in    Source array; its size must equal `grid.Grid()->lSites()` (asserted).
///              Element `idx` is written to the site whose coordinate is produced by
///              `Lexicographic::CoorFromIndex(coor, idx, LocalDimensions())`.
template<class vobj> void BLAStoGrid(Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &in)
{
#if 0
  std::vector<typename vobj::scalar_object> tmp;
  tmp.resize(in.size());
  //    std::cout << "BLAStoGrid volume " <<tmp.size()<<" "<< grid.Grid()->lSites()<<std::endl;
  assert(in.size()==grid.Grid()->lSites());
  acceleratorCopyFromDevice(&in[0],&tmp[0],sizeof(typename vobj::scalar_object)*in.size());
  vectorizeFromLexOrdArray(tmp,grid);
#else
  typedef typename vobj::scalar_type scalar_type;
  typedef typename vobj::vector_type vector_type;

  GridBase *Tg = grid.Grid();
  assert(!Tg->_isCheckerBoarded);
  int ndim = Tg->_ndimension;

  assert(in.size()==Tg->lSites());

  // Local volume = product of the local dimensions.
  Coordinate local_dims = Tg->LocalDimensions();
  size_t volume = 1;
  for(int mu=0;mu<ndim;mu++) volume *= local_dims[mu];

  // Layout tables copied locally; the site/lane arithmetic runs in the kernel.
  Coordinate ostride     = Tg->_ostride;
  Coordinate istride     = Tg->_istride;
  Coordinate rdimensions = Tg->_rdimensions;

  autoView(grid_v,grid,AcceleratorWrite);
  auto in_p = &in[0];

  // Number of vector_type words making up one vobj.
  const int nwords=sizeof(vobj)/sizeof(vector_type);
  accelerator_for(site,volume,1,{

      // Coordinate of this site within the local volume.
      Coordinate coor;
      Lexicographic::CoorFromIndex(coor,site,local_dims);

      // coor % rdimensions selects the outer site, coor / rdimensions the lane.
      int oidx = 0;
      int lane = 0;
      for(int mu=0;mu<ndim;mu++){
        oidx += ostride[mu]*(coor[mu]%rdimensions[mu]);
        lane += istride[mu]*(coor[mu]/rdimensions[mu]);
      }

      vector_type* dst = (vector_type *)&grid_v[oidx];
      scalar_type* src = (scalar_type *)&in_p[site];

      for(int w=0;w<nwords;w++){
        scalar_type word = src[w];
        putlane(dst[w], word, lane);
      }
  });
#endif
}
|
||||
void CopyMatrix (void)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user