mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-12 16:55:37 +00:00
Faster grid/blas layout change.
Halo exchange is now the only slow part. Revisit
This commit is contained in:
parent
22c611bd1a
commit
66a1b63aa9
@ -175,25 +175,117 @@ public:
|
|||||||
assert(j==unpadded_sites);
|
assert(j==unpadded_sites);
|
||||||
CopyMatrix();
|
CopyMatrix();
|
||||||
}
|
}
|
||||||
template<class vobj> void GridtoBLAS(const Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &out)
|
template<class vobj> void GridtoBLAS(const Lattice<vobj> &from,deviceVector<typename vobj::scalar_object> &to)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
std::vector<typename vobj::scalar_object> tmp;
|
std::vector<typename vobj::scalar_object> tmp;
|
||||||
unvectorizeToLexOrdArray(tmp,grid);
|
unvectorizeToLexOrdArray(tmp,from);
|
||||||
// std::cout << "GridtoBLAS volume " <<tmp.size()<<" " << grid.Grid()->lSites()<<" "<<out.size()<<std::endl;
|
assert(tmp.size()==from.Grid()->lSites());
|
||||||
// std::cout << "GridtoBLAS site 0 " <<tmp[0]<<std::endl;
|
assert(tmp.size()==to.size());
|
||||||
assert(tmp.size()==grid.Grid()->lSites());
|
to.resize(tmp.size());
|
||||||
assert(tmp.size()==out.size());
|
acceleratorCopyToDevice(&tmp[0],&to[0],sizeof(typename vobj::scalar_object)*tmp.size());
|
||||||
out.resize(tmp.size());
|
#else
|
||||||
acceleratorCopyToDevice(&tmp[0],&out[0],sizeof(typename vobj::scalar_object)*tmp.size());
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
|
GridBase *Fg = from.Grid();
|
||||||
|
assert(!Fg->_isCheckerBoarded);
|
||||||
|
int nd = Fg->_ndimension;
|
||||||
|
|
||||||
|
to.resize(Fg->lSites());
|
||||||
|
|
||||||
|
Coordinate LocalLatt = Fg->LocalDimensions();
|
||||||
|
size_t nsite = 1;
|
||||||
|
for(int i=0;i<nd;i++) nsite *= LocalLatt[i];
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// do the index calc on the GPU
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
Coordinate f_ostride = Fg->_ostride;
|
||||||
|
Coordinate f_istride = Fg->_istride;
|
||||||
|
Coordinate f_rdimensions = Fg->_rdimensions;
|
||||||
|
|
||||||
|
autoView(from_v,from,AcceleratorRead);
|
||||||
|
auto to_v = &to[0];
|
||||||
|
|
||||||
|
const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
|
accelerator_for(idx,nsite,1,{
|
||||||
|
|
||||||
|
Coordinate from_coor, base;
|
||||||
|
Lexicographic::CoorFromIndex(base,idx,LocalLatt);
|
||||||
|
for(int i=0;i<nd;i++){
|
||||||
|
from_coor[i] = base[i];
|
||||||
|
}
|
||||||
|
int from_oidx = 0; for(int d=0;d<nd;d++) from_oidx+=f_ostride[d]*(from_coor[d]%f_rdimensions[d]);
|
||||||
|
int from_lane = 0; for(int d=0;d<nd;d++) from_lane+=f_istride[d]*(from_coor[d]/f_rdimensions[d]);
|
||||||
|
|
||||||
|
const vector_type* from = (const vector_type *)&from_v[from_oidx];
|
||||||
|
scalar_type* to = (scalar_type *)&to_v[idx];
|
||||||
|
|
||||||
|
scalar_type stmp;
|
||||||
|
for(int w=0;w<words;w++){
|
||||||
|
stmp = getlane(from[w], from_lane);
|
||||||
|
to[w] = stmp;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
template<class vobj> void BLAStoGrid(Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &in)
|
template<class vobj> void BLAStoGrid(Lattice<vobj> &grid,deviceVector<typename vobj::scalar_object> &in)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
std::vector<typename vobj::scalar_object> tmp;
|
std::vector<typename vobj::scalar_object> tmp;
|
||||||
tmp.resize(in.size());
|
tmp.resize(in.size());
|
||||||
// std::cout << "BLAStoGrid volume " <<tmp.size()<<" "<< grid.Grid()->lSites()<<std::endl;
|
// std::cout << "BLAStoGrid volume " <<tmp.size()<<" "<< grid.Grid()->lSites()<<std::endl;
|
||||||
assert(in.size()==grid.Grid()->lSites());
|
assert(in.size()==grid.Grid()->lSites());
|
||||||
acceleratorCopyFromDevice(&in[0],&tmp[0],sizeof(typename vobj::scalar_object)*in.size());
|
acceleratorCopyFromDevice(&in[0],&tmp[0],sizeof(typename vobj::scalar_object)*in.size());
|
||||||
vectorizeFromLexOrdArray(tmp,grid);
|
vectorizeFromLexOrdArray(tmp,grid);
|
||||||
|
#else
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
|
GridBase *Tg = grid.Grid();
|
||||||
|
assert(!Tg->_isCheckerBoarded);
|
||||||
|
int nd = Tg->_ndimension;
|
||||||
|
|
||||||
|
assert(in.size()==Tg->lSites());
|
||||||
|
|
||||||
|
Coordinate LocalLatt = Tg->LocalDimensions();
|
||||||
|
size_t nsite = 1;
|
||||||
|
for(int i=0;i<nd;i++) nsite *= LocalLatt[i];
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// do the index calc on the GPU
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
Coordinate t_ostride = Tg->_ostride;
|
||||||
|
Coordinate t_istride = Tg->_istride;
|
||||||
|
Coordinate t_rdimensions = Tg->_rdimensions;
|
||||||
|
|
||||||
|
autoView(to_v,grid,AcceleratorWrite);
|
||||||
|
auto from_v = &in[0];
|
||||||
|
|
||||||
|
const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
|
accelerator_for(idx,nsite,1,{
|
||||||
|
|
||||||
|
Coordinate to_coor, base;
|
||||||
|
Lexicographic::CoorFromIndex(base,idx,LocalLatt);
|
||||||
|
for(int i=0;i<nd;i++){
|
||||||
|
to_coor[i] = base[i];
|
||||||
|
}
|
||||||
|
int to_oidx = 0; for(int d=0;d<nd;d++) to_oidx+=t_ostride[d]*(to_coor[d]%t_rdimensions[d]);
|
||||||
|
int to_lane = 0; for(int d=0;d<nd;d++) to_lane+=t_istride[d]*(to_coor[d]/t_rdimensions[d]);
|
||||||
|
|
||||||
|
vector_type* to = (vector_type *)&to_v[to_oidx];
|
||||||
|
scalar_type* from = (scalar_type *)&from_v[idx];
|
||||||
|
|
||||||
|
scalar_type stmp;
|
||||||
|
for(int w=0;w<words;w++){
|
||||||
|
stmp=from[w];
|
||||||
|
putlane(to[w], stmp, to_lane);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
void CopyMatrix (void)
|
void CopyMatrix (void)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user