1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 06:47:06 +01:00

Shook out the stencil code to the point where I think the Wilson Dslash is correct.

Still need to audit the code carefully, consolidate the shared logic between Stencil and Cshift,
and then benchmark and optimise.
This commit is contained in:
Peter Boyle
2015-04-28 08:11:59 +01:00
parent f159495a9d
commit 25d523c0f4
24 changed files with 599 additions and 605 deletions

View File

@ -133,6 +133,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
GridBase *grid=rhs._grid;
const int Nsimd = grid->Nsimd();
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
int fd = grid->_fdimensions[dimension];
@ -155,12 +156,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
int words = sizeof(vobj)/sizeof(vector_type);
std::vector<std::vector<scalar_type> > send_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
std::vector<std::vector<scalar_type> > recv_buf_extract(Nsimd,std::vector<scalar_type>(buffer_size*words) );
int bytes = buffer_size*words*sizeof(scalar_type);
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
int bytes = buffer_size*sizeof(scalar_object);
std::vector<scalar_type *> pointers(Nsimd); //
std::vector<scalar_type *> rpointers(Nsimd); // received pointers
std::vector<scalar_object *> pointers(Nsimd); //
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
///////////////////////////////////////////
// Work out what to send where
@ -171,10 +172,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
// loop over outer coord planes orthog to dim
for(int x=0;x<rd;x++){
// FIXME call local permute copy if none are offnode.
// FIXME call local permute copy if none are offnode.
for(int i=0;i<Nsimd;i++){
pointers[i] = (scalar_type *)&send_buf_extract[i][0];
pointers[i] = &send_buf_extract[i][0];
}
int sx = (x+sshift)%rd;
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
@ -208,9 +208,9 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
recv_from_rank,
bytes);
rpointers[i] = (scalar_type *)&recv_buf_extract[i][0];
rpointers[i] = &recv_buf_extract[i][0];
} else {
rpointers[i] = (scalar_type *)&send_buf_extract[nbr_lane][0];
rpointers[i] = &send_buf_extract[nbr_lane][0];
}
}