1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Bug fix for stencil with large shifts (3+), would be important to naik term for example but did not

impact Wilson based nearest neighbour stencils.
This commit is contained in:
paboyle 2015-12-30 19:29:48 +00:00
parent 841a37f941
commit 145a295231
4 changed files with 52 additions and 17 deletions

View File

@ -163,18 +163,19 @@ namespace Grid {
// So tables are the same whether comm_dim or splice_dim
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
if ( sshift[0] == sshift[1] ) {
Comms(point,dimension,shift,0x3);
// std::cout<<"Comms 0x3"<<std::endl;
Comms(point,dimension,shift,0x3);
} else {
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
}
}
// for(int ss=0;ss<osites;ss++){
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
// }
}
}
@ -210,7 +211,6 @@ namespace Grid {
if ( (shiftpm== 1) && (sx<x) ) {
wraparound = 1;
}
int permute_slice=0;
if(permute_dim){
@ -228,6 +228,7 @@ namespace Grid {
void Comms (int point,int dimension,int shiftpm,int cbmask)
{
GridBase *grid=_grid;
const int Nsimd = grid->Nsimd();
int fd = _grid->_fdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
@ -241,8 +242,9 @@ namespace Grid {
int shift = (shiftpm + fd) %fd;
assert(shift>=0);
assert(shift<fd);
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored
// std::cout << " dim " <<dimension<<" buffersize "<<buffer_size<<std::endl;
_comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
// send to one or more remote nodes.
@ -252,11 +254,40 @@ namespace Grid {
for(int x=0;x<rd;x++){
int sx = (x+sshift)%rd;
int comm_proc = ((x+sshift)/rd)%pd;
int offnode = (comm_proc!= 0);
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
int permute_type=grid->PermuteType(dimension);
int sx = (x+sshift)%rd;
int offnode = 0;
if ( simd_layout > 1 ) {
for(int i=0;i<Nsimd;i++){
int inner_bit = (Nsimd>>(permute_type+1));
int ic= (i&inner_bit)? 1:0;
int my_coor = rd*ic + x;
int nbr_coor = my_coor+sshift;
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
if ( nbr_proc ) {
offnode =1;
}
}
} else {
int comm_proc = ((x+sshift)/rd)%pd;
offnode = (comm_proc!= 0);
// std::cout << "Stencil x "<<x<<" shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<< " comm_proc "<<comm_proc<<" pd "<< pd <<std::endl;
}
// Stencil x 1 shift 3 sshift 3 fd 8 rd 2 offnode 0 sx 0 comm_proc 0 pd 2
// x+sshift = 4
// x+sshift/2 = 2
// 2%2 == 0
// Problem: sshift is wrong in "rd" for SIMD directions. The complex logic in Cshift_mpi is needed.
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
wraparound = 1;
@ -282,6 +313,7 @@ namespace Grid {
int unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words;
// std::cout<< "Comms dim "<<dimension<<" offset "<<unified_buffer_offset<<" size "<<" " << _unified_buffer_size<<std::endl;
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
}
@ -440,7 +472,7 @@ namespace Grid {
nosplicetime+=usecond();
}
} else {
std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
// std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
if(splice_dim){
splicetime-=usecond();
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
@ -594,7 +626,6 @@ namespace Grid {
gathermtime+=usecond();
for(int i=0;i<Nsimd;i++){
int inner_bit = (Nsimd>>(permute_type+1));
int ic= (i&inner_bit)? 1:0;
@ -633,10 +664,11 @@ namespace Grid {
}
}
// Here we don't want to scatter, just place into a buffer.
// std::cout << " CommsSimd ["<<dimension<<"] offset "<<u_comm_offset<<" buffsize "<<buffer_size <<" unified buffer size "<<_unified_buffer_size<<std::endl;
mergetime-=usecond();
PARALLEL_FOR_LOOP
for(int i=0;i<buffer_size;i++){
// std::cout<<"buffer loop " << i<<" "<<u_comm_offset+i<<" / "<<_unified_buffer_size<<std::endl;
// assert(u_comm_offset+i<_unified_buffer_size);
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
}

View File

@ -167,7 +167,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
int bytes = buffer_size*sizeof(scalar_object);
std::vector<scalar_object *> pointers(Nsimd); //
std::vector<scalar_object *> pointers(Nsimd); //
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
///////////////////////////////////////////

View File

@ -176,6 +176,8 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
scalar_type *pointer;
scalar_type *vp = (scalar_type *)&vec;
// assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
for(int w=0;w<words;w++){
for(int i=0;i<Nextr;i++){
for(int ii=0;ii<s;ii++){

View File

@ -169,12 +169,13 @@ int main (int argc, char ** argv)
ECheck.checkerboard = Odd;
OCheck.checkerboard = Even;
}
// Implement a stencil code that should agree with that darn cshift!
for(int i=0;i<OCheck._grid->oSites();i++){
int permute_type;
StencilEntry *SE;
SE = EStencil.GetEntry(permute_type,0,i);
std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
// std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
if ( SE->_is_local && SE->_permute )
permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type);
@ -187,7 +188,7 @@ int main (int argc, char ** argv)
int permute_type;
StencilEntry *SE;
SE = OStencil.GetEntry(permute_type,0,i);
std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
// std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
if ( SE->_is_local && SE->_permute )
permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type);