mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Bug fix for stencils with large shifts (3+). This would be important for the Naik term, for example, but did not
impact Wilson-based nearest-neighbour stencils.
This commit is contained in:
parent
841a37f941
commit
145a295231
@ -163,18 +163,19 @@ namespace Grid {
|
|||||||
// So tables are the same whether comm_dim or splice_dim
|
// So tables are the same whether comm_dim or splice_dim
|
||||||
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
|
sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
|
||||||
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
|
||||||
|
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
Comms(point,dimension,shift,0x3);
|
|
||||||
// std::cout<<"Comms 0x3"<<std::endl;
|
// std::cout<<"Comms 0x3"<<std::endl;
|
||||||
|
Comms(point,dimension,shift,0x3);
|
||||||
} else {
|
} else {
|
||||||
|
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
|
||||||
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||||
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
|
Comms(point,dimension,shift,0x2);// both with block stride loop iteration
|
||||||
// std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// for(int ss=0;ss<osites;ss++){
|
// for(int ss=0;ss<osites;ss++){
|
||||||
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
|
// std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
|
||||||
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
|
// _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -211,7 +212,6 @@ namespace Grid {
|
|||||||
wraparound = 1;
|
wraparound = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int permute_slice=0;
|
int permute_slice=0;
|
||||||
if(permute_dim){
|
if(permute_dim){
|
||||||
int wrap = sshift/rd;
|
int wrap = sshift/rd;
|
||||||
@ -228,6 +228,7 @@ namespace Grid {
|
|||||||
void Comms (int point,int dimension,int shiftpm,int cbmask)
|
void Comms (int point,int dimension,int shiftpm,int cbmask)
|
||||||
{
|
{
|
||||||
GridBase *grid=_grid;
|
GridBase *grid=_grid;
|
||||||
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
int fd = _grid->_fdimensions[dimension];
|
int fd = _grid->_fdimensions[dimension];
|
||||||
int ld = _grid->_ldimensions[dimension];
|
int ld = _grid->_ldimensions[dimension];
|
||||||
@ -242,7 +243,8 @@ namespace Grid {
|
|||||||
assert(shift>=0);
|
assert(shift>=0);
|
||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored
|
||||||
|
// std::cout << " dim " <<dimension<<" buffersize "<<buffer_size<<std::endl;
|
||||||
_comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
|
_comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
|
||||||
// send to one or more remote nodes.
|
// send to one or more remote nodes.
|
||||||
|
|
||||||
@ -252,11 +254,40 @@ namespace Grid {
|
|||||||
|
|
||||||
for(int x=0;x<rd;x++){
|
for(int x=0;x<rd;x++){
|
||||||
|
|
||||||
int sx = (x+sshift)%rd;
|
|
||||||
int comm_proc = ((x+sshift)/rd)%pd;
|
|
||||||
int offnode = (comm_proc!= 0);
|
|
||||||
|
|
||||||
// std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
|
int permute_type=grid->PermuteType(dimension);
|
||||||
|
|
||||||
|
int sx = (x+sshift)%rd;
|
||||||
|
|
||||||
|
int offnode = 0;
|
||||||
|
if ( simd_layout > 1 ) {
|
||||||
|
|
||||||
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
|
||||||
|
int inner_bit = (Nsimd>>(permute_type+1));
|
||||||
|
int ic= (i&inner_bit)? 1:0;
|
||||||
|
int my_coor = rd*ic + x;
|
||||||
|
int nbr_coor = my_coor+sshift;
|
||||||
|
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
|
||||||
|
|
||||||
|
if ( nbr_proc ) {
|
||||||
|
offnode =1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
int comm_proc = ((x+sshift)/rd)%pd;
|
||||||
|
offnode = (comm_proc!= 0);
|
||||||
|
// std::cout << "Stencil x "<<x<<" shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<< " comm_proc "<<comm_proc<<" pd "<< pd <<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Stencil x 1 shift 3 sshift 3 fd 8 rd 2 offnode 0 sx 0 comm_proc 0 pd 2
|
||||||
|
// x+sshift = 4
|
||||||
|
// x+sshift/2 = 2
|
||||||
|
// 2%2 == 0
|
||||||
|
// Problem: sshift is wrong in "rd" for SIMD directions. The complex logic in Cshift_mpi is needed.
|
||||||
|
|
||||||
int wraparound=0;
|
int wraparound=0;
|
||||||
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
|
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
|
||||||
wraparound = 1;
|
wraparound = 1;
|
||||||
@ -282,6 +313,7 @@ namespace Grid {
|
|||||||
|
|
||||||
int unified_buffer_offset = _unified_buffer_size;
|
int unified_buffer_offset = _unified_buffer_size;
|
||||||
_unified_buffer_size += words;
|
_unified_buffer_size += words;
|
||||||
|
// std::cout<< "Comms dim "<<dimension<<" offset "<<unified_buffer_offset<<" size "<<" " << _unified_buffer_size<<std::endl;
|
||||||
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
|
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -440,7 +472,7 @@ namespace Grid {
|
|||||||
nosplicetime+=usecond();
|
nosplicetime+=usecond();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
// std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||||
if(splice_dim){
|
if(splice_dim){
|
||||||
splicetime-=usecond();
|
splicetime-=usecond();
|
||||||
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
|
GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
|
||||||
@ -595,7 +627,6 @@ namespace Grid {
|
|||||||
|
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
|
|
||||||
|
|
||||||
int inner_bit = (Nsimd>>(permute_type+1));
|
int inner_bit = (Nsimd>>(permute_type+1));
|
||||||
int ic= (i&inner_bit)? 1:0;
|
int ic= (i&inner_bit)? 1:0;
|
||||||
|
|
||||||
@ -633,10 +664,11 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Here we don't want to scatter, just place into a buffer.
|
// std::cout << " CommsSimd ["<<dimension<<"] offset "<<u_comm_offset<<" buffsize "<<buffer_size <<" unified buffer size "<<_unified_buffer_size<<std::endl;
|
||||||
mergetime-=usecond();
|
mergetime-=usecond();
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int i=0;i<buffer_size;i++){
|
for(int i=0;i<buffer_size;i++){
|
||||||
|
// std::cout<<"buffer loop " << i<<" "<<u_comm_offset+i<<" / "<<_unified_buffer_size<<std::endl;
|
||||||
// assert(u_comm_offset+i<_unified_buffer_size);
|
// assert(u_comm_offset+i<_unified_buffer_size);
|
||||||
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
|
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
|
||||||
}
|
}
|
||||||
|
@ -167,7 +167,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||||
int bytes = buffer_size*sizeof(scalar_object);
|
int bytes = buffer_size*sizeof(scalar_object);
|
||||||
|
|
||||||
std::vector<scalar_object *> pointers(Nsimd); //
|
std::vector<scalar_object *> pointers(Nsimd); //
|
||||||
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
|
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
|
@ -176,6 +176,8 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
|
|||||||
scalar_type *pointer;
|
scalar_type *pointer;
|
||||||
scalar_type *vp = (scalar_type *)&vec;
|
scalar_type *vp = (scalar_type *)&vec;
|
||||||
|
|
||||||
|
// assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
|
||||||
|
|
||||||
for(int w=0;w<words;w++){
|
for(int w=0;w<words;w++){
|
||||||
for(int i=0;i<Nextr;i++){
|
for(int i=0;i<Nextr;i++){
|
||||||
for(int ii=0;ii<s;ii++){
|
for(int ii=0;ii<s;ii++){
|
||||||
|
@ -169,12 +169,13 @@ int main (int argc, char ** argv)
|
|||||||
ECheck.checkerboard = Odd;
|
ECheck.checkerboard = Odd;
|
||||||
OCheck.checkerboard = Even;
|
OCheck.checkerboard = Even;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implement a stencil code that should agree with that darn cshift!
|
// Implement a stencil code that should agree with that darn cshift!
|
||||||
for(int i=0;i<OCheck._grid->oSites();i++){
|
for(int i=0;i<OCheck._grid->oSites();i++){
|
||||||
int permute_type;
|
int permute_type;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
SE = EStencil.GetEntry(permute_type,0,i);
|
SE = EStencil.GetEntry(permute_type,0,i);
|
||||||
std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
|
// std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
|
||||||
|
|
||||||
if ( SE->_is_local && SE->_permute )
|
if ( SE->_is_local && SE->_permute )
|
||||||
permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type);
|
permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type);
|
||||||
@ -187,7 +188,7 @@ int main (int argc, char ** argv)
|
|||||||
int permute_type;
|
int permute_type;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
SE = OStencil.GetEntry(permute_type,0,i);
|
SE = OStencil.GetEntry(permute_type,0,i);
|
||||||
std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
|
// std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl;
|
||||||
|
|
||||||
if ( SE->_is_local && SE->_permute )
|
if ( SE->_is_local && SE->_permute )
|
||||||
permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type);
|
permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type);
|
||||||
|
Loading…
Reference in New Issue
Block a user