1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Fixed the stencil sector; Wilson now agrees between the stencil-based implementation

and the cshift-based implementation. Managed to reduce the volume of code in this
sector a little, but further consolidation would be good, perhaps by taking common
logic out into simple helper functions.
This commit is contained in:
Peter Boyle 2015-04-29 06:23:56 +01:00
parent b0485894b3
commit dcc23faa4a
4 changed files with 17 additions and 51 deletions

View File

@ -39,13 +39,6 @@
namespace Grid { namespace Grid {
struct CommsRequest {
int words;
int unified_buffer_offset;
int tag;
int to_rank;
int from_rank;
} ;
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
@ -69,7 +62,6 @@ namespace Grid {
int _unified_buffer_size; int _unified_buffer_size;
int _request_count; int _request_count;
std::vector<CommsRequest> CommsRequests;
CartesianStencil(GridBase *grid, CartesianStencil(GridBase *grid,
int npoints, int npoints,
@ -90,7 +82,6 @@ namespace Grid {
template<class vobj,class cobj, class compressor> void template<class vobj,class cobj, class compressor> void
HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress) HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
{ {
std::cout<< "HaloExchange comm_buf.size()="<< u_comm_buf.size()<<" unified_buffer_size"<< _unified_buffer_size<< std::endl;
// conformable(source._grid,_grid); // conformable(source._grid,_grid);
assert(source._grid==_grid); assert(source._grid==_grid);
if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size); if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
@ -141,7 +132,6 @@ namespace Grid {
} }
} }
} }
std::cout<< "HaloExchange complete"<< std::endl;
} }
template<class vobj,class cobj, class compressor> template<class vobj,class cobj, class compressor>
@ -194,24 +184,18 @@ namespace Grid {
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
assert (xmit_to_rank != _grid->ThisRank()); assert (xmit_to_rank != _grid->ThisRank());
assert (recv_from_rank != _grid->ThisRank()); assert (recv_from_rank != _grid->ThisRank());
// FIXME Implement asynchronous send & also avoid buffer copy // FIXME Implement asynchronous send & also avoid buffer copy
_grid->SendToRecvFrom((void *)&send_buf[0], _grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank, xmit_to_rank,
(void *)&recv_buf[0], (void *)&recv_buf[0],
recv_from_rank, recv_from_rank,
bytes); bytes);
printf("GatherStartComms communicated offnode x %d\n",x);fflush(stdout);
printf("GatherStartComms inserting %le to u_comm_offset %d buf size %d for dim %d shift %d\n",
*( (RealF *) &recv_buf[0]),
u_comm_offset,buffer_size,
dimension,shift
); fflush(stdout);
for(int i=0;i<buffer_size;i++){ for(int i=0;i<buffer_size;i++){
u_comm_buf[u_comm_offset+i]=recv_buf[i]; u_comm_buf[u_comm_offset+i]=recv_buf[i];
} }
u_comm_offset+=buffer_size; u_comm_offset+=buffer_size;
printf("GatherStartComms inserted x %d\n",x);fflush(stdout);
} }
} }
} }
@ -248,7 +232,7 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(cobj)/sizeof(vector_type); int words = sizeof(cobj)/sizeof(vector_type);
/* FIXME ALTERNATE BUFFER DETERMINATION */ /* FIXME ALTERNATE BUFFER DETERMINATION ; possibly slow to allocate*/
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) ); std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) ); std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
int bytes = buffer_size*sizeof(scalar_object); int bytes = buffer_size*sizeof(scalar_object);
@ -267,25 +251,21 @@ namespace Grid {
for(int x=0;x<rd;x++){ for(int x=0;x<rd;x++){
int any_offnode = ( ((x+sshift)%fd) >= rd ); int any_offnode = ( ((x+sshift)%fd) >= rd );
std::cout<<"any_offnode ="<<any_offnode<<std::endl;
if ( any_offnode ) { if ( any_offnode ) {
// FIXME call local permute copy if none are offnode.
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
pointers[i] = &send_buf_extract[i][0]; pointers[i] = &send_buf_extract[i][0];
} }
int sx = (x+sshift)%rd; int sx = (x+sshift)%rd;
std::cout<< "Gathering "<< x <<std::endl;
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress); Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
std::cout<< "Gathered "<<std::endl;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
std::vector<int> icoor;
_grid->iCoorFromIindex(icoor,i);
int inner_bit = (Nsimd>>(permute_type+1)); int inner_bit = (Nsimd>>(permute_type+1));
int ic= (i&inner_bit)? 1:0; int ic= (i&inner_bit)? 1:0;
assert(ic==icoor[dimension]);
int my_coor = rd*ic + x; int my_coor = rd*ic + x;
int nbr_coor = my_coor+sshift; int nbr_coor = my_coor+sshift;
@ -301,12 +281,9 @@ namespace Grid {
if (nbr_ic) nbr_lane|=inner_bit; if (nbr_ic) nbr_lane|=inner_bit;
assert (sx == nbr_ox); assert (sx == nbr_ox);
std::cout<<"nbr_proc "<<nbr_proc<< " x "<<x<<" nbr_x "<<nbr_ox << " lane "<<i << " nbr_lane "<<nbr_lane
<< " nbr_ic "<<nbr_ic << " mycoor "<< my_coor<< " nbr_coor "<<nbr_coor<<std::endl;
if(nbr_proc){ if(nbr_proc){
std::cout<< "MPI sending "<<std::endl;
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0], _grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
@ -314,23 +291,20 @@ namespace Grid {
(void *)&recv_buf_extract[i][0], (void *)&recv_buf_extract[i][0],
recv_from_rank, recv_from_rank,
bytes); bytes);
std::cout<< "MPI complete "<<std::endl;
rpointers[i] = &recv_buf_extract[i][0]; rpointers[i] = &recv_buf_extract[i][0];
std::cout<<"lane "<<i<<" data "<<*( (Real *) rpointers[i])<<std::endl;
} else { } else {
rpointers[i] = &send_buf_extract[nbr_lane][0]; rpointers[i] = &send_buf_extract[nbr_lane][0];
std::cout<<"lane "<<i<<" data "<<*( (Real *) rpointers[i])<<std::endl;
} }
} }
// Here we don't want to scatter, just place into a buffer. // Here we don't want to scatter, just place into a buffer.
std::cout<< "merging u_comm_offset "<< u_comm_offset<<" comm_buf_size" << u_comm_buf.size() <<std::endl;
for(int i=0;i<buffer_size;i++){ for(int i=0;i<buffer_size;i++){
assert(u_comm_offset+i<_unified_buffer_size); assert(u_comm_offset+i<_unified_buffer_size);
merge(u_comm_buf[u_comm_offset+i],rpointers,i); merge(u_comm_buf[u_comm_offset+i],rpointers,i);
} }
u_comm_offset+=buffer_size; u_comm_offset+=buffer_size;
} }
} }

View File

@ -24,7 +24,6 @@ const int WilsonMatrix::Tm = 7;
void Point(int p) { void Point(int p) {
mu=p; mu=p;
std::cout << "WilsonCompressor.Point " << mu<<std::endl;
}; };
vHalfSpinColourVector operator () (const vSpinColourVector &in) vHalfSpinColourVector operator () (const vSpinColourVector &in)
@ -193,7 +192,6 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
chi_p = &tmp; chi_p = &tmp;
} }
} }
std::cout<<"Xm for site "<<ss<<" l "<<local<<" p "<<perm<<" chi "<<Reduce(TensorRemove(innerProduct(*chi_p,*chi_p)))<<std::endl;
mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)()); mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
accumReconXm(result,Uchi); accumReconXm(result,Uchi);

View File

@ -19,7 +19,6 @@ namespace Grid {
_distances = distances; _distances = distances;
_unified_buffer_size=0; _unified_buffer_size=0;
_request_count =0; _request_count =0;
CommsRequests.resize(0);
int osites = _grid->oSites(); int osites = _grid->oSites();
@ -117,6 +116,7 @@ namespace Grid {
GridBase *grid=_grid; GridBase *grid=_grid;
int fd = _grid->_fdimensions[dimension]; int fd = _grid->_fdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
int rd = _grid->_rdimensions[dimension]; int rd = _grid->_rdimensions[dimension];
int pd = _grid->_processors[dimension]; int pd = _grid->_processors[dimension];
int simd_layout = _grid->_simd_layout[dimension]; int simd_layout = _grid->_simd_layout[dimension];
@ -137,8 +137,9 @@ namespace Grid {
for(int x=0;x<rd;x++){ for(int x=0;x<rd;x++){
int comm_proc = ((x+sshift)/rd)%pd; int offnode = (((x+sshift)%fd) >= rd );
int offnode = (comm_proc!=0); // int comm_proc = ((x+sshift)/ld)%pd;
// int offnode = (comm_proc!=0);
int sx = (x+sshift)%rd; int sx = (x+sshift)%rd;
if (!offnode) { if (!offnode) {
@ -157,17 +158,9 @@ namespace Grid {
int recv_from_rank; int recv_from_rank;
int xmit_to_rank; int xmit_to_rank;
CommsRequest cr; int unified_buffer_offset = _unified_buffer_size;
cr.tag = _request_count++;
cr.words = words;
cr.unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words; _unified_buffer_size += words;
grid->ShiftedRanks(dimension,comm_proc,cr.to_rank,cr.from_rank); ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset); // permute/extract/merge is done in comms phase
CommsRequests.push_back(cr);
ScatterPlane(point,dimension,x,cbmask,cr.unified_buffer_offset); // permute/extract/merge is done in comms phase
} }
} }

View File

@ -21,7 +21,7 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2}); std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({2,1,1,2}); std::vector<int> mpi_layout ({2,2,2,2});
std::vector<int> latt_size ({8,8,8,8}); std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridCartesian Grid(latt_size,simd_layout,mpi_layout);
@ -76,7 +76,8 @@ int main (int argc, char ** argv)
std::cout << "norm result "<< norm2(result)<<std::endl; std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl; std::cout << "norm ref "<< norm2(ref)<<std::endl;
for(int ss=0;ss<10;ss++ ){ // for(int ss=0;ss<10;ss++ ){
for(int ss=0;ss<0;ss++ ){
for(int i=0;i<Ns;i++){ for(int i=0;i<Ns;i++){
for(int j=0;j<Nc;j++){ for(int j=0;j<Nc;j++){
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j); ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);