1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Fixed the stencil sector and Wilson now agrees between stencil based implementation

and the cshift based implementation. Managed to reduce the volume of code in this
sector a little, but consolidation would be good, perhaps taking common
logic out into simple helper functions
This commit is contained in:
Peter Boyle 2015-04-29 06:23:56 +01:00
parent b0485894b3
commit dcc23faa4a
4 changed files with 17 additions and 51 deletions

View File

@ -39,13 +39,6 @@
namespace Grid {
struct CommsRequest {
int words;
int unified_buffer_offset;
int tag;
int to_rank;
int from_rank;
} ;
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
@ -69,7 +62,6 @@ namespace Grid {
int _unified_buffer_size;
int _request_count;
std::vector<CommsRequest> CommsRequests;
CartesianStencil(GridBase *grid,
int npoints,
@ -90,7 +82,6 @@ namespace Grid {
template<class vobj,class cobj, class compressor> void
HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
{
std::cout<< "HaloExchange comm_buf.size()="<< u_comm_buf.size()<<" unified_buffer_size"<< _unified_buffer_size<< std::endl;
// conformable(source._grid,_grid);
assert(source._grid==_grid);
if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
@ -141,7 +132,6 @@ namespace Grid {
}
}
}
std::cout<< "HaloExchange complete"<< std::endl;
}
template<class vobj,class cobj, class compressor>
@ -194,24 +184,18 @@ namespace Grid {
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
assert (xmit_to_rank != _grid->ThisRank());
assert (recv_from_rank != _grid->ThisRank());
// FIXME Implement asynchronous send & also avoid buffer copy
_grid->SendToRecvFrom((void *)&send_buf[0],
xmit_to_rank,
(void *)&recv_buf[0],
recv_from_rank,
bytes);
printf("GatherStartComms communicated offnode x %d\n",x);fflush(stdout);
printf("GatherStartComms inserting %le to u_comm_offset %d buf size %d for dim %d shift %d\n",
*( (RealF *) &recv_buf[0]),
u_comm_offset,buffer_size,
dimension,shift
); fflush(stdout);
for(int i=0;i<buffer_size;i++){
u_comm_buf[u_comm_offset+i]=recv_buf[i];
}
u_comm_offset+=buffer_size;
printf("GatherStartComms inserted x %d\n",x);fflush(stdout);
}
}
}
@ -248,7 +232,7 @@ namespace Grid {
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
int words = sizeof(cobj)/sizeof(vector_type);
/* FIXME ALTERNATE BUFFER DETERMINATION */
/* FIXME ALTERNATE BUFFER DETERMINATION ; possibly slow to allocate*/
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
int bytes = buffer_size*sizeof(scalar_object);
@ -267,25 +251,21 @@ namespace Grid {
for(int x=0;x<rd;x++){
int any_offnode = ( ((x+sshift)%fd) >= rd );
std::cout<<"any_offnode ="<<any_offnode<<std::endl;
if ( any_offnode ) {
// FIXME call local permute copy if none are offnode.
for(int i=0;i<Nsimd;i++){
pointers[i] = &send_buf_extract[i][0];
}
int sx = (x+sshift)%rd;
std::cout<< "Gathering "<< x <<std::endl;
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
std::cout<< "Gathered "<<std::endl;
for(int i=0;i<Nsimd;i++){
std::vector<int> icoor;
_grid->iCoorFromIindex(icoor,i);
int inner_bit = (Nsimd>>(permute_type+1));
int ic= (i&inner_bit)? 1:0;
assert(ic==icoor[dimension]);
int my_coor = rd*ic + x;
int nbr_coor = my_coor+sshift;
@ -301,12 +281,9 @@ namespace Grid {
if (nbr_ic) nbr_lane|=inner_bit;
assert (sx == nbr_ox);
std::cout<<"nbr_proc "<<nbr_proc<< " x "<<x<<" nbr_x "<<nbr_ox << " lane "<<i << " nbr_lane "<<nbr_lane
<< " nbr_ic "<<nbr_ic << " mycoor "<< my_coor<< " nbr_coor "<<nbr_coor<<std::endl;
if(nbr_proc){
std::cout<< "MPI sending "<<std::endl;
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
@ -314,23 +291,20 @@ namespace Grid {
(void *)&recv_buf_extract[i][0],
recv_from_rank,
bytes);
std::cout<< "MPI complete "<<std::endl;
rpointers[i] = &recv_buf_extract[i][0];
std::cout<<"lane "<<i<<" data "<<*( (Real *) rpointers[i])<<std::endl;
} else {
rpointers[i] = &send_buf_extract[nbr_lane][0];
std::cout<<"lane "<<i<<" data "<<*( (Real *) rpointers[i])<<std::endl;
}
}
// Here we don't want to scatter, just place into a buffer.
std::cout<< "merging u_comm_offset "<< u_comm_offset<<" comm_buf_size" << u_comm_buf.size() <<std::endl;
for(int i=0;i<buffer_size;i++){
assert(u_comm_offset+i<_unified_buffer_size);
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
}
u_comm_offset+=buffer_size;
}
}

View File

@ -24,7 +24,6 @@ const int WilsonMatrix::Tm = 7;
void Point(int p) {
mu=p;
std::cout << "WilsonCompressor.Point " << mu<<std::endl;
};
vHalfSpinColourVector operator () (const vSpinColourVector &in)
@ -193,7 +192,6 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
chi_p = &tmp;
}
}
std::cout<<"Xm for site "<<ss<<" l "<<local<<" p "<<perm<<" chi "<<Reduce(TensorRemove(innerProduct(*chi_p,*chi_p)))<<std::endl;
mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
accumReconXm(result,Uchi);

View File

@ -19,7 +19,6 @@ namespace Grid {
_distances = distances;
_unified_buffer_size=0;
_request_count =0;
CommsRequests.resize(0);
int osites = _grid->oSites();
@ -117,6 +116,7 @@ namespace Grid {
GridBase *grid=_grid;
int fd = _grid->_fdimensions[dimension];
int ld = _grid->_ldimensions[dimension];
int rd = _grid->_rdimensions[dimension];
int pd = _grid->_processors[dimension];
int simd_layout = _grid->_simd_layout[dimension];
@ -137,9 +137,10 @@ namespace Grid {
for(int x=0;x<rd;x++){
int comm_proc = ((x+sshift)/rd)%pd;
int offnode = (comm_proc!=0);
int sx = (x+sshift)%rd;
int offnode = (((x+sshift)%fd) >= rd );
// int comm_proc = ((x+sshift)/ld)%pd;
// int offnode = (comm_proc!=0);
int sx = (x+sshift)%rd;
if (!offnode) {
@ -157,17 +158,9 @@ namespace Grid {
int recv_from_rank;
int xmit_to_rank;
CommsRequest cr;
cr.tag = _request_count++;
cr.words = words;
cr.unified_buffer_offset = _unified_buffer_size;
int unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words;
grid->ShiftedRanks(dimension,comm_proc,cr.to_rank,cr.from_rank);
CommsRequests.push_back(cr);
ScatterPlane(point,dimension,x,cbmask,cr.unified_buffer_offset); // permute/extract/merge is done in comms phase
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset); // permute/extract/merge is done in comms phase
}
}

View File

@ -21,7 +21,7 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({2,1,1,2});
std::vector<int> mpi_layout ({2,2,2,2});
std::vector<int> latt_size ({8,8,8,8});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
@ -76,7 +76,8 @@ int main (int argc, char ** argv)
std::cout << "norm result "<< norm2(result)<<std::endl;
std::cout << "norm ref "<< norm2(ref)<<std::endl;
for(int ss=0;ss<10;ss++ ){
// for(int ss=0;ss<10;ss++ ){
for(int ss=0;ss<0;ss++ ){
for(int i=0;i<Ns;i++){
for(int j=0;j<Nc;j++){
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);