mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Fixed the stencil sector, and Wilson now agrees between the stencil-based implementation
and the cshift-based implementation. Managed to reduce the volume of code in this sector a little, but further consolidation would be good, perhaps by taking common logic out into simple helper functions.
This commit is contained in:
parent
25d523c0f4
commit
c72db6c6f6
@ -39,13 +39,6 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
struct CommsRequest {
|
||||
int words;
|
||||
int unified_buffer_offset;
|
||||
int tag;
|
||||
int to_rank;
|
||||
int from_rank;
|
||||
} ;
|
||||
|
||||
|
||||
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
|
||||
@ -69,7 +62,6 @@ namespace Grid {
|
||||
int _unified_buffer_size;
|
||||
int _request_count;
|
||||
|
||||
std::vector<CommsRequest> CommsRequests;
|
||||
|
||||
CartesianStencil(GridBase *grid,
|
||||
int npoints,
|
||||
@ -90,7 +82,6 @@ namespace Grid {
|
||||
template<class vobj,class cobj, class compressor> void
|
||||
HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
|
||||
{
|
||||
std::cout<< "HaloExchange comm_buf.size()="<< u_comm_buf.size()<<" unified_buffer_size"<< _unified_buffer_size<< std::endl;
|
||||
// conformable(source._grid,_grid);
|
||||
assert(source._grid==_grid);
|
||||
if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
|
||||
@ -141,7 +132,6 @@ namespace Grid {
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout<< "HaloExchange complete"<< std::endl;
|
||||
}
|
||||
|
||||
template<class vobj,class cobj, class compressor>
|
||||
@ -194,24 +184,18 @@ namespace Grid {
|
||||
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
assert (xmit_to_rank != _grid->ThisRank());
|
||||
assert (recv_from_rank != _grid->ThisRank());
|
||||
|
||||
// FIXME Implement asynchronous send & also avoid buffer copy
|
||||
_grid->SendToRecvFrom((void *)&send_buf[0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf[0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
printf("GatherStartComms communicated offnode x %d\n",x);fflush(stdout);
|
||||
|
||||
printf("GatherStartComms inserting %le to u_comm_offset %d buf size %d for dim %d shift %d\n",
|
||||
*( (RealF *) &recv_buf[0]),
|
||||
u_comm_offset,buffer_size,
|
||||
dimension,shift
|
||||
); fflush(stdout);
|
||||
for(int i=0;i<buffer_size;i++){
|
||||
u_comm_buf[u_comm_offset+i]=recv_buf[i];
|
||||
}
|
||||
u_comm_offset+=buffer_size;
|
||||
printf("GatherStartComms inserted x %d\n",x);fflush(stdout);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -248,7 +232,7 @@ namespace Grid {
|
||||
int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
|
||||
int words = sizeof(cobj)/sizeof(vector_type);
|
||||
|
||||
/* FIXME ALTERNATE BUFFER DETERMINATION */
|
||||
/* FIXME ALTERNATE BUFFER DETERMINATION ; possibly slow to allocate*/
|
||||
std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||
std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
|
||||
int bytes = buffer_size*sizeof(scalar_object);
|
||||
@ -267,25 +251,21 @@ namespace Grid {
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int any_offnode = ( ((x+sshift)%fd) >= rd );
|
||||
std::cout<<"any_offnode ="<<any_offnode<<std::endl;
|
||||
|
||||
if ( any_offnode ) {
|
||||
// FIXME call local permute copy if none are offnode.
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[i] = &send_buf_extract[i][0];
|
||||
}
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
std::cout<< "Gathering "<< x <<std::endl;
|
||||
Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
|
||||
std::cout<< "Gathered "<<std::endl;
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
std::vector<int> icoor;
|
||||
_grid->iCoorFromIindex(icoor,i);
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
int ic= (i&inner_bit)? 1:0;
|
||||
assert(ic==icoor[dimension]);
|
||||
|
||||
int my_coor = rd*ic + x;
|
||||
int nbr_coor = my_coor+sshift;
|
||||
@ -301,12 +281,9 @@ namespace Grid {
|
||||
if (nbr_ic) nbr_lane|=inner_bit;
|
||||
assert (sx == nbr_ox);
|
||||
|
||||
std::cout<<"nbr_proc "<<nbr_proc<< " x "<<x<<" nbr_x "<<nbr_ox << " lane "<<i << " nbr_lane "<<nbr_lane
|
||||
<< " nbr_ic "<<nbr_ic << " mycoor "<< my_coor<< " nbr_coor "<<nbr_coor<<std::endl;
|
||||
|
||||
if(nbr_proc){
|
||||
|
||||
std::cout<< "MPI sending "<<std::endl;
|
||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
_grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
|
||||
@ -314,23 +291,20 @@ namespace Grid {
|
||||
(void *)&recv_buf_extract[i][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
std::cout<< "MPI complete "<<std::endl;
|
||||
|
||||
rpointers[i] = &recv_buf_extract[i][0];
|
||||
std::cout<<"lane "<<i<<" data "<<*( (Real *) rpointers[i])<<std::endl;
|
||||
|
||||
} else {
|
||||
rpointers[i] = &send_buf_extract[nbr_lane][0];
|
||||
std::cout<<"lane "<<i<<" data "<<*( (Real *) rpointers[i])<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Here we don't want to scatter, just place into a buffer.
|
||||
std::cout<< "merging u_comm_offset "<< u_comm_offset<<" comm_buf_size" << u_comm_buf.size() <<std::endl;
|
||||
|
||||
for(int i=0;i<buffer_size;i++){
|
||||
assert(u_comm_offset+i<_unified_buffer_size);
|
||||
merge(u_comm_buf[u_comm_offset+i],rpointers,i);
|
||||
}
|
||||
|
||||
u_comm_offset+=buffer_size;
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,6 @@ const int WilsonMatrix::Tm = 7;
|
||||
|
||||
void Point(int p) {
|
||||
mu=p;
|
||||
std::cout << "WilsonCompressor.Point " << mu<<std::endl;
|
||||
};
|
||||
|
||||
vHalfSpinColourVector operator () (const vSpinColourVector &in)
|
||||
@ -193,7 +192,6 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
chi_p = &tmp;
|
||||
}
|
||||
}
|
||||
std::cout<<"Xm for site "<<ss<<" l "<<local<<" p "<<perm<<" chi "<<Reduce(TensorRemove(innerProduct(*chi_p,*chi_p)))<<std::endl;
|
||||
mult(&(Uchi()),&(Umu._odata[ss](Xm)),&(*chi_p)());
|
||||
accumReconXm(result,Uchi);
|
||||
|
||||
|
@ -19,7 +19,6 @@ namespace Grid {
|
||||
_distances = distances;
|
||||
_unified_buffer_size=0;
|
||||
_request_count =0;
|
||||
CommsRequests.resize(0);
|
||||
|
||||
int osites = _grid->oSites();
|
||||
|
||||
@ -117,6 +116,7 @@ namespace Grid {
|
||||
GridBase *grid=_grid;
|
||||
|
||||
int fd = _grid->_fdimensions[dimension];
|
||||
int ld = _grid->_ldimensions[dimension];
|
||||
int rd = _grid->_rdimensions[dimension];
|
||||
int pd = _grid->_processors[dimension];
|
||||
int simd_layout = _grid->_simd_layout[dimension];
|
||||
@ -137,9 +137,10 @@ namespace Grid {
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
int offnode = (comm_proc!=0);
|
||||
int sx = (x+sshift)%rd;
|
||||
int offnode = (((x+sshift)%fd) >= rd );
|
||||
// int comm_proc = ((x+sshift)/ld)%pd;
|
||||
// int offnode = (comm_proc!=0);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
if (!offnode) {
|
||||
|
||||
@ -157,17 +158,9 @@ namespace Grid {
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
CommsRequest cr;
|
||||
|
||||
cr.tag = _request_count++;
|
||||
cr.words = words;
|
||||
cr.unified_buffer_offset = _unified_buffer_size;
|
||||
int unified_buffer_offset = _unified_buffer_size;
|
||||
_unified_buffer_size += words;
|
||||
grid->ShiftedRanks(dimension,comm_proc,cr.to_rank,cr.from_rank);
|
||||
|
||||
CommsRequests.push_back(cr);
|
||||
|
||||
ScatterPlane(point,dimension,x,cbmask,cr.unified_buffer_offset); // permute/extract/merge is done in comms phase
|
||||
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset); // permute/extract/merge is done in comms phase
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ int main (int argc, char ** argv)
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> simd_layout({1,1,2,2});
|
||||
std::vector<int> mpi_layout ({2,1,1,2});
|
||||
std::vector<int> mpi_layout ({2,2,2,2});
|
||||
std::vector<int> latt_size ({8,8,8,8});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
@ -76,7 +76,8 @@ int main (int argc, char ** argv)
|
||||
std::cout << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout << "norm ref "<< norm2(ref)<<std::endl;
|
||||
|
||||
for(int ss=0;ss<10;ss++ ){
|
||||
// for(int ss=0;ss<10;ss++ ){
|
||||
for(int ss=0;ss<0;ss++ ){
|
||||
for(int i=0;i<Ns;i++){
|
||||
for(int j=0;j<Nc;j++){
|
||||
ComplexF * ref_p = (ComplexF *)&ref._odata[ss]()(i)(j);
|
||||
|
Loading…
Reference in New Issue
Block a user