mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'feature/dirichlet' of https://github.com/paboyle/Grid into feature/dirichlet
This commit is contained in:
commit
6a1a198144
@ -372,7 +372,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
double off_node_bytes=0.0;
|
||||
int tag;
|
||||
|
||||
if ( dox ) {
|
||||
if ( dor ) {
|
||||
if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
|
||||
tag= dir+from*32;
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
|
||||
@ -382,7 +382,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
}
|
||||
}
|
||||
|
||||
if (dor) {
|
||||
if (dox) {
|
||||
if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
|
||||
tag= dir+_processor*32;
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
|
||||
|
@ -125,6 +125,12 @@ void pokeSite(const sobj &s,Lattice<vobj> &l,const Coordinate &site){
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
typename vobj::scalar_object peekSite(const Lattice<vobj> &l,const Coordinate &site){
|
||||
typename vobj::scalar_object s;
|
||||
peekSite(s,l,site);
|
||||
return s;
|
||||
}
|
||||
template<class vobj,class sobj>
|
||||
void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
|
||||
|
||||
|
@ -665,7 +665,7 @@ public:
|
||||
this->_comms_recv[ii] = comm_dim;
|
||||
if ( block && comm_dim ) {
|
||||
assert(abs(displacement) < ld );
|
||||
|
||||
// Quiesce communication across block boundaries
|
||||
if( displacement > 0 ) {
|
||||
// High side, low side
|
||||
// | <--B--->|
|
||||
@ -730,7 +730,7 @@ public:
|
||||
int gd = _grid->_gdimensions[dimension];
|
||||
int fd = _grid->_fdimensions[dimension];
|
||||
int pd = _grid->_processors [dimension];
|
||||
int ld = gd/pd;
|
||||
// int ld = gd/pd;
|
||||
int rd = _grid->_rdimensions[dimension];
|
||||
int pc = _grid->_processor_coor[dimension];
|
||||
this->_permute_type[point]=_grid->PermuteType(dimension);
|
||||
@ -871,12 +871,14 @@ public:
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int permute_type=grid->PermuteType(dimension);
|
||||
int permute_slice;
|
||||
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
int offnode = 0;
|
||||
if ( simd_layout > 1 ) {
|
||||
|
||||
permute_slice=1;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
@ -893,6 +895,7 @@ public:
|
||||
} else {
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
offnode = (comm_proc!= 0);
|
||||
permute_slice=0;
|
||||
}
|
||||
|
||||
int wraparound=0;
|
||||
@ -906,19 +909,18 @@ public:
|
||||
// Wrap locally dirichlet support case OR node local
|
||||
if ( offnode==0 ) {
|
||||
|
||||
int permute_slice=0;
|
||||
permute_slice=0;
|
||||
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
|
||||
|
||||
} else {
|
||||
|
||||
if ( comms_recv==0 ) {
|
||||
if ( comms_recv ) {
|
||||
|
||||
int permute_slice=1;
|
||||
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
|
||||
ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase
|
||||
|
||||
} else {
|
||||
|
||||
ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase
|
||||
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
|
||||
|
||||
}
|
||||
|
||||
@ -1208,7 +1210,7 @@ public:
|
||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||
}
|
||||
|
||||
if ( comms_send )
|
||||
if ( comms_send || comms_recv )
|
||||
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
||||
face_idx++;
|
||||
|
||||
|
@ -19,8 +19,8 @@ export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
|
||||
export MPICH_GPU_NO_ASYNC_MEMCPY=0
|
||||
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
|
||||
|
||||
OPT="--comms-overlap --shm-mpi 1"
|
||||
VOL=64.64.32.32
|
||||
OPT="--comms-sequential --shm-mpi 1"
|
||||
VOL=64.64.64.64
|
||||
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
|
||||
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
|
||||
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
|
||||
|
Loading…
Reference in New Issue
Block a user