1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Clean up Dirichlet. Big oops fix

This commit is contained in:
Peter Boyle 2022-05-28 17:18:08 -07:00
parent 5ddea3829d
commit 34faa39f4f
2 changed files with 12 additions and 10 deletions

View File

@ -665,7 +665,7 @@ public:
this->_comms_recv[ii] = comm_dim; this->_comms_recv[ii] = comm_dim;
if ( block && comm_dim ) { if ( block && comm_dim ) {
assert(abs(displacement) < ld ); assert(abs(displacement) < ld );
// Quiesce communication across block boundaries
if( displacement > 0 ) { if( displacement > 0 ) {
// High side, low side // High side, low side
// | <--B--->| // | <--B--->|
@ -730,7 +730,7 @@ public:
int gd = _grid->_gdimensions[dimension]; int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension]; int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension]; int pd = _grid->_processors [dimension];
int ld = gd/pd; // int ld = gd/pd;
int rd = _grid->_rdimensions[dimension]; int rd = _grid->_rdimensions[dimension];
int pc = _grid->_processor_coor[dimension]; int pc = _grid->_processor_coor[dimension];
this->_permute_type[point]=_grid->PermuteType(dimension); this->_permute_type[point]=_grid->PermuteType(dimension);
@ -871,12 +871,14 @@ public:
for(int x=0;x<rd;x++){ for(int x=0;x<rd;x++){
int permute_type=grid->PermuteType(dimension); int permute_type=grid->PermuteType(dimension);
int permute_slice;
int sx = (x+sshift)%rd; int sx = (x+sshift)%rd;
int offnode = 0; int offnode = 0;
if ( simd_layout > 1 ) { if ( simd_layout > 1 ) {
permute_slice=1;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
int inner_bit = (Nsimd>>(permute_type+1)); int inner_bit = (Nsimd>>(permute_type+1));
@ -893,6 +895,7 @@ public:
} else { } else {
int comm_proc = ((x+sshift)/rd)%pd; int comm_proc = ((x+sshift)/rd)%pd;
offnode = (comm_proc!= 0); offnode = (comm_proc!= 0);
permute_slice=0;
} }
int wraparound=0; int wraparound=0;
@ -906,19 +909,18 @@ public:
// Wrap locally dirichlet support case OR node local // Wrap locally dirichlet support case OR node local
if ( offnode==0 ) { if ( offnode==0 ) {
int permute_slice=0; permute_slice=0;
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} else { } else {
if ( comms_recv==0 ) { if ( comms_recv ) {
int permute_slice=1; ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} else { } else {
ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} }
@ -1208,7 +1210,7 @@ public:
face_table[face_idx].size()*sizeof(face_table_host[0])); face_table[face_idx].size()*sizeof(face_table_host[0]));
} }
if ( comms_send ) if ( comms_send || comms_recv )
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
face_idx++; face_idx++;

View File

@ -19,8 +19,8 @@ export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
export MPICH_GPU_NO_ASYNC_MEMCPY=0 export MPICH_GPU_NO_ASYNC_MEMCPY=0
#export MPICH_SMP_SINGLE_COPY_MODE=CMA #export MPICH_SMP_SINGLE_COPY_MODE=CMA
OPT="--comms-overlap --shm-mpi 1" OPT="--comms-sequential --shm-mpi 1"
VOL=64.64.32.32 VOL=64.64.64.64
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT #srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT #srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT