From f17b8de907258274c0942568c414810410b970f6 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 7 Mar 2024 15:22:08 +0900 Subject: [PATCH 1/5] fallback to _POSIX_HOST_NAME_MAX if HOST_NAME_MAX is not defined --- Grid/util/Init.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 9a0b4376..363d9ef4 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -77,6 +77,10 @@ feenableexcept (unsigned int excepts) } #endif +#ifndef HOST_NAME_MAX +#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX +#endif + NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////// From 2b4399f8b1a76ea38702b7c95276328b0a1a785d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Thu, 7 Mar 2024 15:26:01 +0900 Subject: [PATCH 2/5] more HOST_NAME_MAX fix --- tests/Test_dwf_mixedcg_prec.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 13cc0bb6..e8d36b7f 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -30,6 +30,10 @@ Author: Peter Boyle using namespace std; using namespace Grid; +#ifndef HOST_NAME_MAX +#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX +#endif + int main (int argc, char ** argv) { char hostname[HOST_NAME_MAX+1]; From d2242979726d5607f461c95ad2d79de8700c6338 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 12 Mar 2024 15:15:16 +0000 Subject: [PATCH 3/5] PBS scripts --- systems/Aurora/tests/repro16.pbs | 5 +++-- systems/Aurora/tests/solver/stag16.pbs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/systems/Aurora/tests/repro16.pbs b/systems/Aurora/tests/repro16.pbs index 28030a3d..c15ced99 100644 --- a/systems/Aurora/tests/repro16.pbs +++ b/systems/Aurora/tests/repro16.pbs @@ -4,7 +4,7 @@ #PBS -q EarlyAppAccess #PBS -l select=16 -#PBS -l walltime=01:00:00 +#PBS -l walltime=02:00:00 #PBS -A LatticeQCD_aesp_CNDA #export OMP_PROC_BIND=spread @@ -36,5 +36,6 @@ export MPICH_OFI_NIC_POLICY=GPU CMD="mpiexec -np 192 -ppn 12 -envall \ ./gpu_tile_compact.sh \ ./Test_dwf_mixedcg_prec --mpi 2.4.4.6 --grid 64.128.128.192 \ - --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000" + --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 " +#--comms-overlap $CMD diff --git a/systems/Aurora/tests/solver/stag16.pbs b/systems/Aurora/tests/solver/stag16.pbs index 5bfe04a6..ec38fe89 100644 --- a/systems/Aurora/tests/solver/stag16.pbs +++ b/systems/Aurora/tests/solver/stag16.pbs @@ -36,5 +36,5 @@ export MPICH_OFI_NIC_POLICY=GPU CMD="mpiexec -np 192 -ppn 12 -envall \ ./gpu_tile_compact.sh \ ./Test_staggered_cg_prec --mpi 2.4.4.6 --grid 128.128.128.192 \ - --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000" + --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000 --comms-overlap" $CMD From cf8632bbac1520444c75652d3582c4e3d0d13808 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 12 Mar 2024 15:15:35 +0000 Subject: [PATCH 4/5] Britney test option --- Grid/lattice/Lattice_reduction.h | 4 +- Grid/lattice/Lattice_rng.h | 26 ++---------- Grid/util/Init.cc | 73 +++++++++++++++++++++++++++++++- Grid/util/Init.h | 17 ++++++++ tests/Test_dwf_mixedcg_prec.cc | 10 +++++ 5 files changed, 106 insertions(+), 24 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 3d4c4b03..1e03fad6 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -281,12 +281,14 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & return nrm; } + template inline ComplexD innerProduct(const Lattice &left,const Lattice &right) { GridBase *grid = left.Grid(); ComplexD nrm = rankInnerProduct(left,right); - // std::cerr<<"flight log " << std::hexfloat << nrm <<" "<GlobalSum(nrm); + // GridNormLog(real(nrm)); return nrm; } diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 2212abbe..7c6c97de 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -411,7 +411,7 @@ public: std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl; SeedFixedIntegers(seeds); } - void SeedFixedIntegers(const std::vector &seeds){ + void SeedFixedIntegers(const std::vector &seeds, int britney=0){ // Everyone generates the same seed_seq based on input seeds CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); @@ -428,7 +428,6 @@ public: // MT implementation does not implement fast discard even though // in principle this is possible //////////////////////////////////////////////// -#if 1 thread_for( lidx, _grid->lSites(), { int gidx; @@ -449,29 +448,12 @@ public: int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; - Skip(_generators[l_idx],gidx); // Skip to next RNG sequence - }); -#else - // Everybody loops over global volume. - thread_for( gidx, _grid->_gsites, { - - // Where is it? - int rank; - int o_idx; - int i_idx; - - Coordinate gcoor; - _grid->GlobalIndexToGlobalCoor(gidx,gcoor); - _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - - // If this is one of mine we take it - if( rank == _grid->ThisRank() ){ - int l_idx=generator_idx(o_idx,i_idx); - _generators[l_idx] = master_engine; + if ( britney ) { + Skip(_generators[l_idx],l_idx); // Skip to next RNG sequence + } else { Skip(_generators[l_idx],gidx); // Skip to next RNG sequence } }); -#endif #else //////////////////////////////////////////////////////////////// // Machine and thread decomposition dependent seeding is efficient diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 9a0b4376..b47c240c 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -86,11 +86,83 @@ NAMESPACE_BEGIN(Grid); static Coordinate Grid_default_latt; static Coordinate Grid_default_mpi; + +/////////////////////////////////////////////////////// +// Grid Norm logging for repro testing +/////////////////////////////////////////////////////// +int GridNormLoggingMode; +int32_t GridNormLoggingCounter; +std::vector GridNormLogVector; + +void SetGridNormLoggingMode(GridNormLoggingMode_t mode) +{ + switch ( mode ) { + case GridNormLoggingModePrint: + SetGridNormLoggingModePrint(); + break; + case GridNormLoggingModeRecord: + SetGridNormLoggingModeRecord(); + break; + case GridNormLoggingModeVerify: + SetGridNormLoggingModeVerify(); + break; + case GridNormLoggingModeNone: + GridNormLoggingMode = mode; + GridNormLoggingCounter=0; + GridNormLogVector.resize(0); + break; + default: + assert(0); + } +} + +void SetGridNormLoggingModePrint(void) +{ + GridNormLoggingCounter = 0; + GridNormLogVector.resize(0); + GridNormLoggingMode = GridNormLoggingModePrint; +} +void SetGridNormLoggingModeRecord(void) +{ + GridNormLoggingCounter = 0; + GridNormLogVector.resize(0); + GridNormLoggingMode = GridNormLoggingModeRecord; +} +void SetGridNormLoggingModeVerify(void) +{ + GridNormLoggingCounter = 0; + GridNormLoggingMode = GridNormLoggingModeVerify; +} +void GridNormLog(double value) +{ + if(GridNormLoggingMode == GridNormLoggingModePrint) { + std::cerr<<"GridNormLog : "<< GridNormLoggingCounter <<" " << std::hexfloat << value < GridNormLogVector; +void SetGridNormLoggingModePrint(void); +void SetGridNormLoggingModeRecord(void); +void SetGridNormLoggingModeVerify(void); +void SetGridNormLoggingMode(GridNormLoggingMode_t mode); +void GridNormLog(double value); + NAMESPACE_END(Grid); diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 13cc0bb6..fb1fa59a 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -104,6 +104,11 @@ int main (int argc, char ** argv) csumref=0; int iter=0; do { + if ( iter == 0 ) { + SetGridNormLoggingMode(GridNormLoggingModeRecord); + } else { + SetGridNormLoggingMode(GridNormLoggingModeVerify); + } std::cerr << "******************* SINGLE PRECISION SOLVE "< Date: Tue, 12 Mar 2024 16:11:33 +0000 Subject: [PATCH 5/5] Repro test --- systems/Aurora/tests/repro128.pbs | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 systems/Aurora/tests/repro128.pbs diff --git a/systems/Aurora/tests/repro128.pbs b/systems/Aurora/tests/repro128.pbs new file mode 100644 index 00000000..34e2edc5 --- /dev/null +++ b/systems/Aurora/tests/repro128.pbs @@ -0,0 +1,41 @@ +#!/bin/bash + +## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00 + +#PBS -q EarlyAppAccess +#PBS -l select=128 +#PBS -l walltime=02:00:00 +#PBS -A LatticeQCD_aesp_CNDA + +#export OMP_PROC_BIND=spread +#unset OMP_PLACES + +cd $PBS_O_WORKDIR + +source ../sourceme.sh + +cat $PBS_NODEFILE + +export OMP_NUM_THREADS=3 +export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1 + +#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE +#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE +#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST + +export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0 +export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0 +export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1 +export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576 +export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072 +export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16 +export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16 +export MPICH_OFI_NIC_POLICY=GPU + +# 12 ppn, 16 nodes, 192 ranks +# 12 ppn, 128 nodes, 1536 ranks +CMD="mpiexec -np 1536 -ppn 12 -envall \ + ./gpu_tile_compact.sh \ + ./Test_dwf_mixedcg_prec --mpi 4.4.4.24 --grid 128.128.128.384 \ + --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 7000 --comms-overlap " +$CMD