1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-07-15 12:36:55 +01:00

Update comms logging in Cshift

This commit is contained in:
2025-07-11 14:36:10 +00:00
parent 73af020f98
commit a77cd50b2f
7 changed files with 54 additions and 41 deletions

View File

@@ -186,6 +186,14 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
recv_from_rank,
bytes);
acceleratorCopyToDevice(&hrecv_buf[0],&recv_buf[0],bytes);
// Diagnostic trace written to the GridLogComms stream for this exchange:
// the shift dimension and shift amount, this rank and its processor
// coordinate, then element 0 of hsend_buf with the rank it is sent to and
// element 0 of hrecv_buf with the rank it is received from.
// NOTE(review): only the first buffer element is printed, so this is a spot
// check of the transfer rather than a dump of the data — confirm that is the intent.
std::cout << GridLogComms<< " Cshift: "
<<" dim"<<dimension
<<" shift "<<shift
<< " rank "<< grid->ThisRank()
<<" Coor "<<grid->ThisProcessorCoor()
<<" send "<<hsend_buf[0]<<" to "<<xmit_to_rank
<<" recv "<<hrecv_buf[0]<<" from "<<recv_from_rank
<<std::endl;
#endif
FlightRecorder::StepLog("Cshift_SendRecv_complete");
@@ -326,6 +334,15 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
recv_from_rank,
bytes);
acceleratorCopyToDevice((void *)&hrecv_buf[0],(void *)recv_buf_extract_mpi,bytes);
// Diagnostic trace written to the GridLogComms stream for this exchange,
// tagged "Cshift_comms_simd": the shift dimension and shift amount, this rank
// and its processor coordinate, then element 0 of hsend_buf with the rank it
// is sent to and element 0 of hrecv_buf with the rank it is received from.
// NOTE(review): only the first buffer element is printed, so this is a spot
// check of the transfer rather than a dump of the data — confirm that is the intent.
std::cout << GridLogComms<< " Cshift_comms_simd: "
<<" dim"<<dimension
<<" shift "<<shift
<< " rank "<< grid->ThisRank()
<<" Coor "<<grid->ThisProcessorCoor()
<<" send "<<hsend_buf[0]<<" to "<<xmit_to_rank
<<" recv "<<hrecv_buf[0]<<" from "<<recv_from_rank
<<std::endl;
#endif
xbytes+=bytes;

View File

@@ -69,6 +69,7 @@ GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
// Log stream labelled "Comms" (colour "BLUE"). GridLogConfigure deactivates it
// by default and activates it when "Comms" appears in the requested logstreams.
GridLogger GridLogComms (1, "Comms", GridLogColours, "BLUE");
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
@@ -84,6 +85,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
GridLogDslash.Active(0);
GridLogComms.Active(0);
GridLogIntegrator.Active(1);
GridLogColours.Active(0);
GridLogHMC.Active(1);
@@ -97,6 +99,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
if (logstreams[i] == std::string("Comms")) GridLogComms.Active(1);
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);

View File

@@ -180,6 +180,7 @@ extern GridLogger GridLogError;
extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;
extern GridLogger GridLogDebug;
extern GridLogger GridLogComms;
extern GridLogger GridLogPerformance;
extern GridLogger GridLogDslash;
extern GridLogger GridLogIterative;

View File

@@ -219,7 +219,8 @@ int main (int argc, char ** argv)
int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
int tid = omp_get_thread_num();
// int tid = omp_get_thread_num();
int tid = 0;
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1,
(void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid);

View File

@@ -1,7 +1,8 @@
#!/bin/bash
##PBS -q EarlyAppAccess
#PBS -q debug
#PBS -l filesystems=flare
#PBS -l filesystems=home
#PBS -l select=2
#PBS -l walltime=00:20:00
#PBS -A LatticeQCD_aesp_CNDA
@@ -14,26 +15,18 @@ cp $PBS_NODEFILE nodefile
export OMP_NUM_THREADS=4
export MPICH_OFI_NIC_POLICY=GPU
#export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
export MPICH_CH4_SHM=XPMEM
export MPIR_CVAR_DEBUG_SUMMARY=1
export MPICH_DBG_LEVEL=VERBOSE
export MPICH_DBG_CLASS=ALL
#
# Local vol 16.16.16.32
#
#VOL=32.64.64.96
mpiexec -np 1 -ppn 1 -envall mpivars
for VOL in 32.32.32.96 32.64.64.96
for VOL in 32.32.32.96
do
for AT in 32
do

View File

@@ -1,26 +1,27 @@
#Ahead of time compile for PVC
export MPFR=`spack find --paths mpfr | grep ^mpfr | awk '{print $2}' `
export GMP=`spack find --paths gmp | grep ^gmp | awk '{print $2}' `
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
export UNWIND=`spack find --paths libunwind | grep ^libunwind | awk '{print $2}' `
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -lnuma -L/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/lib -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc"
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -I/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/include/ -fPIC"
#JIT compile
#export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl "
#export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions "
../configure \
../../configure \
--enable-simd=GPU \
--enable-reduction=grid \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--enable-debug \
--prefix $HOME/gpt-install \
--disable-gparity \
--disable-fermion-reps \
--with-lime=$CLIME \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-accelerator-aware-mpi=no \
--enable-unified=no \
--enable-debug \
--with-lime=$CLIME \
--with-gmp=$GMP \
--with-mpfr=$MPFR \
--with-unwind=$UNWIND \
MPICXX=mpicxx \
CXX=icpx
CXX=icpx \
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -lsycl -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc -lnuma" \
CXXFLAGS="-fPIC -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel"

View File

@@ -1,16 +1,13 @@
#module load oneapi/release/2023.12.15.001
#module load mpich/icc-all-debug-pmix-gpu/52.2
#module load mpich-config/mode/deterministic
#module load intel_compute_runtime/release/821.35
module load pti-gpu
source ~/spack/share/spack/setup-env.sh
spack load c-lime
spack load openssl
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
export http_proxy=http://proxy.alcf.anl.gov:3128
export https_proxy=http://proxy.alcf.anl.gov:3128
git config --global http.proxy http://proxy.alcf.anl.gov:3128
source ~/spack/share/spack/setup-env.sh
spack load c-lime
spack load openssl@3.3.1%gcc@12.2.0
spack load unwind
export UNWIND=`spack find --paths libunwind | grep ^libunwind | awk '{print $2}' `
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"