mirror of
https://github.com/paboyle/Grid.git
synced 2025-07-15 12:36:55 +01:00
Update comms logging in Cshift
This commit is contained in:
@@ -186,6 +186,14 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
acceleratorCopyToDevice(&hrecv_buf[0],&recv_buf[0],bytes);
|
acceleratorCopyToDevice(&hrecv_buf[0],&recv_buf[0],bytes);
|
||||||
|
std::cout << GridLogComms<< " Cshift: "
|
||||||
|
<<" dim"<<dimension
|
||||||
|
<<" shift "<<shift
|
||||||
|
<< " rank "<< grid->ThisRank()
|
||||||
|
<<" Coor "<<grid->ThisProcessorCoor()
|
||||||
|
<<" send "<<hsend_buf[0]<<" to "<<xmit_to_rank
|
||||||
|
<<" recv "<<hrecv_buf[0]<<" from "<<recv_from_rank
|
||||||
|
<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
FlightRecorder::StepLog("Cshift_SendRecv_complete");
|
FlightRecorder::StepLog("Cshift_SendRecv_complete");
|
||||||
|
|
||||||
@@ -326,6 +334,15 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
recv_from_rank,
|
recv_from_rank,
|
||||||
bytes);
|
bytes);
|
||||||
acceleratorCopyToDevice((void *)&hrecv_buf[0],(void *)recv_buf_extract_mpi,bytes);
|
acceleratorCopyToDevice((void *)&hrecv_buf[0],(void *)recv_buf_extract_mpi,bytes);
|
||||||
|
|
||||||
|
std::cout << GridLogComms<< " Cshift_comms_simd: "
|
||||||
|
<<" dim"<<dimension
|
||||||
|
<<" shift "<<shift
|
||||||
|
<< " rank "<< grid->ThisRank()
|
||||||
|
<<" Coor "<<grid->ThisProcessorCoor()
|
||||||
|
<<" send "<<hsend_buf[0]<<" to "<<xmit_to_rank
|
||||||
|
<<" recv "<<hrecv_buf[0]<<" from "<<recv_from_rank
|
||||||
|
<<std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
xbytes+=bytes;
|
xbytes+=bytes;
|
||||||
|
@@ -69,6 +69,7 @@ GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
|
|||||||
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
|
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
|
||||||
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
|
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
|
||||||
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||||
|
GridLogger GridLogComms (1, "Comms", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
|
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
|
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
|
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
|
||||||
@@ -84,6 +85,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
|||||||
GridLogDebug.Active(0);
|
GridLogDebug.Active(0);
|
||||||
GridLogPerformance.Active(0);
|
GridLogPerformance.Active(0);
|
||||||
GridLogDslash.Active(0);
|
GridLogDslash.Active(0);
|
||||||
|
GridLogComms.Active(0);
|
||||||
GridLogIntegrator.Active(1);
|
GridLogIntegrator.Active(1);
|
||||||
GridLogColours.Active(0);
|
GridLogColours.Active(0);
|
||||||
GridLogHMC.Active(1);
|
GridLogHMC.Active(1);
|
||||||
@@ -97,6 +99,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
|||||||
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||||
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
|
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
|
||||||
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
|
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
|
||||||
|
if (logstreams[i] == std::string("Comms")) GridLogComms.Active(1);
|
||||||
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
|
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
|
||||||
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
|
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
|
||||||
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||||
|
@@ -180,6 +180,7 @@ extern GridLogger GridLogError;
|
|||||||
extern GridLogger GridLogWarning;
|
extern GridLogger GridLogWarning;
|
||||||
extern GridLogger GridLogMessage;
|
extern GridLogger GridLogMessage;
|
||||||
extern GridLogger GridLogDebug;
|
extern GridLogger GridLogDebug;
|
||||||
|
extern GridLogger GridLogComms;
|
||||||
extern GridLogger GridLogPerformance;
|
extern GridLogger GridLogPerformance;
|
||||||
extern GridLogger GridLogDslash;
|
extern GridLogger GridLogDslash;
|
||||||
extern GridLogger GridLogIterative;
|
extern GridLogger GridLogIterative;
|
||||||
|
@@ -219,7 +219,8 @@ int main (int argc, char ** argv)
|
|||||||
int comm_proc = mpi_layout[mu]-1;
|
int comm_proc = mpi_layout[mu]-1;
|
||||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
}
|
}
|
||||||
int tid = omp_get_thread_num();
|
// int tid = omp_get_thread_num();
|
||||||
|
int tid = 0;
|
||||||
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1,
|
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,1,
|
||||||
(void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid);
|
(void *)&rbuf[dir][0], recv_from_rank,1, bytes,tid);
|
||||||
|
|
||||||
|
@@ -1,7 +1,8 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
##PBS -q EarlyAppAccess
|
|
||||||
#PBS -q debug
|
#PBS -q debug
|
||||||
|
#PBS -l filesystems=flare
|
||||||
|
#PBS -l filesystems=home
|
||||||
#PBS -l select=2
|
#PBS -l select=2
|
||||||
#PBS -l walltime=00:20:00
|
#PBS -l walltime=00:20:00
|
||||||
#PBS -A LatticeQCD_aesp_CNDA
|
#PBS -A LatticeQCD_aesp_CNDA
|
||||||
@@ -14,26 +15,18 @@ cp $PBS_NODEFILE nodefile
|
|||||||
|
|
||||||
export OMP_NUM_THREADS=4
|
export OMP_NUM_THREADS=4
|
||||||
export MPICH_OFI_NIC_POLICY=GPU
|
export MPICH_OFI_NIC_POLICY=GPU
|
||||||
|
export MPICH_CH4_SHM=XPMEM
|
||||||
#export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
|
export MPIR_CVAR_DEBUG_SUMMARY=1
|
||||||
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
|
export MPICH_DBG_LEVEL=VERBOSE
|
||||||
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
|
export MPICH_DBG_CLASS=ALL
|
||||||
#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
|
|
||||||
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
|
|
||||||
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
|
|
||||||
#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
|
|
||||||
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
|
|
||||||
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
|
|
||||||
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
|
|
||||||
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Local vol 16.16.16.32
|
# Local vol 16.16.16.32
|
||||||
#
|
#
|
||||||
|
|
||||||
#VOL=32.64.64.96
|
#VOL=32.64.64.96
|
||||||
|
mpiexec -np 1 -ppn 1 -envall mpivars
|
||||||
|
|
||||||
for VOL in 32.32.32.96 32.64.64.96
|
for VOL in 32.32.32.96
|
||||||
do
|
do
|
||||||
for AT in 32
|
for AT in 32
|
||||||
do
|
do
|
||||||
|
@@ -1,26 +1,27 @@
|
|||||||
#Ahead of time compile for PVC
|
export MPFR=`spack find --paths mpfr | grep ^mpfr | awk '{print $2}' `
|
||||||
|
export GMP=`spack find --paths gmp | grep ^gmp | awk '{print $2}' `
|
||||||
|
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
|
||||||
|
export UNWIND=`spack find --paths libunwind | grep ^libunwind | awk '{print $2}' `
|
||||||
|
|
||||||
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -lnuma -L/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/lib -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc"
|
../../configure \
|
||||||
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -I/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/include/ -fPIC"
|
|
||||||
|
|
||||||
#JIT compile
|
|
||||||
#export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl "
|
|
||||||
#export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions "
|
|
||||||
|
|
||||||
../configure \
|
|
||||||
--enable-simd=GPU \
|
--enable-simd=GPU \
|
||||||
--enable-reduction=grid \
|
|
||||||
--enable-gen-simd-width=64 \
|
--enable-gen-simd-width=64 \
|
||||||
--enable-comms=mpi-auto \
|
--enable-comms=mpi-auto \
|
||||||
--enable-debug \
|
|
||||||
--prefix $HOME/gpt-install \
|
|
||||||
--disable-gparity \
|
--disable-gparity \
|
||||||
--disable-fermion-reps \
|
--disable-fermion-reps \
|
||||||
--with-lime=$CLIME \
|
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
--enable-accelerator=sycl \
|
--enable-accelerator=sycl \
|
||||||
--enable-accelerator-aware-mpi=no\
|
--enable-accelerator-aware-mpi=no \
|
||||||
--enable-unified=no \
|
--enable-unified=no \
|
||||||
|
--enable-debug \
|
||||||
|
--with-lime=$CLIME \
|
||||||
|
--with-gmp=$GMP \
|
||||||
|
--with-mpfr=$MPFR \
|
||||||
|
--with-unwind=$UNWIND \
|
||||||
MPICXX=mpicxx \
|
MPICXX=mpicxx \
|
||||||
CXX=icpx
|
CXX=icpx \
|
||||||
|
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -lsycl -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc -lnuma" \
|
||||||
|
CXXFLAGS="-fPIC -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1,16 +1,13 @@
|
|||||||
#module load oneapi/release/2023.12.15.001
|
|
||||||
#module load mpich/icc-all-debug-pmix-gpu/52.2
|
|
||||||
#module load mpich-config/mode/deterministic
|
|
||||||
#module load intel_compute_runtime/release/821.35
|
|
||||||
module load pti-gpu
|
|
||||||
|
|
||||||
source ~/spack/share/spack/setup-env.sh
|
|
||||||
spack load c-lime
|
|
||||||
spack load openssl
|
|
||||||
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
|
|
||||||
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
|
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
|
||||||
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
|
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
|
||||||
export http_proxy=http://proxy.alcf.anl.gov:3128
|
export http_proxy=http://proxy.alcf.anl.gov:3128
|
||||||
export https_proxy=http://proxy.alcf.anl.gov:3128
|
export https_proxy=http://proxy.alcf.anl.gov:3128
|
||||||
git config --global http.proxy http://proxy.alcf.anl.gov:3128
|
git config --global http.proxy http://proxy.alcf.anl.gov:3128
|
||||||
|
|
||||||
|
source ~/spack/share/spack/setup-env.sh
|
||||||
|
spack load c-lime
|
||||||
|
spack load openssl@3.3.1%gcc@12.2.0
|
||||||
|
spack load unwind
|
||||||
|
export UNWIND=`spack find --paths libunwind | grep ^libunwind | awk '{print $2}' `
|
||||||
|
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
|
||||||
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
|
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
|
||||||
|
Reference in New Issue
Block a user