1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Adding build basics for different systems

This commit is contained in:
Peter Boyle 2021-09-16 00:00:38 +01:00
parent 9d2238148c
commit b4690e6091
5 changed files with 63 additions and 1 deletions

View File

@ -182,7 +182,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int ncall =1000; int ncall =3000;
if (1) { if (1) {
FGrid->Barrier(); FGrid->Barrier();

View File

@ -0,0 +1,12 @@
# Configure invocation for a CUDA build: MPI comms, GPU SIMD target,
# nvlink shared memory, and the CUDA accelerator backend.
#
# - The compiler is nvcc, with mpicxx passed as its host compiler (-ccbin),
#   generating code for compute_80 / sm_80.
# - --with-lime points at an absolute path inside one user's spack tree;
#   it has to be changed for any other account or installation.
# - "-cudart shared" appears in both LDFLAGS and CXXFLAGS.
# - NOTE(review): the ../../configure path presumably expects this to be run
#   from a build directory two levels below the source root - confirm.
../../configure \
--enable-comms=mpi \
--enable-simd=GPU \
--enable-shm=nvlink \
--enable-gen-simd-width=64 \
--enable-accelerator=cuda \
--with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
--disable-accelerator-cshift \
--disable-unified \
CXX=nvcc \
LDFLAGS="-cudart shared " \
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"

31
systems/Tursa/dwf.slurm Normal file
View File

@ -0,0 +1,31 @@
#!/bin/bash
# Slurm batch job that runs Benchmark_dwf_fp32 on 4 nodes with 4 MPI ranks
# per node (16 ranks in total, matching the 2.2.2.2 MPI decomposition below).
# Each rank is started through ./mpiwrapper.sh.
#
# The wall-time limit is stated once. An earlier, conflicting "-t 2:20:00"
# directive was removed in favour of --time=12:00:00, which came later in
# the file.
#SBATCH -J dslash
#SBATCH -A tc002
#SBATCH --nodelist=tu-c0r0n[00,03,06,09]
#SBATCH --exclusive
#SBATCH --nodes=4
#SBATCH --ntasks=16
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --time=12:00:00
#SBATCH --partition=gpu
#SBATCH --gres=gpu:4
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err

# NOTE(review): 4 OpenMP threads per rank while 8 CPUs per task are
# requested above - confirm this is intentional.
export OMP_NUM_THREADS=4

# Open MPI / UCX transport settings for this run.
export OMPI_MCA_btl=^uct,openib
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=yes
export UCX_MEMTYPE_CACHE=n

# Benchmark options; $OPT is expanded unquoted on purpose so that it splits
# into separate command-line arguments.
OPT="--comms-overlap --comms-concurrent"

mpirun -np "$SLURM_NTASKS" -x LD_LIBRARY_PATH --bind-to none \
    ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $OPT \
    --mpi 2.2.2.2 --accelerator-threads 8 --grid 64.64.64.64 --shm 2048

17
systems/Tursa/mpiwrapper.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/bash
# Per-rank launch wrapper for Open MPI jobs.
#
# Uses the node-local rank to pick one GPU, one mlx5 network device and a
# pair of NUMA nodes for memory interleaving, then runs the given command
# under numactl.
#
# Usage: mpiwrapper.sh <command> [args...]

# Without the local rank every setting below would be malformed, so fail
# with a clear message instead.
: "${OMPI_COMM_WORLD_LOCAL_RANK:?must be started by Open MPI mpirun}"

lrank=$OMPI_COMM_WORLD_LOCAL_RANK

# Local rank r is given NUMA nodes 2r and 2r+1.
numa1=$(( 2 * $lrank ))
numa2=$(( 2 * $lrank + 1 ))

# Local rank r is given network device mlx5_r, port 1, and GPU r.
netdev=mlx5_${lrank}:1
export CUDA_VISIBLE_DEVICES=$lrank
export UCX_NET_DEVICES=$netdev

BINDING="--interleave=$numa1,$numa2"
echo "$(hostname) - $lrank device=$CUDA_VISIBLE_DEVICES binding=$BINDING"

# "$@" hands every argument to the wrapped command unchanged; the previous
# unquoted $* re-split arguments on whitespace and expanded glob characters.
numactl ${BINDING} "$@"

View File

@ -0,0 +1,2 @@
# Environment setup: load the c-lime package through spack, then the
# CUDA 11.4.1, Open MPI 4.1.1 and UCX 1.10.1 environment modules.
# NOTE(review): presumably meant to be sourced into the current shell before
# configuring or running - confirm.
spack load c-lime
module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1