mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 01:35:36 +00:00
Adding build basics for different systems
This commit is contained in:
parent
9d2238148c
commit
b4690e6091
@ -182,7 +182,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
int ncall =1000;
|
int ncall =3000;
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
|
12
systems/Tursa/config-command
Normal file
12
systems/Tursa/config-command
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Configure command for the Tursa build: MPI comms, GPU SIMD target, nvlink shared memory, CUDA accelerator.
# nvcc is the compiler, with mpicxx as its host compiler (-ccbin) and code generated for sm_80.
# NOTE(review): the relative ../../configure path suggests this is run from a build directory two levels below the source root - confirm.
# NOTE(review): --with-lime points at a user-specific Spack install path; adjust it for other accounts.
../../configure \
|
||||||
|
--enable-comms=mpi \
|
||||||
|
--enable-simd=GPU \
|
||||||
|
--enable-shm=nvlink \
|
||||||
|
--enable-gen-simd-width=64 \
|
||||||
|
--enable-accelerator=cuda \
|
||||||
|
--with-lime=/mnt/lustre/tursafs1/home/tc002/tc002/dc-boyl1/spack/spack/opt/spack/linux-rhel8-zen/gcc-8.4.1/c-lime-2-3-9-e6wxqrid6rqmd45z7n32dxkvkykpvyez \
|
||||||
|
--disable-accelerator-cshift \
|
||||||
|
--disable-unified \
|
||||||
|
CXX=nvcc \
|
||||||
|
LDFLAGS="-cudart shared " \
|
||||||
|
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
|
31
systems/Tursa/dwf.slurm
Normal file
31
systems/Tursa/dwf.slurm
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#SBATCH -J dslash
|
||||||
|
#SBATCH -A tc002
|
||||||
|
# Wall-clock limit is set once, by the "--time=12:00:00" directive further down; a second, conflicting "-t 2:20:00" directive used to sit here.
|
||||||
|
#SBATCH --nodelist=tu-c0r0n[00,03,06,09]
|
||||||
|
#SBATCH --exclusive
|
||||||
|
#SBATCH --nodes=4
|
||||||
|
#SBATCH --ntasks=16
|
||||||
|
#SBATCH --ntasks-per-node=4
|
||||||
|
#SBATCH --cpus-per-task=8
|
||||||
|
#SBATCH --time=12:00:00
|
||||||
|
#SBATCH --partition=gpu
|
||||||
|
#SBATCH --gres=gpu:4
|
||||||
|
#SBATCH --output=%x.%j.out
|
||||||
|
#SBATCH --error=%x.%j.err
|
||||||
|
|
||||||
|
export OMP_NUM_THREADS=4
|
||||||
|
export OMPI_MCA_btl=^uct,openib
|
||||||
|
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
|
||||||
|
export UCX_RNDV_SCHEME=put_zcopy
|
||||||
|
export UCX_RNDV_THRESH=16384
|
||||||
|
export UCX_IB_GPU_DIRECT_RDMA=yes
|
||||||
|
export UCX_MEMTYPE_CACHE=n
|
||||||
|
OPT="--comms-overlap --comms-concurrent"
|
||||||
|
|
||||||
|
|
||||||
|
# Launch $SLURM_NTASKS ranks; each rank starts through ./mpiwrapper.sh, which is handed the benchmark binary and its options.
# -x LD_LIBRARY_PATH exports that variable to the ranks; --bind-to none turns off mpirun's own process binding.
# NOTE(review): "--mpi 2.2.2.2" multiplies out to 16, matching --ntasks=16 above - presumably the rank grid; confirm when changing the node count.
mpirun -np $SLURM_NTASKS -x LD_LIBRARY_PATH --bind-to none ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $OPT --mpi 2.2.2.2 --accelerator-threads 8 --grid 64.64.64.64 --shm 2048
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
17
systems/Tursa/mpiwrapper.sh
Executable file
17
systems/Tursa/mpiwrapper.sh
Executable file
@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Node-local rank from Open MPI; the GPU index, network device and NUMA nodes below are all derived from it.
# Abort with a clear message when it is unset or empty (wrapper not started by mpirun) instead of
# continuing with broken arithmetic and an empty CUDA_VISIBLE_DEVICES.
lrank=${OMPI_COMM_WORLD_LOCAL_RANK:?mpiwrapper.sh must be launched through Open MPI mpirun}
|
||||||
|
numa1=$(( 2 * $lrank))
|
||||||
|
numa2=$(( 2 * $lrank + 1 ))
|
||||||
|
netdev=mlx5_${lrank}:1
|
||||||
|
|
||||||
|
# Expose only the GPU whose index equals this process's node-local rank.
export CUDA_VISIBLE_DEVICES=$lrank
|
||||||
|
# Point UCX at the network device selected for this rank (netdev, computed above as mlx5_<local rank>:1).
export UCX_NET_DEVICES=$netdev
|
||||||
|
BINDING="--interleave=$numa1,$numa2"
|
||||||
|
|
||||||
|
# Log the host, local rank, selected GPU and NUMA binding for this process.
echo "$(hostname) - $lrank device=$CUDA_VISIBLE_DEVICES binding=$BINDING"
|
||||||
|
|
||||||
|
# Run the wrapped command (all arguments passed to this script) under the NUMA policy in BINDING.
# "$@" hands each argument through unchanged; an unquoted $* would re-split arguments containing
# whitespace and expand glob characters.
numactl ${BINDING} "$@"
|
||||||
|
|
||||||
|
|
||||||
|
|
2
systems/Tursa/sourceme.sh
Normal file
2
systems/Tursa/sourceme.sh
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# Bring the Spack-installed c-lime package into the current shell environment.
spack load c-lime
|
||||||
|
# Load the CUDA, Open MPI and UCX environment modules at the pinned versions.
module load cuda/11.4.1 openmpi/4.1.1 ucx/1.10.1
|
Loading…
Reference in New Issue
Block a user