mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Script updates for current summit
This commit is contained in:
		@@ -7,6 +7,7 @@
 | 
				
			|||||||
	       --enable-setdevice \
 | 
						       --enable-setdevice \
 | 
				
			||||||
	       --disable-fermion-reps \
 | 
						       --disable-fermion-reps \
 | 
				
			||||||
	       --enable-accelerator=cuda \
 | 
						       --enable-accelerator=cuda \
 | 
				
			||||||
 | 
						       --disable-accelerator-cshift \
 | 
				
			||||||
	       --prefix /ccs/home/paboyle/prefix \
 | 
						       --prefix /ccs/home/paboyle/prefix \
 | 
				
			||||||
	       CXX=nvcc \
 | 
						       CXX=nvcc \
 | 
				
			||||||
	       LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \
 | 
						       LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,25 +1,39 @@
 | 
				
			|||||||
#!/bin/bash
 | 
					#!/bin/bash
 | 
				
			||||||
#BSUB -P LGT104
 | 
					#BSUB -P LGT104
 | 
				
			||||||
#BSUB -W 2:00
 | 
					#BSUB -W 0:20
 | 
				
			||||||
#BSUB -nnodes 16
 | 
					#BSUB -nnodes 16
 | 
				
			||||||
#BSUB -J DWF
 | 
					#BSUB -J DWF
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export OMP_NUM_THREADS=6
 | 
					export OMP_NUM_THREADS=6
 | 
				
			||||||
export PAMI_IBV_ADAPTER_AFFINITY=1
 | 
					export PAMI_IBV_ADAPTER_AFFINITY=1
 | 
				
			||||||
export PAMI_ENABLE_STRIPING=1
 | 
					export PAMI_ENABLE_STRIPING=1
 | 
				
			||||||
export OPT="--comms-concurrent --comms-overlap "
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
APP="./benchmarks/Benchmark_comms_host_device  --mpi 4.4.4.3 "
 | 
					DIR=.
 | 
				
			||||||
jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.16node.log
 | 
					source sourceme.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
APP="./benchmarks/Benchmark_dwf_fp32 --grid 96.96.96.72 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
 | 
					echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
 | 
				
			||||||
jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.24.log
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
APP="./benchmarks/Benchmark_dwf_fp32 --grid 128.128.128.96 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT "
 | 
					# Global lattice (--grid) for each configuration. Every entry is the local
					# volume 32.32.32.16 scaled by the matching MPI decomposition, so the last
					# entry (MPI 4.4.2.32) must be the four-dimensional 128.128.64.512.
					VOLS=( 32.32.32.16 32.32.32.64 64.32.32.64 64.32.64.64 64.64.64.64 64.64.64.128  64.64.64.256  64.64.64.512 128.128.64.512)
 | 
				
			||||||
jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.32.log
 | 
					MPI=( 1.1.1.1      1.1.1.4     2.1.1.4         2.1.2.4     2.2.2.4      2.2.2.8      2.2.2.16      2.2.2.32 4.4.2.32 )
 | 
				
			||||||
 | 
					RANKS=(     1            4           8              16          32          64            128           256 1024)
 | 
				
			||||||
 | 
					# Resource sets passed to `jsrun --nrs`. The launch line uses -a4, i.e. four
					# ranks per resource set, so NODES[i] * 4 must cover RANKS[i]; the 1024-rank
					# configuration therefore needs 256 resource sets, not 128.
					NODES=(     1            1           2               4           8           16            32            64  256)
 | 
				
			||||||
 | 
					INTS=(      0            1           2               3           4            5             6             7    8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for i in 5
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					    vol=${VOLS[$i]} 
 | 
				
			||||||
 | 
					    nodes=${NODES[$i]} 
 | 
				
			||||||
 | 
					    mpi=${MPI[$i]} 
 | 
				
			||||||
 | 
					    ranks=${RANKS[$i]} 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    JSRUN="jsrun --nrs $nodes -a4 -g4 -c42 -dpacked -b packed:10 --latency_priority gpu-cpu --smpiargs=-gpu"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-sequential --shm 2048 --shm-mpi 0"
 | 
				
			||||||
 | 
					    $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.seq.ker
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-overlap --shm 2048 --shm-mpi 0"
 | 
				
			||||||
 | 
					    $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.over.ker
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user