# Environment setup: loads the compiler, CUDA and Open MPI modules and exports
# the QUDA, OpenMP, Open MPI (MCA) and UCX settings below.
# NOTE(review): presumably meant to be sourced (or pasted into a job script)
# so that the exports reach the launched program — confirm against the caller.

# Toolchain modules.
module load gcc/9.3.0
module load cuda/11.4.1
module load openmpi/4.1.1-cuda11.4

# QUDA resource directory: "tuning" under the directory this runs from.
export QUDA_RESOURCE_PATH="$(pwd)/tuning"
export OMP_NUM_THREADS=4

# Open MPI: exclude the uct and openib BTLs ("^" negates the list) and use the
# UCX PML.
export OMPI_MCA_btl=^uct,openib
export OMPI_MCA_pml=ucx

# UCX settings provided by Fabian; their rationale is not documented here.
# Previous transport ordering, kept for reference:
#export UCX_TLS=rc,rc_x,sm,cuda_copy,cuda_ipc,gdr_copy
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_THRESH=16384
export UCX_RNDV_SCHEME=put_zcopy
export UCX_IB_GPU_DIRECT_RDMA=yes
export UCX_MEMTYPE_CACHE=n

export OMPI_MCA_io=romio321

# NOTE(review): OMPI_MCA_btl above excludes the openib BTL, so these
# btl_openib_* parameters presumably have no effect — confirm before removing.
export OMPI_MCA_btl_openib_allow_ib=true
export OMPI_MCA_btl_openib_device_type=infiniband
export OMPI_MCA_btl_openib_if_exclude=mlx5_1,mlx5_2,mlx5_3

export QUDA_REORDER_LOCATION=GPU # this is the default anyway