From 7e5bd46dd3033aab62599c4cde1d1fc6bb7af8e7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 6 Mar 2024 19:03:45 +0100 Subject: [PATCH] Booster update --- Grid/algorithms/blas/BatchedBlas.h | 5 +++-- .../implementation/StaggeredKernelsImplementation.h | 12 +----------- systems/Booster/config-command | 6 ++++-- systems/Booster/sourceme.sh | 10 +++++----- 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/Grid/algorithms/blas/BatchedBlas.h b/Grid/algorithms/blas/BatchedBlas.h index 5ab46333..f6418b7e 100644 --- a/Grid/algorithms/blas/BatchedBlas.h +++ b/Grid/algorithms/blas/BatchedBlas.h @@ -31,7 +31,7 @@ Author: Peter Boyle #include #endif #ifdef GRID_CUDA -#include +#include #endif #ifdef GRID_SYCL #include @@ -51,7 +51,7 @@ NAMESPACE_BEGIN(Grid); typedef hipblasHandle_t gridblasHandle_t; #endif #ifdef GRID_CUDA - typedef cudablasHandle_t gridblasHandle_t; + typedef cublasHandle_t gridblasHandle_t; #endif #ifdef GRID_SYCL typedef cl::sycl::queue *gridblasHandle_t; @@ -78,6 +78,7 @@ public: #ifdef GRID_CUDA std::cout << "cublasCreate"<::DhopImproved(StencilImpl &st, LebesgueOrder &lo, if( interior && exterior ) { if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;} -#ifndef GRID_CUDA if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;} +#ifndef GRID_CUDA if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;} #endif } else if( interior ) { if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;} -#ifndef GRID_CUDA if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;} -#endif } else if( exterior ) { if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;} -#ifndef GRID_CUDA if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;} -#endif } assert(0 && " Kernel optimisation case not covered "); } @@ -322,19 +318,13 @@ void StaggeredKernels::DhopNaive(StencilImpl &st, LebesgueOrder &lo, if( interior && exterior ) { if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;} -#ifndef GRID_CUDA if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;} -#endif } else if( interior ) { if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;} -#ifndef GRID_CUDA if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;} -#endif } else if( exterior ) { if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;} -#ifndef GRID_CUDA if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;} -#endif } } diff --git a/systems/Booster/config-command b/systems/Booster/config-command index 8530c5f9..1ba2dc7a 100644 --- a/systems/Booster/config-command +++ b/systems/Booster/config-command @@ -5,10 +5,12 @@ LIME=/p/home/jusers/boyle2/juwels/gm2dwf/boyle/ --enable-gen-simd-width=64 \ --enable-shm=nvlink \ --enable-accelerator=cuda \ + --disable-gparity \ + --disable-fermion-reps \ --with-lime=$LIME \ - --disable-accelerator-cshift \ + --enable-accelerator-cshift \ --disable-unified \ CXX=nvcc \ LDFLAGS="-cudart shared " \ - CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared" + CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared -lcublas" diff --git a/systems/Booster/sourceme.sh b/systems/Booster/sourceme.sh index 56499be4..2341267f 100644 --- a/systems/Booster/sourceme.sh +++ b/systems/Booster/sourceme.sh @@ -1,5 +1,5 @@ -module load GCC/9.3.0 -module load GMP/6.2.0 -module load MPFR/4.1.0 -module load OpenMPI/4.1.0rc1 -module load CUDA/11.3 +module load GCC +module load GMP +module load MPFR +module load OpenMPI +module load CUDA