From 37d1d87c3c2842bdd4dd97050fce13a1fc2042e9 Mon Sep 17 00:00:00 2001 From: Meifeng Lin Date: Tue, 19 Dec 2023 08:03:28 -0600 Subject: [PATCH] bug fix for Intel GPUs --- Grid/threads/Accelerator.cc | 11 +++-------- benchmarks/Benchmark_su3.cc | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index 698020a7..126f9bf3 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -14,15 +14,10 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; #define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" #define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" -#ifdef __CUDA_ARCH__ -#warning "ifdef cuda arch" -#endif // fold omptarget into device specific acceleratorInit() +#if defined(GRID_CUDA) || (defined(GRID_OMPTARGET) && defined(__CUDA_ARCH__)) #include -//#if defined(GRID_CUDA) || (defined(GRID_OMPTARGET) && defined(__CUDA_ARCH__)) -#if defined(GRID_OMPTARGET) -#warning "using cuda from opmtarget" cudaDeviceProp *gpu_props; cudaStream_t copyStream; cudaStream_t computeStream; @@ -206,7 +201,7 @@ void acceleratorInit(void) #endif -#if defined(GRID_SYCL) || (defined(GRID_OMPTARGET) && defined(__SYCL_DEVICE_ONLY__)) +#if defined(GRID_SYCL) //|| (defined(GRID_OMPTARGET) && defined(__SYCL_DEVICE_ONLY__)) cl::sycl::queue *theGridAccelerator; cl::sycl::queue *theCopyAccelerator; @@ -278,7 +273,7 @@ void acceleratorInit(void) } #endif -#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP)) && (!defined(GRID_OMPTARGET)) +#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP))// && (!defined(GRID_OMPTARGET)) void acceleratorInit(void){} #endif diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index da59893a..46a170cd 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -36,8 +36,8 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); -#define LMAX (31) -#define LMIN (31) +#define LMAX (8) +#define LMIN (8) #define LADD (8) int64_t Nwarm=500;