From c33a3b3b40528ad47cc308286e819eec68558030 Mon Sep 17 00:00:00 2001 From: Meifeng Lin Date: Tue, 13 Dec 2022 15:13:11 -0800 Subject: [PATCH] Fixed --accelerator-threads input to omp target thread_limit() --- Grid/threads/Accelerator.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index c0e07556..a7a9912d 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -26,11 +26,11 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + +#ifndef ACCELERATOR_H +#define ACCELERATOR_H + #pragma once - -//#ifndef ACCELERATOR_H -//#define ACCELERATOR_H - #include #ifdef HAVE_MALLOC_MALLOC_H @@ -437,22 +437,35 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas //OpenMP Target Offloading #ifdef OMPTARGET - -//uint32_t gpu_threads=acceleratorThreads(); +#define THREAD_LIMIT acceleratorThreads() #define accelerator #define accelerator_inline strong_inline +#ifdef THREAD_LIMIT #define accelerator_for(i,num,nsimd, ... ) \ - _Pragma("omp target teams distribute parallel for") \ + _Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT)") \ for ( uint64_t i=0;i