Mirror of https://github.com/paboyle/Grid.git

Fixed --accelerator-threads input to omp target thread_limit()

commit c33a3b3b40
parent c2f8ba194e
Author: Meifeng Lin
Date:   2022-12-13 15:13:11 -08:00

Grid/threads/Accelerator.h

@@ -26,11 +26,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /* END LEGAL */
+#ifndef ACCELERATOR_H
+#define ACCELERATOR_H
 #pragma once
-//#ifndef ACCELERATOR_H
-//#define ACCELERATOR_H
 
 #include <string.h>
 #ifdef HAVE_MALLOC_MALLOC_H
@@ -437,22 +437,35 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
 //OpenMP Target Offloading
 #ifdef OMPTARGET
+#define THREAD_LIMIT acceleratorThreads()
 //uint32_t gpu_threads=acceleratorThreads();
 #define accelerator
 #define accelerator_inline strong_inline
+#ifdef THREAD_LIMIT
 #define accelerator_for(i,num,nsimd, ... ) \
-_Pragma("omp target teams distribute parallel for") \
+_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT)") \
 for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
 
 #define accelerator_forNB(i,num,nsimd, ... ) \
-_Pragma("omp target teams distribute parallel for nowait") \
+_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT) nowait") \
 for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
 
 #define accelerator_barrier(dummy) _Pragma("omp barrier")
 
 #define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
-_Pragma("omp target teams distribute parallel for collapse(2)") \
+_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT) collapse(2)") \
 for ( uint64_t iter1=0;iter1<num1;iter1++) \
 for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
+#else
+#define accelerator_for(i,num,nsimd, ... ) \
+_Pragma("omp target teams distribute parallel for") \
+for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
+#define accelerator_forNB(i,num,nsimd, ... ) \
+_Pragma("omp target teams distribute parallel for nowait") \
+for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
+#define accelerator_barrier(dummy) _Pragma("omp barrier")
+#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
+_Pragma("omp target teams distribute parallel for collapse(2)") \
+for ( uint64_t iter1=0;iter1<num1;iter1++) \
+for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
+#endif
 
 accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
 inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) {;}
@@ -577,5 +590,5 @@ accelerator_inline void acceleratorFence(void)
 }
 
 NAMESPACE_END(Grid);
-//#endif
+#endif
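
For context on how the patched macros take effect: OpenMP directives are subject to macro replacement, so thread_limit(THREAD_LIMIT) expands to thread_limit(acceleratorThreads()) before the target region launches, and the runtime caps the number of threads per team at that value. The following is a minimal standalone sketch of the same pattern, not Grid source; the acceleratorThreads() stand-in returns a fixed value here, whereas in Grid it reports the value passed via --accelerator-threads.

// thread_limit_sketch.cpp -- standalone sketch, not Grid source.
#include <cstdint>
#include <cstdio>

// Stand-in for Grid's accessor: in Grid, acceleratorThreads() reports the
// value given on the command line via --accelerator-threads.
static uint32_t acceleratorThreads(void) { return 8; }

#define THREAD_LIMIT acceleratorThreads()

// Same shape as the patched accelerator_for: the OpenMP directive undergoes
// macro replacement, so thread_limit(THREAD_LIMIT) becomes
// thread_limit(acceleratorThreads()) and caps the threads per team.
#define accelerator_for(i,num,nsimd, ... ) \
_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT)") \
for ( uint64_t i=0;i<(num);i++) { __VA_ARGS__ } ;

int main(void) {
  double out[1024];  // implicitly mapped tofrom when offloading is enabled
  accelerator_for(i, 1024, 1, out[i] = 2.0*(double)i; );
  printf("out[10] = %f\n", out[10]); // expect 20.0
  return 0;
}

Built with plain g++ -fopenmp the loop falls back to the host; with an offloading toolchain (e.g. -fopenmp-targets=... on clang) the runtime limits each team of the offloaded loop to 8 threads in this sketch.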