mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 15:27:06 +01:00
Fixed: the --accelerator-threads input is now passed to the OpenMP target thread_limit() clause
This commit is contained in:
@ -26,11 +26,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#ifndef ACCELERATOR_H
|
||||||
|
#define ACCELERATOR_H
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
//#ifndef ACCELERATOR_H
|
|
||||||
//#define ACCELERATOR_H
|
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifdef HAVE_MALLOC_MALLOC_H
|
#ifdef HAVE_MALLOC_MALLOC_H
|
||||||
@ -437,22 +437,35 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
|
|||||||
|
|
||||||
//OpenMP Target Offloading
|
//OpenMP Target Offloading
|
||||||
#ifdef OMPTARGET
|
#ifdef OMPTARGET
|
||||||
|
// Value substituted into the thread_limit() clause of the OpenMP target loop macros below.
// NOTE(review): this is defined unconditionally inside the OMPTARGET section, so the
// `#ifdef THREAD_LIMIT` test further down always selects its first branch.
#define THREAD_LIMIT acceleratorThreads()
|
||||||
//uint32_t gpu_threads=acceleratorThreads();
|
|
||||||
|
|
||||||
#define accelerator
|
#define accelerator
|
||||||
#define accelerator_inline strong_inline
|
#define accelerator_inline strong_inline
|
||||||
|
#ifdef THREAD_LIMIT
|
||||||
// accelerator_for(i,num,nsimd,...): expands to an `omp target teams distribute parallel for`
// loop that runs the variadic body for a uint64_t index i from 0 up to (but excluding) num.
// The nsimd argument does not appear in the expansion.
// Two copies are shown: the first has no thread_limit clause, the second adds
// thread_limit(THREAD_LIMIT) to the pragma.
#define accelerator_for(i,num,nsimd, ... ) \
|
#define accelerator_for(i,num,nsimd, ... ) \
|
||||||
_Pragma("omp target teams distribute parallel for") \
|
_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT)") \
|
||||||
for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
|
for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
|
||||||
// accelerator_forNB(i,num,nsimd,...): same loop shape as accelerator_for, but the pragma
// carries the `nowait` clause. The nsimd argument does not appear in the expansion.
// Two copies are shown: the second adds thread_limit(THREAD_LIMIT) ahead of `nowait`.
#define accelerator_forNB(i,num,nsimd, ... ) \
|
#define accelerator_forNB(i,num,nsimd, ... ) \
|
||||||
_Pragma("omp target teams distribute parallel for nowait") \
|
_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT) nowait") \
|
||||||
for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
|
for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
|
||||||
// accelerator_barrier(dummy): expands to `#pragma omp barrier`; the argument is ignored.
// NOTE(review): presumably the synchronisation point for the `nowait` loops issued by
// accelerator_forNB -- confirm that `omp barrier` waits for those target regions as intended.
#define accelerator_barrier(dummy) _Pragma("omp barrier")
|
#define accelerator_barrier(dummy) _Pragma("omp barrier")
|
||||||
// accelerator_for2d(iter1,num1,iter2,num2,nsimd,...): expands to two nested uint64_t loops
// (iter1 over [0,num1), iter2 over [0,num2)) under an `omp target teams distribute parallel for`
// pragma with collapse(2); the variadic body is the inner loop body.
// The nsimd argument does not appear in the expansion.
// Two copies are shown: the second adds thread_limit(THREAD_LIMIT) before collapse(2).
#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
|
#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
|
||||||
_Pragma("omp target teams distribute parallel for collapse(2)") \
|
_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT) collapse(2)") \
|
||||||
for ( uint64_t iter1=0;iter1<num1;iter1++) \
|
for ( uint64_t iter1=0;iter1<num1;iter1++) \
|
||||||
for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
|
for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
|
||||||
|
#else
|
||||||
|
// #else-branch accelerator_for: the same target loop without a thread_limit clause.
// NOTE(review): this branch is taken only when THREAD_LIMIT is undefined, yet THREAD_LIMIT
// is defined unconditionally earlier in the OMPTARGET section -- it looks unreachable.
#define accelerator_for(i,num,nsimd, ... ) \
|
||||||
|
_Pragma("omp target teams distribute parallel for") \
|
||||||
|
for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
|
||||||
|
// #else-branch accelerator_forNB: the `nowait` target loop without a thread_limit clause,
// used only when THREAD_LIMIT is not defined.
#define accelerator_forNB(i,num,nsimd, ... ) \
|
||||||
|
_Pragma("omp target teams distribute parallel for nowait") \
|
||||||
|
for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
|
||||||
|
// #else-branch accelerator_barrier: expands to `#pragma omp barrier`; the argument is ignored.
#define accelerator_barrier(dummy) _Pragma("omp barrier")
|
||||||
|
// #else-branch accelerator_for2d: the collapse(2) nested target loop without a thread_limit
// clause, used only when THREAD_LIMIT is not defined.
#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
|
||||||
|
_Pragma("omp target teams distribute parallel for collapse(2)") \
|
||||||
|
for ( uint64_t iter1=0;iter1<num1;iter1++) \
|
||||||
|
for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
|
||||||
|
#endif
|
||||||
|
|
||||||
// acceleratorSIMTlane: in this OpenMP-target section it always returns 0; Nsimd is not read.
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
|
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
|
||||||
// acceleratorCopyToDevice: no-op in this OpenMP-target section -- the body is an empty
// statement, so none of from, to or bytes is used and no data is moved.
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) {;}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) {;}
|
||||||
@ -577,5 +590,5 @@ accelerator_inline void acceleratorFence(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
//#endif
|
#endif
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user