diff --git a/lib/threads/Pragmas.h b/lib/threads/Pragmas.h index 18fb40e2..d109bdc1 100644 --- a/lib/threads/Pragmas.h +++ b/lib/threads/Pragmas.h @@ -35,8 +35,6 @@ Author: paboyle #define strong_inline __attribute__((always_inline)) inline -#define COMMA_SAFE(...) __VA_ARGS__ - #ifdef _OPENMP #define GRID_OMP #include <omp.h> @@ -52,15 +50,15 @@ Author: paboyle // New primitives; explicit host thread calls, and accelerator data parallel calls ////////////////////////////////////////////////////////////////////////////////// #ifdef GRID_OMP -#define thread_loop( range , body ) _Pragma("omp parallel for schedule(static)") for range { body ; }; -#define thread_loop_in_region( range , body ) _Pragma("omp for schedule(static)") for range { body ; }; -#define thread_loop_collapse( range , body ) _Pragma("omp parallel for collapse(2)") for range { body }; +#define thread_loop( range , ... ) _Pragma("omp parallel for schedule(static)") for range { __VA_ARGS__ ; }; +#define thread_loop_in_region( range , ... ) _Pragma("omp for schedule(static)") for range { __VA_ARGS__ ; }; +#define thread_loop_collapse( range , ... ) _Pragma("omp parallel for collapse(2)") for range { __VA_ARGS__ }; #define thread_region _Pragma("omp parallel") #define thread_critical _Pragma("omp critical") #else -#define thread_loop( range , body ) for range { body ; }; -#define thread_loop_in_region( range , body ) for range { body ; }; -#define thread_loop_collapse( range , body ) for range { body ; }; +#define thread_loop( range , ... ) for range { __VA_ARGS__ ; }; +#define thread_loop_in_region( range , ... ) for range { __VA_ARGS__ ; }; +#define thread_loop_collapse( range , ... 
) for range { __VA_ARGS__ ; }; #define thread_region #define thread_critical #endif @@ -88,20 +86,20 @@ Author: paboyle #define accelerator __host__ __device__ #define accelerator_inline __host__ __device__ inline // FIXME ; need to make this a CUDA kernel call -#define accelerator_loop( iterator, range, body ) \ +#define accelerator_loop( iterator, range, ... ) \ typedef decltype(range.begin()) Iterator; \ auto lambda = [&] (Iterator iterator) { \ - body; \ + __VA_ARGS__; \ }; \ for(auto it=range.begin();it static void ThreadSum( std::vector<obj> &sum_array,obj &val,int me){ sum_array[me] = val; - val=zero; + val=Zero(); ThreadBarrier(); for(int i=0;i<_threads;i++) val+= sum_array[i]; ThreadBarrier();