1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Zero changes and VA_ARGS changes

This commit is contained in:
paboyle 2018-01-27 23:46:58 +00:00
parent 44ef5bc207
commit 45df59720e
2 changed files with 14 additions and 16 deletions

View File

@ -35,8 +35,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define strong_inline __attribute__((always_inline)) inline
#define COMMA_SAFE(...) __VA_ARGS__
#ifdef _OPENMP
#define GRID_OMP
#include <omp.h>
@ -52,15 +50,15 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// New primitives; explicit host thread calls, and accelerator data parallel calls
//////////////////////////////////////////////////////////////////////////////////
// Host threading primitives.
//
// With GRID_OMP defined, the loop macros place an OpenMP pragma in front of a
// `for` statement; without it they expand to a plain `for`, and thread_region /
// thread_critical expand to nothing.
//
// `range` is pasted directly after the `for` keyword, so the caller supplies the
// complete parenthesised for-header. The loop body is pasted inside `{ ... }`.
//
// NOTE(review): this listing shows both sides of a diff hunk without +/- markers,
// so every loop macro appears twice: first with a single named `body` parameter,
// then with `...` / __VA_ARGS__. Presumably only the variadic forms remain after
// the commit -- confirm against the repository.
//
// Why variadic: the preprocessor gathers everything after `range`, commas
// included, into __VA_ARGS__, so a body containing top-level commas is accepted.
// A single named `body` parameter cannot take such a body.
//
// Every loop macro expansion ends with `};`, i.e. the compound statement is
// followed by an empty statement.
#ifdef GRID_OMP
// Single-parameter (`body`) forms.
#define thread_loop( range , body ) _Pragma("omp parallel for schedule(static)") for range { body ; };
#define thread_loop_in_region( range , body ) _Pragma("omp for schedule(static)") for range { body ; };
#define thread_loop_collapse( range , body ) _Pragma("omp parallel for collapse(2)") for range { body };
// Variadic forms.
// thread_loop: `omp parallel for` with a static schedule.
#define thread_loop( range , ... ) _Pragma("omp parallel for schedule(static)") for range { __VA_ARGS__ ; };
// thread_loop_in_region: `omp for` only; the pragma does not itself open a parallel region.
#define thread_loop_in_region( range , ... ) _Pragma("omp for schedule(static)") for range { __VA_ARGS__ ; };
// thread_loop_collapse: `omp parallel for collapse(2)`.
// NOTE(review): unlike the other loop macros (and unlike the non-OpenMP
// thread_loop_collapse below), no `;` is appended after the body here, so the
// body must provide its own terminator in this configuration.
#define thread_loop_collapse( range , ... ) _Pragma("omp parallel for collapse(2)") for range { __VA_ARGS__ };
// These two emit only the pragma; it applies to the statement the caller writes next.
#define thread_region _Pragma("omp parallel")
#define thread_critical _Pragma("omp critical")
#else
// No GRID_OMP: the same macro names, expanding to ordinary `for` loops with no pragma.
// Single-parameter (`body`) forms.
#define thread_loop( range , body ) for range { body ; };
#define thread_loop_in_region( range , body ) for range { body ; };
#define thread_loop_collapse( range , body ) for range { body ; };
// Variadic forms.
#define thread_loop( range , ... ) for range { __VA_ARGS__ ; };
#define thread_loop_in_region( range , ... ) for range { __VA_ARGS__ ; };
#define thread_loop_collapse( range , ... ) for range { __VA_ARGS__ ; };
// Expand to nothing, so the statement that follows runs as ordinary code.
#define thread_region
#define thread_critical
#endif
@ -88,20 +86,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define accelerator __host__ __device__
#define accelerator_inline __host__ __device__ inline
// FIXME: need to make this a CUDA kernel call
#define accelerator_loop( iterator, range, body ) \
#define accelerator_loop( iterator, range, ... ) \
typedef decltype(range.begin()) Iterator; \
auto lambda = [&] (Iterator iterator) { \
body; \
__VA_ARGS__; \
}; \
for(auto it=range.begin();it<range.end();it++){ \
lambda(it); \
}
#define cpu_loop( iterator, range, body ) thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { body });
#define cpu_loop( iterator, range, ... ) thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { __VA_ARGS__ });
#else
#define accelerator
#define accelerator_inline strong_inline
#define accelerator_loop( iterator, range, body ) \
thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { body });
#define cpu_loop( iterator, range, body ) \
thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { body });
#define accelerator_loop( iterator, range, ... ) \
thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { __VA_ARGS__ });
#define cpu_loop( iterator, range, ... ) \
thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { __VA_ARGS__ });
#endif

View File

@ -102,7 +102,7 @@ public:
template<class obj> static void ThreadSum( std::vector<obj> &sum_array,obj &val,int me){
sum_array[me] = val;
val=zero;
val=Zero();
ThreadBarrier();
for(int i=0;i<_threads;i++) val+= sum_array[i];
ThreadBarrier();