From f8e880b445a4507e4f71cfdcc11cd65c0e46f01b Mon Sep 17 00:00:00 2001
From: paboyle
Date: Wed, 27 Jun 2018 21:49:57 +0100
Subject: [PATCH] Loop for s and xyzt offlow

---
 lib/threads/Pragmas.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/lib/threads/Pragmas.h b/lib/threads/Pragmas.h
index 7294b180..fb230590 100644
--- a/lib/threads/Pragmas.h
+++ b/lib/threads/Pragmas.h
@@ -106,6 +106,47 @@ void LambdaApply(uint64_t base, uint64_t Num, lambda Lambda)
     exit(0); \
   }
 
+/*
+ * accelerator_loopN( iterator, num, ... )
+ *
+ * Packages the loop body given in __VA_ARGS__ as a by-value, mutable lambda
+ * taking one index named `iterator`, and launches it through the LambdaApply
+ * kernel with base 0 and count `num`.
+ *
+ *   iterator  name of the index variable seen by the body
+ *   num       iteration count; decltype(num) becomes the index type.
+ *             Expanded more than once, so it must have no side effects.
+ *   ...       the loop body
+ *
+ * The grid is ceil(num / gpu_threads) blocks of gpu_threads threads. Nothing
+ * is launched when that is zero blocks, since an empty grid is rejected as an
+ * invalid launch configuration. After the launch the host waits for the
+ * device, and on any recorded CUDA error prints its string and exits with a
+ * non-zero status. The expansion is brace-enclosed so that its locals do not
+ * clash when the macro is used more than once in one scope.
+ *
+ * NOTE(review): how LambdaApply maps threads onto the index range is not
+ * visible in this hunk -- confirm against its definition.
+ */
+#define accelerator_loopN( iterator, num, ... ) \
+  { \
+    typedef decltype(num) Iterator; \
+    auto lambda = [=] accelerator (Iterator iterator) mutable { \
+      __VA_ARGS__; \
+    }; \
+    Iterator base = 0; \
+    Iterator num_block = ((num)+gpu_threads-1)/gpu_threads; \
+    if ( num_block > 0 ) { \
+      LambdaApply<<<num_block,gpu_threads>>>(base,num,lambda); \
+      cudaDeviceSynchronize(); \
+      cudaError err = cudaGetLastError(); \
+      if ( cudaSuccess != err ) { \
+        printf("Cuda error %s\n",cudaGetErrorString( err )); \
+        exit(1); \
+      } \
+    } \
+  }
+
 
 #define cpu_loop( iterator, range, ... ) thread_loop( (auto iterator = range.begin();iterator __global__
@@ -141,6 +182,10 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
 #define accelerator_inline strong_inline
 #define accelerator_loop( iterator, range, ... ) \
 thread_loop( (auto iterator = range.begin();iterator