Guard against zero-length loops, which would otherwise cause a kernel launch failure

2025-07-31 03:37:07 +01:00 · 2018-09-10 11:20:07 +01:00
parent f27b9347ff
commit b5329d8852
1 changed files with 14 additions and 12 deletions
--- a/lib/threads/Pragmas.h
+++ b/lib/threads/Pragmas.h
@@ -108,18 +108,20 @@ void LambdaApply(uint64_t base, uint64_t Num, lambda Lambda)

 #define accelerator_loopN( iterator, num, ... )			\
  typedef decltype(num) Iterator;				\
-  auto lambda = [=] accelerator (Iterator iterator) mutable {		\
-    __VA_ARGS__;							\
-  };									\
-  Iterator base = 0;							\
-  Iterator num_block  = (num+gpu_threads-1)/gpu_threads;		\
-  LambdaApply<<<num_block,gpu_threads>>>(base,num,lambda);		\
-  cudaDeviceSynchronize();						\
-  cudaError err = cudaGetLastError();					\
-  if ( cudaSuccess != err ) {						\
-    printf("Cuda error %s\n",cudaGetErrorString( err ));		\
-    exit(0);								\
-  }									
+  if ( num > 0 ) {			                        \
+    auto lambda = [=] accelerator (Iterator iterator) mutable { \
+      __VA_ARGS__;						\
+    };								\
+    Iterator base = 0;						\
+    Iterator num_block  = (num+gpu_threads-1)/gpu_threads;	\
+    LambdaApply<<<num_block,gpu_threads>>>(base,num,lambda);	\
+    cudaDeviceSynchronize();					\
+    cudaError err = cudaGetLastError();				\
+    if ( cudaSuccess != err ) {					\
+      printf("Cuda error %s\n",cudaGetErrorString( err ));	\
+      exit(0);							\
+    }								\
+  }

 #define cpu_loop( iterator, range, ... )   thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { __VA_ARGS__ });