From 8adc5da7ddcac95ddb654eba8200b20127ee5ac8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 8 Jun 2019 13:47:04 +0100 Subject: [PATCH] Testing out approaches to kernel writing introducing SIMT_loop temporarily --- Grid/threads/Pragmas.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Grid/threads/Pragmas.h b/Grid/threads/Pragmas.h index 28eaebf3..737691e9 100644 --- a/Grid/threads/Pragmas.h +++ b/Grid/threads/Pragmas.h @@ -157,7 +157,6 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda) __VA_ARGS__; \ }; \ Iterator num = range.end() - range.begin(); \ - Iterator base = range.begin(); \ Iterator cu_threads= gpu_threads; \ Iterator cu_blocks = num*nsimd/cu_threads; \ LambdaApply2D<<>>(num,(uint64_t)nsimd,lambda); \ @@ -168,6 +167,31 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda) exit(0); \ } +template __global__ +void LambdaApplySIMT(uint64_t Isites, uint64_t Osites, lambda Lambda) +{ + uint64_t isite = threadIdx.y; + uint64_t osite = threadIdx.x+blockDim.x*blockIdx.x; + if ( (osite >>(nsimd,num,lambda); \ + cudaDeviceSynchronize(); \ + cudaError err = cudaGetLastError(); \ + if ( cudaSuccess != err ) { \ + printf("Cuda error %s\n",cudaGetErrorString( err )); \ + exit(0); \ + } + #else #define accelerator @@ -186,4 +210,7 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda) #define coalesce_loop( iterator, range, nsimd, ... ) cpu_loop(iterator,range,{__VA_ARGS__}) +#define SIMT_loop( iterator, num, nsimd, ... ) accelerator_loopN( iterator, num, {__VA_ARGS__}) + + #endif