mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Testing out approaches to kernel writing, introducing SIMT_loop temporarily
This commit is contained in:
parent
29a244e423
commit
8adc5da7dd
@ -157,7 +157,6 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
|
||||
__VA_ARGS__; \
|
||||
}; \
|
||||
Iterator num = range.end() - range.begin(); \
|
||||
Iterator base = range.begin(); \
|
||||
Iterator cu_threads= gpu_threads; \
|
||||
Iterator cu_blocks = num*nsimd/cu_threads; \
|
||||
LambdaApply2D<<<cu_blocks,cu_threads>>>(num,(uint64_t)nsimd,lambda); \
|
||||
@ -168,6 +167,31 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
|
||||
exit(0); \
|
||||
}
|
||||
|
||||
// Kernel that applies Lambda(isite, osite) over a 2D index space.
//
// Thread mapping:
//   isite = threadIdx.y                            (inner index)
//   osite = threadIdx.x + blockDim.x * blockIdx.x  (outer index)
// blockIdx.y / blockIdx.z are not consulted.
//
// Parameters:
//   Isites - exclusive upper bound on isite
//   Osites - exclusive upper bound on osite
//   Lambda - callable invoked as Lambda(isite, osite)
//
// Threads whose (isite, osite) falls outside the bounds do nothing, so the
// launch configuration may over-provision threads in either dimension.
template<typename lambda> __global__
void LambdaApplySIMT(uint64_t Isites, uint64_t Osites, lambda Lambda)
{
  uint64_t isite = threadIdx.y;
  // Widen before multiplying: blockDim.x and blockIdx.x are 32-bit, so their
  // product would otherwise wrap before being stored in the 64-bit osite.
  uint64_t osite = (uint64_t)threadIdx.x + (uint64_t)blockDim.x * (uint64_t)blockIdx.x;
  if ( (osite < Osites) && (isite < Isites) ) {
    Lambda(isite, osite);
  }
}
|
||||
|
||||
// SIMT_loop( iterator, num, nsimd, body... )
//
// Wraps `body` in a by-value-capturing device lambda taking
// (Iterator lane, Iterator iterator) and launches it through LambdaApplySIMT
// with thread blocks of gpu_threads x nsimd threads and
// ceil(num / gpu_threads) blocks. Inside `body`, `iterator` takes values in
// [0,num) and `lane` takes values in [0,nsimd).
//
// The macro waits for the kernel to finish before continuing. On any CUDA
// error it prints the error string and terminates with a non-zero exit
// status so that the failure is visible to the calling process.
//
// Notes:
//  - The expansion is enclosed in braces so its locals do not leak into the
//    caller's scope and the macro may be used more than once per scope.
//  - `num` is evaluated exactly once; `num` and `nsimd` are parenthesised so
//    arbitrary expressions may be passed.
//  - num == 0 is a no-op: a launch with zero blocks is an invalid
//    configuration, so the launch is skipped instead.
//  - gpu_threads * nsimd is the thread-block size and must respect the
//    device's threads-per-block limit.
#define SIMT_loop( iterator, num, nsimd, ... )                                 \
  {                                                                            \
    typedef uint64_t Iterator;                                                 \
    auto lambda = [=] accelerator (Iterator lane,Iterator iterator) mutable {  \
      __VA_ARGS__;                                                             \
    };                                                                         \
    Iterator simt_num = (num);                                                 \
    if ( simt_num > 0 ) {                                                      \
      dim3 cu_threads(gpu_threads,(nsimd));                                    \
      dim3 cu_blocks ((simt_num+gpu_threads-1)/gpu_threads);                   \
      LambdaApplySIMT<<<cu_blocks,cu_threads>>>((nsimd),simt_num,lambda);      \
      cudaError err = cudaGetLastError();                                      \
      if ( cudaSuccess == err ) {                                              \
        err = cudaDeviceSynchronize();                                         \
      }                                                                        \
      if ( cudaSuccess != err ) {                                              \
        printf("Cuda error %s\n",cudaGetErrorString( err ));                   \
        exit(1);                                                               \
      }                                                                        \
    }                                                                          \
  }
|
||||
|
||||
#else
|
||||
|
||||
// Fallback branch (after the #else): `accelerator` expands to nothing, so the
// qualifier simply disappears from any declaration or lambda that uses it.
#define accelerator
|
||||
@ -186,4 +210,7 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
|
||||
|
||||
// Fallback coalesce_loop: forwards the iterator, range and body to cpu_loop;
// the nsimd argument is accepted but dropped.
#define coalesce_loop( iterator, range, nsimd, ... ) cpu_loop(iterator,range,{__VA_ARGS__})
|
||||
|
||||
// Fallback SIMT_loop: forwards the iterator, count and body to
// accelerator_loopN; the nsimd argument is accepted but dropped.
// NOTE(review): the CUDA variant of SIMT_loop hands the body a `lane` index,
// but this expansion does not declare one itself - confirm that
// accelerator_loopN (defined elsewhere) or the callers provide it.
#define SIMT_loop( iterator, num, nsimd, ... ) accelerator_loopN( iterator, num, {__VA_ARGS__})
|
||||
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user