mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Testing out approaches to kernel writing; introducing SIMT_loop temporarily
This commit is contained in:
parent
29a244e423
commit
8adc5da7dd
@ -157,7 +157,6 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
|
|||||||
__VA_ARGS__; \
|
__VA_ARGS__; \
|
||||||
}; \
|
}; \
|
||||||
Iterator num = range.end() - range.begin(); \
|
Iterator num = range.end() - range.begin(); \
|
||||||
Iterator base = range.begin(); \
|
|
||||||
Iterator cu_threads= gpu_threads; \
|
Iterator cu_threads= gpu_threads; \
|
||||||
Iterator cu_blocks = num*nsimd/cu_threads; \
|
Iterator cu_blocks = num*nsimd/cu_threads; \
|
||||||
LambdaApply2D<<<cu_blocks,cu_threads>>>(num,(uint64_t)nsimd,lambda); \
|
LambdaApply2D<<<cu_blocks,cu_threads>>>(num,(uint64_t)nsimd,lambda); \
|
||||||
@ -168,6 +167,31 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
|
|||||||
exit(0); \
|
exit(0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Kernel that calls Lambda(isite, osite) once for every (inner, outer) index
// pair with isite < Isites and osite < Osites.
//
//   isite : taken from threadIdx.y
//   osite : global thread index along x (threadIdx.x + blockDim.x*blockIdx.x)
//
// Threads whose indices fall outside the requested ranges do nothing, so the
// launch grid is allowed to overhang the range.
template<typename lambda> __global__
void LambdaApplySIMT(uint64_t Isites, uint64_t Osites, lambda Lambda)
{
  uint64_t isite = threadIdx.y;
  // Widen before multiplying: blockDim.x*blockIdx.x would otherwise be
  // evaluated in 32-bit unsigned arithmetic and could wrap before the
  // result is stored in the 64-bit index.
  uint64_t osite = threadIdx.x + (uint64_t)blockDim.x*blockIdx.x;
  if ( (osite <Osites) && (isite<Isites) ) {
    Lambda(isite,osite);
  }
}
|
||||||
|
|
||||||
|
// SIMT_loop( iterator, num, nsimd, body... )
//
// Wraps `body` in a by-value-capturing device lambda taking
// (Iterator lane, Iterator iterator) and runs it through LambdaApplySIMT with
// thread blocks of gpu_threads x nsimd threads and ceil(num/gpu_threads)
// blocks, i.e. for lane in [0,nsimd) and iterator in [0,num).  The host then
// waits for the device and, on any CUDA error, prints the error string and
// terminates the process with a non-zero exit status.
//
// The expansion is a single brace-enclosed statement so that its locals
// (lambda, cu_threads, cu_blocks, err) do not leak into the caller's scope
// and the macro can be used more than once in the same scope.  An empty
// range (num == 0) is skipped, because a grid with zero blocks is not a
// valid launch configuration.
//
// NOTE(review): gpu_threads is defined outside this block; gpu_threads*nsimd
// presumably has to stay within the device's threads-per-block limit --
// confirm at the definition of gpu_threads.
#define SIMT_loop( iterator, num, nsimd, ... )				\
  {									\
    typedef uint64_t Iterator;						\
    auto lambda = [=] accelerator (Iterator lane,Iterator iterator) mutable { \
      __VA_ARGS__;							\
    };									\
    if ( (num) > 0 ) {							\
      dim3 cu_threads(gpu_threads,(nsimd));				\
      dim3 cu_blocks (((num)+gpu_threads-1)/gpu_threads);		\
      LambdaApplySIMT<<<cu_blocks,cu_threads>>>((nsimd),(num),lambda);	\
      cudaDeviceSynchronize();						\
      cudaError err = cudaGetLastError();				\
      if ( cudaSuccess != err ) {					\
	printf("Cuda error %s\n",cudaGetErrorString( err ));		\
	exit(1);							\
      }									\
    }									\
  }
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define accelerator
|
#define accelerator
|
||||||
@ -186,4 +210,7 @@ void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
|
|||||||
|
|
||||||
#define coalesce_loop( iterator, range, nsimd, ... ) cpu_loop(iterator,range,{__VA_ARGS__})
|
#define coalesce_loop( iterator, range, nsimd, ... ) cpu_loop(iterator,range,{__VA_ARGS__})
|
||||||
|
|
||||||
|
// Fallback SIMT_loop for builds that take the #else branch: nsimd is accepted
// for signature compatibility but not used, and the body is forwarded as a
// single braced block to accelerator_loopN.
// NOTE(review): accelerator_loopN is defined outside this view -- confirm it
// provides whatever the body expects (e.g. a `lane` variable, if bodies use it).
#define SIMT_loop( iterator, num, nsimd, ... ) \
  accelerator_loopN( iterator, num, {__VA_ARGS__})
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user