mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Adding 2D loops
This commit is contained in:
parent
066be31a3b
commit
ebd730bd54
@ -95,7 +95,7 @@ void LambdaApply(uint64_t base, uint64_t Num, lambda Lambda)
|
|||||||
auto lambda = [=] accelerator (Iterator iterator) mutable { \
|
auto lambda = [=] accelerator (Iterator iterator) mutable { \
|
||||||
__VA_ARGS__; \
|
__VA_ARGS__; \
|
||||||
}; \
|
}; \
|
||||||
Iterator num = range.end(); \
|
Iterator num = range.end() - range.begin(); \
|
||||||
Iterator base = range.begin(); \
|
Iterator base = range.begin(); \
|
||||||
Iterator num_block = (num+gpu_threads-1)/gpu_threads; \
|
Iterator num_block = (num+gpu_threads-1)/gpu_threads; \
|
||||||
LambdaApply<<<num_block,gpu_threads>>>(base,num,lambda); \
|
LambdaApply<<<num_block,gpu_threads>>>(base,num,lambda); \
|
||||||
@ -108,6 +108,33 @@ void LambdaApply(uint64_t base, uint64_t Num, lambda Lambda)
|
|||||||
|
|
||||||
#define cpu_loop( iterator, range, ... ) thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), { __VA_ARGS__ });
|
/*
 * cpu_loop( iterator, range, ... )
 *
 * Forwards to thread_loop with a for-header that walks `iterator` from
 * range.begin() up to (but excluding) range.end(), and with the variadic
 * arguments wrapped in braces as the loop body.
 */
#define cpu_loop( iterator, range, ... )                                        \
  thread_loop( (auto iterator = range.begin();iterator<range.end();iterator++), \
               { __VA_ARGS__ });
|
||||||
|
|
||||||
|
/*
 * LambdaApply2D: kernel that invokes Lambda(osite) from every thread whose
 * flat 1D thread index maps onto a valid outer site.
 *
 *   Osites : number of outer sites; threads mapping past this do nothing.
 *   Isites : number of consecutive threads that share one outer site
 *            (must be non-zero, it is used as a divisor).
 *   Lambda : device-callable object taking the outer site index.
 *
 * Expects a 1D grid of 1D blocks. Each outer site is visited by up to
 * Isites threads, provided the launch supplies at least Osites*Isites
 * threads in total.
 */
template<typename lambda> __global__
void LambdaApply2D(uint64_t Osites, uint64_t Isites, lambda Lambda)
{
  // Widen to 64 bits before multiplying: blockIdx.x*blockDim.x is otherwise
  // evaluated in 32-bit unsigned arithmetic and wraps for very large launches.
  uint64_t site  = (uint64_t)blockIdx.x*(uint64_t)blockDim.x + (uint64_t)threadIdx.x;
  uint64_t osite = site / Isites;
  // Bounds guard: the grid may be rounded up past the last outer site.
  if ( osite < Osites ) {
    Lambda(osite);
  }
}
|
||||||
|
|
||||||
|
/*
 * coalesce_loop( iterator, range, nsimd, ... )
 *
 * Wraps the statements in __VA_ARGS__ in a device lambda whose uint64_t
 * argument is named `iterator`, then launches it through LambdaApply2D with
 *   Osites = range.end() - range.begin()
 *   Isites = nsimd
 * using gpu_threads threads per block. The host then waits for the device
 * and, if a CUDA error is pending, prints it and terminates the process
 * with a non-zero exit status.
 *
 * The block count is rounded up so that a thread exists for every
 * (outer site, lane) pair even when num*nsimd is not a multiple of
 * gpu_threads; LambdaApply2D bounds-checks the outer index, so the surplus
 * threads are idle. An empty range launches nothing.
 *
 * NOTE(review): `base` is computed but not forwarded to the kernel, so the
 * body sees indices 0..num-1 rather than range.begin()..range.end()-1.
 * Confirm whether callers ever pass a range with a non-zero begin().
 *
 * The macro declares Iterator, lambda, num, base, cu_threads, cu_blocks and
 * err in the enclosing scope.
 */
#define coalesce_loop( iterator, range, nsimd, ... )                          \
  typedef uint64_t Iterator;                                                  \
  auto lambda = [=] accelerator (Iterator iterator) mutable {                 \
    __VA_ARGS__;                                                              \
  };                                                                          \
  Iterator num        = range.end() - range.begin();                          \
  Iterator base       = range.begin();                                        \
  Iterator cu_threads = gpu_threads;                                          \
  /* ceil-div: truncation would drop the tail sites or give a 0-block grid */ \
  Iterator cu_blocks  = (num*(uint64_t)(nsimd) + cu_threads - 1)/cu_threads;  \
  if ( cu_blocks > 0 ) {                                                      \
    LambdaApply2D<<<cu_blocks,cu_threads>>>(num,(uint64_t)(nsimd),lambda);    \
  }                                                                           \
  cudaDeviceSynchronize();                                                    \
  cudaError err = cudaGetLastError();                                         \
  if ( cudaSuccess != err ) {                                                 \
    printf("Cuda error %s\n",cudaGetErrorString( err ));                      \
    exit(1);                                                                  \
  }
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define accelerator
|
#define accelerator
|
||||||
|
Loading…
Reference in New Issue
Block a user