mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
The GPU reduction was not working for me and was causing errors. Need to revisit.
Gianluca is working on deterministic reduction.
This commit is contained in:
parent
6d77941990
commit
dc5024e88c
@ -59,8 +59,7 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
auto left_v = left.View();
|
auto left_v = left.View();
|
||||||
auto right_v=right.View();
|
auto right_v=right.View();
|
||||||
|
|
||||||
#ifdef GRID_NVCC
|
#if 0
|
||||||
|
|
||||||
typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t;
|
typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t;
|
||||||
thrust::plus<inner_t> binary_sum;
|
thrust::plus<inner_t> binary_sum;
|
||||||
innerProductFunctor<vobj,inner_t> binary_inner_p;
|
innerProductFunctor<vobj,inner_t> binary_inner_p;
|
||||||
@ -69,7 +68,6 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
// is there a way of using the efficient thrust reduction while maintaining memory coalescing?
|
// is there a way of using the efficient thrust reduction while maintaining memory coalescing?
|
||||||
inner_t vnrm = thrust::inner_product(thrust::device, &left_v[0], &left_v[sN], &right_v[0], zero, binary_sum, binary_inner_p);
|
inner_t vnrm = thrust::inner_product(thrust::device, &left_v[0], &left_v[sN], &right_v[0], zero, binary_sum, binary_inner_p);
|
||||||
nrm = Reduce(TensorRemove(vnrm));// sum across simd
|
nrm = Reduce(TensorRemove(vnrm));// sum across simd
|
||||||
|
|
||||||
#else
|
#else
|
||||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||||
int mywork, myoff;
|
int mywork, myoff;
|
||||||
|
Loading…
Reference in New Issue
Block a user