mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-14 13:57:07 +01:00
Use accelerator_for2d and DeviceSegmentedReduce to avoid kernel launch latencies
This commit is contained in:
@ -44,8 +44,9 @@ int main (int argc, char ** argv) {
|
||||
std::cout <<" sliceSumGpu took "<<tgpu<<" usecs"<<std::endl;
|
||||
|
||||
for(int t=0;t<reduction_reference.size();t++){
|
||||
|
||||
auto diff = reduction_reference[t]-reduction_result[t];
|
||||
// std::cout << "Difference = " << diff <<std::endl;
|
||||
|
||||
assert(abs(TensorRemove(diff)) < 1e-8 );
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user