1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 13:57:07 +01:00

Use accelerator_for2d and DeviceSegmentedReduce to avoid kernel launch latencies

This commit is contained in:
Dennis Bollweg
2024-02-01 16:41:03 -05:00
parent caa5f97723
commit 79a6ed32d8
3 changed files with 50 additions and 26 deletions

View File

@ -44,8 +44,9 @@ int main (int argc, char ** argv) {
std::cout <<" sliceSumGpu took "<<tgpu<<" usecs"<<std::endl;
for(int t=0;t<reduction_reference.size();t++){
auto diff = reduction_reference[t]-reduction_result[t];
// std::cout << "Difference = " << diff <<std::endl;
assert(abs(TensorRemove(diff)) < 1e-8 );
}