1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

SYCL update to use buffer on reduction variable

This commit is contained in:
Peter Boyle 2024-06-08 16:05:18 +00:00
parent a6479ca50f
commit a49a161f8d

View File

@ -9,25 +9,34 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_objectD sobjD; typedef typename vobj::scalar_objectD sobjD;
sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator); // sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
// sobj *mysum =(sobj *) malloc(sizeof(sobj));
sobj identity; zeroit(identity); sobj identity; zeroit(identity);
sobj ret ; sobj ret ;
Integer nsimd= vobj::Nsimd(); Integer nsimd= vobj::Nsimd();
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { {
auto Reduction = cl::sycl::reduction(mysum,identity,std::plus<>()); sycl::buffer<sobj, 1> abuff(&ret, {1});
cgh.parallel_for(cl::sycl::range<1>{osites}, theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
Reduction,
[=] (cl::sycl::id<1> item, auto &sum) { auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::plus<>());
auto osite = item[0]; cgh.parallel_for(cl::sycl::range<1>{osites},
sum +=Reduce(lat[osite]); Reduction,
}); [=] (cl::sycl::id<1> item, auto &sum) {
}); auto osite = item[0];
sum +=Reduce(lat[osite]);
});
});
}
theGridAccelerator->wait(); theGridAccelerator->wait();
ret = mysum[0]; // acceleratorCopyFromDevice(mysum,&ret,sizeof(sobj));
free(mysum,*theGridAccelerator); // ret = mysum[0];
sobjD dret; convertType(dret,ret); sobjD dret; convertType(dret,ret);
// free(mysum,*theGridAccelerator);
// free(mysum);
return dret; return dret;
} }
@ -73,19 +82,24 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
template<class Word> Word svm_xor(Word *vec,uint64_t L) template<class Word> Word svm_xor(Word *vec,uint64_t L)
{ {
Word xorResult; xorResult = 0; Word xorResult; xorResult = 0;
Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator); // Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator);
Word identity; identity=0; Word identity; identity=0;
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { Word ret;
auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>()); {
cgh.parallel_for(cl::sycl::range<1>{L}, sycl::buffer<Word, 1> abuff(&ret, {1});
Reduction, theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
[=] (cl::sycl::id<1> index, auto &sum) { // auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>());
sum ^=vec[index]; auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::bit_xor<>());
}); cgh.parallel_for(cl::sycl::range<1>{L},
}); Reduction,
[=] (cl::sycl::id<1> index, auto &sum) {
sum ^=vec[index];
});
});
}
theGridAccelerator->wait(); theGridAccelerator->wait();
Word ret = d_sum[0]; // ret = d_sum[0];
free(d_sum,*theGridAccelerator); // free(d_sum,*theGridAccelerator);
return ret; return ret;
} }