1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-19 16:55:37 +01:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
Peter Boyle 2024-06-11 15:16:58 -04:00
commit 6ce52092e8

View File

@ -9,34 +9,29 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_objectD sobjD; typedef typename vobj::scalar_objectD sobjD;
// sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator); static Vector<sobj> mysum;
// sobj *mysum =(sobj *) malloc(sizeof(sobj)); mysum.resize(1);
sobj *mysum_p = & mysum[0];
sobj identity; zeroit(identity); sobj identity; zeroit(identity);
mysum[0] = identity;
sobj ret ; sobj ret ;
Integer nsimd= vobj::Nsimd(); Integer nsimd= vobj::Nsimd();
{ const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
sycl::buffer<sobj, 1> abuff(&ret, {1}); theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { auto Reduction = cl::sycl::reduction(mysum_p,identity,std::plus<>(),PropList);
cgh.parallel_for(cl::sycl::range<1>{osites},
auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::plus<>()); Reduction,
cgh.parallel_for(cl::sycl::range<1>{osites}, [=] (cl::sycl::id<1> item, auto &sum) {
Reduction, auto osite = item[0];
[=] (cl::sycl::id<1> item, auto &sum) { sum +=Reduce(lat[osite]);
auto osite = item[0]; });
sum +=Reduce(lat[osite]); });
});
});
}
theGridAccelerator->wait(); theGridAccelerator->wait();
// acceleratorCopyFromDevice(mysum,&ret,sizeof(sobj)); ret = mysum[0];
// ret = mysum[0];
sobjD dret; convertType(dret,ret);
// free(mysum,*theGridAccelerator); // free(mysum,*theGridAccelerator);
// free(mysum); sobjD dret; convertType(dret,ret);
return dret; return dret;
} }
@ -82,23 +77,22 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
template<class Word> Word svm_xor(Word *vec,uint64_t L) template<class Word> Word svm_xor(Word *vec,uint64_t L)
{ {
Word xorResult; xorResult = 0; Word xorResult; xorResult = 0;
// Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator); static Vector<Word> d_sum;
d_sum.resize(1);
Word *d_sum_p=&d_sum[0];
Word identity; identity=0; Word identity; identity=0;
Word ret; d_sum[0] = identity;
{ const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
sycl::buffer<Word, 1> abuff(&ret, {1}); theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList);
// auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>()); cgh.parallel_for(cl::sycl::range<1>{L},
auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::bit_xor<>()); Reduction,
cgh.parallel_for(cl::sycl::range<1>{L}, [=] (cl::sycl::id<1> index, auto &sum) {
Reduction, sum^=vec[index];
[=] (cl::sycl::id<1> index, auto &sum) { });
sum ^=vec[index]; });
});
});
}
theGridAccelerator->wait(); theGridAccelerator->wait();
// ret = d_sum[0]; Word ret = d_sum[0];
// free(d_sum,*theGridAccelerator); // free(d_sum,*theGridAccelerator);
return ret; return ret;
} }