mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
6ce52092e8
@ -9,34 +9,29 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
|
|||||||
{
|
{
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
typedef typename vobj::scalar_objectD sobjD;
|
typedef typename vobj::scalar_objectD sobjD;
|
||||||
// sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
|
static Vector<sobj> mysum;
|
||||||
// sobj *mysum =(sobj *) malloc(sizeof(sobj));
|
mysum.resize(1);
|
||||||
|
sobj *mysum_p = & mysum[0];
|
||||||
sobj identity; zeroit(identity);
|
sobj identity; zeroit(identity);
|
||||||
|
mysum[0] = identity;
|
||||||
sobj ret ;
|
sobj ret ;
|
||||||
|
|
||||||
Integer nsimd= vobj::Nsimd();
|
Integer nsimd= vobj::Nsimd();
|
||||||
|
|
||||||
{
|
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
|
||||||
sycl::buffer<sobj, 1> abuff(&ret, {1});
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
auto Reduction = cl::sycl::reduction(mysum_p,identity,std::plus<>(),PropList);
|
||||||
|
cgh.parallel_for(cl::sycl::range<1>{osites},
|
||||||
auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::plus<>());
|
Reduction,
|
||||||
cgh.parallel_for(cl::sycl::range<1>{osites},
|
[=] (cl::sycl::id<1> item, auto &sum) {
|
||||||
Reduction,
|
auto osite = item[0];
|
||||||
[=] (cl::sycl::id<1> item, auto &sum) {
|
sum +=Reduce(lat[osite]);
|
||||||
auto osite = item[0];
|
});
|
||||||
sum +=Reduce(lat[osite]);
|
});
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
theGridAccelerator->wait();
|
theGridAccelerator->wait();
|
||||||
// acceleratorCopyFromDevice(mysum,&ret,sizeof(sobj));
|
ret = mysum[0];
|
||||||
// ret = mysum[0];
|
|
||||||
|
|
||||||
sobjD dret; convertType(dret,ret);
|
|
||||||
// free(mysum,*theGridAccelerator);
|
// free(mysum,*theGridAccelerator);
|
||||||
// free(mysum);
|
sobjD dret; convertType(dret,ret);
|
||||||
return dret;
|
return dret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,23 +77,22 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
|
|||||||
template<class Word> Word svm_xor(Word *vec,uint64_t L)
|
template<class Word> Word svm_xor(Word *vec,uint64_t L)
|
||||||
{
|
{
|
||||||
Word xorResult; xorResult = 0;
|
Word xorResult; xorResult = 0;
|
||||||
// Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator);
|
static Vector<Word> d_sum;
|
||||||
|
d_sum.resize(1);
|
||||||
|
Word *d_sum_p=&d_sum[0];
|
||||||
Word identity; identity=0;
|
Word identity; identity=0;
|
||||||
Word ret;
|
d_sum[0] = identity;
|
||||||
{
|
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
|
||||||
sycl::buffer<Word, 1> abuff(&ret, {1});
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList);
|
||||||
// auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>());
|
cgh.parallel_for(cl::sycl::range<1>{L},
|
||||||
auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::bit_xor<>());
|
Reduction,
|
||||||
cgh.parallel_for(cl::sycl::range<1>{L},
|
[=] (cl::sycl::id<1> index, auto &sum) {
|
||||||
Reduction,
|
sum^=vec[index];
|
||||||
[=] (cl::sycl::id<1> index, auto &sum) {
|
});
|
||||||
sum ^=vec[index];
|
});
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
theGridAccelerator->wait();
|
theGridAccelerator->wait();
|
||||||
// ret = d_sum[0];
|
Word ret = d_sum[0];
|
||||||
// free(d_sum,*theGridAccelerator);
|
// free(d_sum,*theGridAccelerator);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user