mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-13 01:05:36 +00:00
SYCL updates -- operator = giving trouble on Aurora.
SYCL reduction is failing intermittently with SVM interface - returns zero, expect non-zero. Think I need to remove ALL dependence on SVM.
This commit is contained in:
parent
aa67a5b095
commit
622f78ebea
@ -236,10 +236,13 @@ public:
|
|||||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
vobj vtmp;
|
vobj vtmp;
|
||||||
vtmp = r;
|
vtmp = r;
|
||||||
#if defined(GRID_HIP) || defined(GRID_CUDA) || defined (GRID_SYCL)
|
#if 0
|
||||||
|
deviceVector<vobj> vvtmp(1);
|
||||||
|
acceleratorPut(vvtmp[0],vtmp);
|
||||||
|
vobj *vvtmp_p = & vvtmp[0];
|
||||||
auto me = View(AcceleratorWrite);
|
auto me = View(AcceleratorWrite);
|
||||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||||
auto stmp=coalescedRead(vtmp);
|
auto stmp=coalescedRead(*vvtmp_p);
|
||||||
coalescedWrite(me[ss],stmp);
|
coalescedWrite(me[ss],stmp);
|
||||||
});
|
});
|
||||||
#else
|
#else
|
||||||
|
@ -4,16 +4,36 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Possibly promote to double and sum
|
// Possibly promote to double and sum
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
template <class vobj>
|
template <class vobj>
|
||||||
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
|
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
typedef typename vobj::scalar_objectD sobjD;
|
typedef typename vobj::scalar_objectD sobjD;
|
||||||
|
#if 1
|
||||||
|
sobj identity; zeroit(identity);
|
||||||
|
sobj ret; zeroit(ret);
|
||||||
|
Integer nsimd= vobj::Nsimd();
|
||||||
|
{
|
||||||
|
sycl::buffer<sobj, 1> abuff(&ret, {1});
|
||||||
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||||
|
auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::plus<>());
|
||||||
|
cgh.parallel_for(cl::sycl::range<1>{osites},
|
||||||
|
Reduction,
|
||||||
|
[=] (cl::sycl::id<1> item, auto &sum) {
|
||||||
|
auto osite = item[0];
|
||||||
|
sum +=Reduce(lat[osite]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
sobjD dret; convertType(dret,ret);
|
||||||
|
return dret;
|
||||||
|
#else
|
||||||
static Vector<sobj> mysum;
|
static Vector<sobj> mysum;
|
||||||
mysum.resize(1);
|
mysum.resize(1);
|
||||||
sobj *mysum_p = & mysum[0];
|
sobj *mysum_p = & mysum[0];
|
||||||
sobj identity; zeroit(identity);
|
sobj identity; zeroit(identity);
|
||||||
mysum[0] = identity;
|
acceleratorPut(mysum[0],identity);
|
||||||
sobj ret ;
|
sobj ret ;
|
||||||
|
|
||||||
Integer nsimd= vobj::Nsimd();
|
Integer nsimd= vobj::Nsimd();
|
||||||
@ -33,6 +53,7 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
|
|||||||
// free(mysum,*theGridAccelerator);
|
// free(mysum,*theGridAccelerator);
|
||||||
sobjD dret; convertType(dret,ret);
|
sobjD dret; convertType(dret,ret);
|
||||||
return dret;
|
return dret;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class vobj>
|
template <class vobj>
|
||||||
@ -76,12 +97,28 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
|
|||||||
|
|
||||||
template<class Word> Word svm_xor(Word *vec,uint64_t L)
|
template<class Word> Word svm_xor(Word *vec,uint64_t L)
|
||||||
{
|
{
|
||||||
Word xorResult; xorResult = 0;
|
#if 1
|
||||||
|
Word identity; identity=0;
|
||||||
|
Word ret = 0;
|
||||||
|
{
|
||||||
|
sycl::buffer<Word, 1> abuff(&ret, {1});
|
||||||
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||||
|
auto Reduction = cl::sycl::reduction(abuff,cgh,identity,std::bit_xor<>());
|
||||||
|
cgh.parallel_for(cl::sycl::range<1>{L},
|
||||||
|
Reduction,
|
||||||
|
[=] (cl::sycl::id<1> index, auto &sum) {
|
||||||
|
sum ^=vec[index];
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
theGridAccelerator->wait();
|
||||||
|
return ret;
|
||||||
|
#else
|
||||||
static Vector<Word> d_sum;
|
static Vector<Word> d_sum;
|
||||||
d_sum.resize(1);
|
d_sum.resize(1);
|
||||||
Word *d_sum_p=&d_sum[0];
|
Word *d_sum_p=&d_sum[0];
|
||||||
Word identity; identity=0;
|
Word identity; identity=0;
|
||||||
d_sum[0] = identity;
|
acceleratorPut(d_sum[0],identity);
|
||||||
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
|
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
|
||||||
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
|
||||||
auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList);
|
auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList);
|
||||||
@ -92,9 +129,10 @@ template<class Word> Word svm_xor(Word *vec,uint64_t L)
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
theGridAccelerator->wait();
|
theGridAccelerator->wait();
|
||||||
Word ret = d_sum[0];
|
Word ret = acceleratorGet(d_sum[0]);
|
||||||
// free(d_sum,*theGridAccelerator);
|
// free(d_sum,*theGridAccelerator);
|
||||||
return ret;
|
return ret;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
Loading…
Reference in New Issue
Block a user