1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 14:57:05 +01:00

Deprecate UVM

This commit is contained in:
2024-09-17 13:34:27 +00:00
parent 11be10d2c0
commit 066544281f
75 changed files with 668 additions and 1082 deletions

View File

@ -10,7 +10,7 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_objectD sobjD;
#if 1
sobj identity; zeroit(identity);
sobj ret; zeroit(ret);
Integer nsimd= vobj::Nsimd();
@ -28,32 +28,6 @@ inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer os
}
sobjD dret; convertType(dret,ret);
return dret;
#else
static Vector<sobj> mysum;
mysum.resize(1);
sobj *mysum_p = & mysum[0];
sobj identity; zeroit(identity);
acceleratorPut(mysum[0],identity);
sobj ret ;
Integer nsimd= vobj::Nsimd();
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(mysum_p,identity,std::plus<>(),PropList);
cgh.parallel_for(cl::sycl::range<1>{osites},
Reduction,
[=] (cl::sycl::id<1> item, auto &sum) {
auto osite = item[0];
sum +=Reduce(lat[osite]);
});
});
theGridAccelerator->wait();
ret = mysum[0];
// free(mysum,*theGridAccelerator);
sobjD dret; convertType(dret,ret);
return dret;
#endif
}
template <class vobj>
@ -97,7 +71,6 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
template<class Word> Word svm_xor(Word *vec,uint64_t L)
{
#if 1
Word identity; identity=0;
Word ret = 0;
{
@ -113,60 +86,7 @@ template<class Word> Word svm_xor(Word *vec,uint64_t L)
}
theGridAccelerator->wait();
return ret;
#else
static Vector<Word> d_sum;
d_sum.resize(1);
Word *d_sum_p=&d_sum[0];
Word identity; identity=0;
acceleratorPut(d_sum[0],identity);
const cl::sycl::property_list PropList ({ cl::sycl::property::reduction::initialize_to_identity() });
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(d_sum_p,identity,std::bit_xor<>(),PropList);
cgh.parallel_for(cl::sycl::range<1>{L},
Reduction,
[=] (cl::sycl::id<1> index, auto &sum) {
sum^=vec[index];
});
});
theGridAccelerator->wait();
Word ret = acceleratorGet(d_sum[0]);
// free(d_sum,*theGridAccelerator);
return ret;
#endif
}
NAMESPACE_END(Grid);
/*
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
{
typedef typename vobj::vector_type vector;
typedef typename vobj::scalar_type scalar;
typedef typename vobj::scalar_typeD scalarD;
typedef typename vobj::scalar_objectD sobjD;
sobjD ret;
scalarD *ret_p = (scalarD *)&ret;
const int nsimd = vobj::Nsimd();
const int words = sizeof(vobj)/sizeof(vector);
Vector<scalar> buffer(osites*nsimd);
scalar *buf = &buffer[0];
vector *dat = (vector *)lat;
for(int w=0;w<words;w++) {
accelerator_for(ss,osites,nsimd,{
int lane = acceleratorSIMTlane(nsimd);
buf[ss*nsimd+lane] = dat[ss*words+w].getlane(lane);
});
//Precision change at this point is to late to gain precision
ret_p[w] = svm_reduce(buf,nsimd*osites);
}
return ret;
}
*/