1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 17:55:38 +00:00

cl::sycl -> SYCL

This commit is contained in:
Peter Boyle 2024-10-10 22:04:12 +00:00
parent be7a543e2c
commit c5c67b706e
2 changed files with 18 additions and 18 deletions

View File

@ -202,13 +202,13 @@ void acceleratorInit(void)
#ifdef GRID_SYCL
cl::sycl::queue *theGridAccelerator;
cl::sycl::queue *theCopyAccelerator;
sycl::queue *theGridAccelerator;
sycl::queue *theCopyAccelerator;
void acceleratorInit(void)
{
int nDevices = 1;
// cl::sycl::gpu_selector selector;
// cl::sycl::device selectedDevice { selector };
// sycl::gpu_selector selector;
// sycl::device selectedDevice { selector };
theGridAccelerator = new sycl::queue (sycl::gpu_selector_v);
theCopyAccelerator = new sycl::queue (sycl::gpu_selector_v);
// theCopyAccelerator = theGridAccelerator; // Should proceed concurrenlty anyway.
@ -242,14 +242,14 @@ void acceleratorInit(void)
gethostname(hostname, HOST_NAME_MAX+1);
if ( rank==0 ) printf(" acceleratorInit world_rank %d is host %s \n",world_rank,hostname);
auto devices = cl::sycl::device::get_devices();
auto devices = sycl::device::get_devices();
for(int d = 0;d<devices.size();d++){
#define GPU_PROP_STR(prop) \
printf("AcceleratorSyclInit: " #prop ": %s \n",devices[d].get_info<cl::sycl::info::device::prop>().c_str());
printf("AcceleratorSyclInit: " #prop ": %s \n",devices[d].get_info<sycl::info::device::prop>().c_str());
#define GPU_PROP_FMT(prop,FMT) \
printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info<cl::sycl::info::device::prop>());
printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info<sycl::info::device::prop>());
#define GPU_PROP(prop) GPU_PROP_FMT(prop,"%ld");
if ( world_rank == 0) {

View File

@ -302,7 +302,7 @@ NAMESPACE_END(Grid);
// Force deterministic reductions
#define SYCL_REDUCTION_DETERMINISTIC
#include <sycl/CL/sycl.hpp>
#include <sycl/sycl.hpp>
#include <sycl/usm.hpp>
#include <level_zero/ze_api.h>
#include <sycl/ext/oneapi/backend/level_zero.hpp>
@ -314,8 +314,8 @@ inline void acceleratorMem(void)
std::cout <<" SYCL acceleratorMem not implemented"<<std::endl;
}
extern cl::sycl::queue *theGridAccelerator;
extern cl::sycl::queue *theCopyAccelerator;
extern sycl::queue *theGridAccelerator;
extern sycl::queue *theCopyAccelerator;
#ifdef __SYCL_DEVICE_ONLY__
#define GRID_SIMT
@ -326,24 +326,24 @@ extern cl::sycl::queue *theCopyAccelerator;
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2];
return __spirv::initLocalInvocationId<3, sycl::id<3>>()[2];
#else
return 0;
#endif
} // SYCL specific
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \
theGridAccelerator->submit([&](sycl::handler &cgh) { \
unsigned long nt=acceleratorThreads(); \
if(nt < 8)nt=8; \
unsigned long unum1 = num1; \
unsigned long unum2 = num2; \
unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \
cl::sycl::range<3> local {nt,1,nsimd}; \
cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
sycl::range<3> local {nt,1,nsimd}; \
sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
cgh.parallel_for( \
cl::sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
sycl::nd_range<3>(global,local), \
[=] (sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(16)]] \
{ \
auto iter1 = item.get_global_id(0); \
@ -369,8 +369,8 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccele
inline int acceleratorIsCommunicable(void *ptr)
{
#if 0
auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
if ( uvm = cl::sycl::usm::alloc::shared ) return 1;
auto uvm = sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
if ( uvm = sycl::usm::alloc::shared ) return 1;
else return 0;
#endif
return 1;