diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index fa11dd5f..74d1f585 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -202,13 +202,13 @@ void acceleratorInit(void) #ifdef GRID_SYCL -cl::sycl::queue *theGridAccelerator; -cl::sycl::queue *theCopyAccelerator; +sycl::queue *theGridAccelerator; +sycl::queue *theCopyAccelerator; void acceleratorInit(void) { int nDevices = 1; - // cl::sycl::gpu_selector selector; - // cl::sycl::device selectedDevice { selector }; + // sycl::gpu_selector selector; + // sycl::device selectedDevice { selector }; theGridAccelerator = new sycl::queue (sycl::gpu_selector_v); theCopyAccelerator = new sycl::queue (sycl::gpu_selector_v); // theCopyAccelerator = theGridAccelerator; // Should proceed concurrenlty anyway. @@ -242,14 +242,14 @@ void acceleratorInit(void) gethostname(hostname, HOST_NAME_MAX+1); if ( rank==0 ) printf(" acceleratorInit world_rank %d is host %s \n",world_rank,hostname); - auto devices = cl::sycl::device::get_devices(); + auto devices = sycl::device::get_devices(); for(int d = 0;d().c_str()); + printf("AcceleratorSyclInit: " #prop ": %s \n",devices[d].get_info().c_str()); #define GPU_PROP_FMT(prop,FMT) \ - printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info()); + printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info()); #define GPU_PROP(prop) GPU_PROP_FMT(prop,"%ld"); if ( world_rank == 0) { diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 1cb56ddd..e37b5fb7 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -302,7 +302,7 @@ NAMESPACE_END(Grid); // Force deterministic reductions #define SYCL_REDUCTION_DETERMINISTIC -#include +#include #include #include #include @@ -314,8 +314,8 @@ inline void acceleratorMem(void) std::cout <<" SYCL acceleratorMem not implemented"<>()[2]; + return __spirv::initLocalInvocationId<3, sycl::id<3>>()[2]; #else return 0; #endif } // SYCL specific #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ - theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ + theGridAccelerator->submit([&](sycl::handler &cgh) { \ unsigned long nt=acceleratorThreads(); \ if(nt < 8)nt=8; \ unsigned long unum1 = num1; \ unsigned long unum2 = num2; \ unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \ - cl::sycl::range<3> local {nt,1,nsimd}; \ - cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \ + sycl::range<3> local {nt,1,nsimd}; \ + sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \ cgh.parallel_for( \ - cl::sycl::nd_range<3>(global,local), \ - [=] (cl::sycl::nd_item<3> item) /*mutable*/ \ + sycl::nd_range<3>(global,local), \ + [=] (sycl::nd_item<3> item) /*mutable*/ \ [[intel::reqd_sub_group_size(16)]] \ { \ auto iter1 = item.get_global_id(0); \ @@ -369,8 +369,8 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccele inline int acceleratorIsCommunicable(void *ptr) { #if 0 - auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context()); - if ( uvm = cl::sycl::usm::alloc::shared ) return 1; + auto uvm = sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context()); + if ( uvm = sycl::usm::alloc::shared ) return 1; else return 0; #endif return 1;