1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-15 02:05:37 +00:00

cl::sycl -> SYCL

This commit is contained in:
Peter Boyle 2024-10-10 22:04:12 +00:00
parent be7a543e2c
commit c5c67b706e
2 changed files with 18 additions and 18 deletions

View File

@ -202,13 +202,13 @@ void acceleratorInit(void)
#ifdef GRID_SYCL #ifdef GRID_SYCL
cl::sycl::queue *theGridAccelerator; sycl::queue *theGridAccelerator;
cl::sycl::queue *theCopyAccelerator; sycl::queue *theCopyAccelerator;
void acceleratorInit(void) void acceleratorInit(void)
{ {
int nDevices = 1; int nDevices = 1;
// cl::sycl::gpu_selector selector; // sycl::gpu_selector selector;
// cl::sycl::device selectedDevice { selector }; // sycl::device selectedDevice { selector };
theGridAccelerator = new sycl::queue (sycl::gpu_selector_v); theGridAccelerator = new sycl::queue (sycl::gpu_selector_v);
theCopyAccelerator = new sycl::queue (sycl::gpu_selector_v); theCopyAccelerator = new sycl::queue (sycl::gpu_selector_v);
// theCopyAccelerator = theGridAccelerator; // Should proceed concurrently anyway. // theCopyAccelerator = theGridAccelerator; // Should proceed concurrently anyway.
@ -242,14 +242,14 @@ void acceleratorInit(void)
gethostname(hostname, HOST_NAME_MAX+1); gethostname(hostname, HOST_NAME_MAX+1);
if ( rank==0 ) printf(" acceleratorInit world_rank %d is host %s \n",world_rank,hostname); if ( rank==0 ) printf(" acceleratorInit world_rank %d is host %s \n",world_rank,hostname);
auto devices = cl::sycl::device::get_devices(); auto devices = sycl::device::get_devices();
for(int d = 0;d<devices.size();d++){ for(int d = 0;d<devices.size();d++){
#define GPU_PROP_STR(prop) \ #define GPU_PROP_STR(prop) \
printf("AcceleratorSyclInit: " #prop ": %s \n",devices[d].get_info<cl::sycl::info::device::prop>().c_str()); printf("AcceleratorSyclInit: " #prop ": %s \n",devices[d].get_info<sycl::info::device::prop>().c_str());
#define GPU_PROP_FMT(prop,FMT) \ #define GPU_PROP_FMT(prop,FMT) \
printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info<cl::sycl::info::device::prop>()); printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info<sycl::info::device::prop>());
#define GPU_PROP(prop) GPU_PROP_FMT(prop,"%ld"); #define GPU_PROP(prop) GPU_PROP_FMT(prop,"%ld");
if ( world_rank == 0) { if ( world_rank == 0) {

View File

@ -302,7 +302,7 @@ NAMESPACE_END(Grid);
// Force deterministic reductions // Force deterministic reductions
#define SYCL_REDUCTION_DETERMINISTIC #define SYCL_REDUCTION_DETERMINISTIC
#include <sycl/CL/sycl.hpp> #include <sycl/sycl.hpp>
#include <sycl/usm.hpp> #include <sycl/usm.hpp>
#include <level_zero/ze_api.h> #include <level_zero/ze_api.h>
#include <sycl/ext/oneapi/backend/level_zero.hpp> #include <sycl/ext/oneapi/backend/level_zero.hpp>
@ -314,8 +314,8 @@ inline void acceleratorMem(void)
std::cout <<" SYCL acceleratorMem not implemented"<<std::endl; std::cout <<" SYCL acceleratorMem not implemented"<<std::endl;
} }
extern cl::sycl::queue *theGridAccelerator; extern sycl::queue *theGridAccelerator;
extern cl::sycl::queue *theCopyAccelerator; extern sycl::queue *theCopyAccelerator;
#ifdef __SYCL_DEVICE_ONLY__ #ifdef __SYCL_DEVICE_ONLY__
#define GRID_SIMT #define GRID_SIMT
@ -326,24 +326,24 @@ extern cl::sycl::queue *theCopyAccelerator;
accelerator_inline int acceleratorSIMTlane(int Nsimd) { accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT #ifdef GRID_SIMT
return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; return __spirv::initLocalInvocationId<3, sycl::id<3>>()[2];
#else #else
return 0; return 0;
#endif #endif
} // SYCL specific } // SYCL specific
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ theGridAccelerator->submit([&](sycl::handler &cgh) { \
unsigned long nt=acceleratorThreads(); \ unsigned long nt=acceleratorThreads(); \
if(nt < 8)nt=8; \ if(nt < 8)nt=8; \
unsigned long unum1 = num1; \ unsigned long unum1 = num1; \
unsigned long unum2 = num2; \ unsigned long unum2 = num2; \
unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \ unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt; \
cl::sycl::range<3> local {nt,1,nsimd}; \ sycl::range<3> local {nt,1,nsimd}; \
cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \ sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd}; \
cgh.parallel_for( \ cgh.parallel_for( \
cl::sycl::nd_range<3>(global,local), \ sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \ [=] (sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(16)]] \ [[intel::reqd_sub_group_size(16)]] \
{ \ { \
auto iter1 = item.get_global_id(0); \ auto iter1 = item.get_global_id(0); \
@ -369,8 +369,8 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccele
inline int acceleratorIsCommunicable(void *ptr) inline int acceleratorIsCommunicable(void *ptr)
{ {
#if 0 #if 0
auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context()); auto uvm = sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
if ( uvm = cl::sycl::usm::alloc::shared ) return 1; if ( uvm = sycl::usm::alloc::shared ) return 1;
else return 0; else return 0;
#endif #endif
return 1; return 1;