1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-19 08:17:05 +01:00

Pipeline mode commit on Aurora. 5+ TF/s on 16^3x32 per tile at 384 nodes.
More concurrency/fine grained scheduling is possible.
This commit is contained in:
2025-02-04 19:27:26 +00:00
parent b50fb34e71
commit 0baaddbe98
9 changed files with 151 additions and 81 deletions

View File

@ -343,9 +343,26 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
// Block the caller by invoking wait() on theCopyAccelerator.
inline void acceleratorCopySynchronise(void)
{
  theCopyAccelerator->wait();
}
// Copy helpers that forward to theCopyAccelerator->memcpy(to,from,bytes) and
// return nothing; no wait() is issued by these functions themselves.
// NOTE(review): functions with these same names and parameter lists, but
// returning acceleratorEvent_t, are also defined further down. The two sets
// differ only in return type and so cannot coexist as overloads. Presumably
// these void forms are the pre-change side of the diff — confirm before keeping.
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes);}
inline void acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); }
inline void acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); }
///////
// Asynch event interface
///////
// Event handle type used by the asynchronous copy interface (alias of sycl::event).
using acceleratorEvent_t = sycl::event;
// Wait on a single event: calls wait() on the event passed in.
//   ev : event handle (taken by value).
inline void acceleratorEventWait(acceleratorEvent_t ev)
{
  ev.wait();
}
// Non-waiting status query on an event.
//   ev : event handle (taken by value).
// Returns 1 when the event's command_execution_status equals
// sycl::info::event_command_status::complete, otherwise 0.
inline int acceleratorEventIsComplete(acceleratorEvent_t ev)
{
  const auto status = ev.get_info<sycl::info::event::command_execution_status>();
  return (status == sycl::info::event_command_status::complete);
}
// Issue a memcpy of `bytes` bytes on theCopyAccelerator and hand back the event
// it returns. Note the argument order: `from` is the source, `to` the
// destination (memcpy itself is called destination-first). No wait() is done here.
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)
{
  return theCopyAccelerator->memcpy(to,from,bytes);
}
// Issue a memcpy of `bytes` bytes from `from` to `to` on theCopyAccelerator and
// return the resulting event. No wait() is done here; the caller decides when
// to synchronise on the returned event.
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes)
{
  return theCopyAccelerator->memcpy(to,from,bytes);
}
// Issue a memcpy of `bytes` bytes from `from` to `to` on theCopyAccelerator and
// return the resulting event. No wait() is done here; the caller decides when
// to synchronise on the returned event.
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes)
{
  return theCopyAccelerator->memcpy(to,from,bytes);
}
// Blocking copy of `bytes` bytes from `from` to `to`: issues the memcpy on
// theCopyAccelerator, then calls wait() on theCopyAccelerator before returning.
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)
{
  theCopyAccelerator->memcpy(to,from,bytes);
  theCopyAccelerator->wait();
}
// Blocking copy of `bytes` bytes from `from` to `to`: issues the memcpy on
// theCopyAccelerator, then calls wait() on theCopyAccelerator before returning.
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes)
{
  theCopyAccelerator->memcpy(to,from,bytes);
  theCopyAccelerator->wait();
}
// Blocking fill: issues memset(base,value,bytes) on theCopyAccelerator, then
// calls wait() on theCopyAccelerator before returning.
inline void acceleratorMemSet(void *base,int value,size_t bytes)
{
  theCopyAccelerator->memset(base,value,bytes);
  theCopyAccelerator->wait();
}
@ -358,8 +375,10 @@ inline int acceleratorIsCommunicable(void *ptr)
else return 0;
#endif
return 1;
}
#endif
//////////////////////////////////////////////