From 074627a5bd7b2fa98343d5c6404f8bbec66d43dd Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 17 Apr 2023 21:50:52 +0000 Subject: [PATCH] Pass file descriptors through AF_UNIX for level_zero --- Grid/communicator/SharedMemoryMPI.cc | 157 +++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 12 deletions(-) diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index ad60ecb0..335404c2 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -27,6 +27,8 @@ Author: Christoph Lehner *************************************************************************************/ /* END LEGAL */ +#define header "SharedMemoryMpi: " + #include #include @@ -37,13 +39,119 @@ Author: Christoph Lehner #include #endif #ifdef GRID_SYCL -#include #define GRID_SYCL_LEVEL_ZERO_IPC +#include +#define SHM_SOCKETS +#endif + +#include +#include + +NAMESPACE_BEGIN(Grid); + +#ifdef SHM_SOCKETS + +/* + * Barbaric extra intranode communication route in case we need sockets to pass FDs + * Forced by level_zero not being nicely designed + */ +static int sock; +static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d"; +static char sock_path[256]; +class UnixSockets { +public: + static void Open(int rank) + { + int errnum; + + sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0); + + struct sockaddr_un sa_un = { 0 }; + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank); + unlink(sa_un.sun_path); + if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) { + perror("bind failure"); + exit(EXIT_FAILURE); + } + } + + static int RecvFileDescriptor(void) + { + int n; + int fd; + char buf[1]; + struct iovec iov; + struct msghdr msg; + struct cmsghdr *cmsg; + char cms[CMSG_SPACE(sizeof(int))]; + + iov.iov_base = buf; + iov.iov_len = 1; + + memset(&msg, 0, sizeof msg); + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = (caddr_t)cms; + msg.msg_controllen = sizeof cms; + + if((n=recvmsg(sock, &msg, 0)) < 0) { + perror("recvmsg failed"); + return -1; + } + if(n == 0){ + perror("recvmsg returned 0"); + return -1; + } + cmsg = CMSG_FIRSTHDR(&msg); + + memmove(&fd, CMSG_DATA(cmsg), sizeof(int)); + + return fd; + } + + static void SendFileDescriptor(int fildes,int xmit_to_rank) + { + struct msghdr msg; + struct iovec iov; + struct cmsghdr *cmsg = NULL; + char ctrl[CMSG_SPACE(sizeof(int))]; + char data = ' '; + + memset(&msg, 0, sizeof(struct msghdr)); + memset(ctrl, 0, CMSG_SPACE(sizeof(int))); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + sprintf(sock_path,sock_path_fmt,xmit_to_rank); + + struct sockaddr_un sa_un = { 0 }; + sa_un.sun_family = AF_UNIX; + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank); + + msg.msg_name = (void *)&sa_un; + msg.msg_namelen = sizeof(sa_un); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_controllen = CMSG_SPACE(sizeof(int)); + msg.msg_control = ctrl; + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + *((int *) CMSG_DATA(cmsg)) = fildes; + + sendmsg(sock, &msg, 0); + }; +}; #endif -NAMESPACE_BEGIN(Grid); -#define header "SharedMemoryMpi: " /*Construct from an MPI communicator*/ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) { @@ -480,8 +588,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) /////////////////////////////////////////////////////////////////////////////////////////////////////////// // Loop over ranks/gpu's on our node /////////////////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef SHM_SOCKETS + UnixSockets::Open(WorldShmRank); +#endif for(int r=0;r(theGridAccelerator->get_device()); auto zeContext = cl::sycl::get_native(theGridAccelerator->get_context()); ze_ipc_mem_handle_t ihandle; clone_mem_t handle; - + if ( r==WorldShmRank ) { auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); if ( err != ZE_RESULT_SUCCESS ) { - std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<