mirror of
https://github.com/paboyle/Grid.git
synced 2025-07-01 22:17:08 +01:00
Simplify and verbose
This commit is contained in:
@ -544,19 +544,20 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
#ifndef ACCELERATOR_AWARE_MPI
|
#ifndef ACCELERATOR_AWARE_MPI
|
||||||
// printf("Host buffer allocate for GPU non-aware MPI\n");
|
// printf("Host buffer allocate for GPU non-aware MPI\n");
|
||||||
HostCommBuf= malloc(bytes); /// CHANGE THIS TO malloc_host
|
HostCommBuf= malloc(bytes); /// CHANGE THIS TO malloc_host
|
||||||
// acceleratorPin(HostCommBuf,bytes);
|
|
||||||
#endif
|
#endif
|
||||||
ShmCommBuf = acceleratorAllocDevice(bytes);
|
ShmCommBuf = acceleratorAllocDevice(bytes);
|
||||||
if (ShmCommBuf == (void *)NULL ) {
|
if (ShmCommBuf == (void *)NULL ) {
|
||||||
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
std::cerr << "SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
if ( WorldRank == 0 ){
|
if ( WorldRank == 0 ){
|
||||||
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
|
std::cout << Mheader " acceleratorAllocDevice "<< bytes
|
||||||
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
|
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
|
||||||
}
|
}
|
||||||
SharedMemoryZero(ShmCommBuf,bytes);
|
SharedMemoryZero(ShmCommBuf,bytes);
|
||||||
std::cout<< "Setting up IPC"<<std::endl;
|
if ( WorldRank == 0 ){
|
||||||
|
std::cout<< Mheader "Setting up IPC"<<std::endl;
|
||||||
|
}
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Loop over ranks/gpu's on our node
|
// Loop over ranks/gpu's on our node
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -587,8 +588,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
if ( err != ZE_RESULT_SUCCESS ) {
|
if ( err != ZE_RESULT_SUCCESS ) {
|
||||||
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
|
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
} else {
|
|
||||||
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
|
|
||||||
}
|
}
|
||||||
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
|
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
|
||||||
handle.pid = getpid();
|
handle.pid = getpid();
|
||||||
@ -647,12 +646,12 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
#ifdef SHM_SOCKETS
|
#ifdef SHM_SOCKETS
|
||||||
myfd=UnixSockets::RecvFileDescriptor();
|
myfd=UnixSockets::RecvFileDescriptor();
|
||||||
#else
|
#else
|
||||||
std::cout<<"mapping seeking remote pid/fd "
|
// std::cout<<"mapping seeking remote pid/fd "
|
||||||
<<handle.pid<<"/"
|
// <<handle.pid<<"/"
|
||||||
<<handle.fd<<std::endl;
|
// <<handle.fd<<std::endl;
|
||||||
|
|
||||||
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
|
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
|
||||||
std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
|
// std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
|
||||||
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
|
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
|
||||||
myfd = syscall(438,pidfd,handle.fd,0);
|
myfd = syscall(438,pidfd,handle.fd,0);
|
||||||
int err_t = errno;
|
int err_t = errno;
|
||||||
@ -662,7 +661,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n";
|
// std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n";
|
||||||
memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle));
|
memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle));
|
||||||
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
|
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
|
||||||
|
|
||||||
@ -671,9 +670,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
|
std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
|
||||||
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
|
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
} else {
|
|
||||||
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
|
|
||||||
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl;
|
|
||||||
}
|
}
|
||||||
assert(thisBuf!=nullptr);
|
assert(thisBuf!=nullptr);
|
||||||
}
|
}
|
||||||
@ -754,6 +750,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
WorldShmCommBufs[r] =ptr;
|
WorldShmCommBufs[r] =ptr;
|
||||||
// std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
// std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||||
}
|
}
|
||||||
|
std::cout<< Mheader " Intra-node IPC setup is complete "<<std::endl;
|
||||||
_ShmAlloc=1;
|
_ShmAlloc=1;
|
||||||
_ShmAllocBytes = bytes;
|
_ShmAllocBytes = bytes;
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user