Mirror of https://github.com/paboyle/Grid.git

Comms buffers to device memory

Commit 0c3095e173 (parent d3ce60713d)
Peter Boyle, 2020-09-03 15:45:35 -04:00


@@ -302,60 +302,35 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 					   int bytes)
 {
   std::vector<CommsRequest_t> reqs(0);
-  //    unsigned long  xcrc = crc32(0L, Z_NULL, 0);
-  //    unsigned long  rcrc = crc32(0L, Z_NULL, 0);
-  //    xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
-  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
-  SendToRecvFromComplete(reqs);
-  //    rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
-  //    printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
-}
-void CartesianCommunicator::SendRecvPacket(void *xmit,
-					   void *recv,
-					   int sender,
-					   int receiver,
-					   int bytes)
-{
-  MPI_Status stat;
-  assert(sender != receiver);
-  int tag = sender;
-  if ( _processor == sender ) {
-    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
-  }
-  if ( _processor == receiver ) {
-    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
-  }
-}
-// Basic Halo comms primitive
-void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
-						void *xmit,
-						int dest,
-						void *recv,
-						int from,
-						int bytes)
-{
+  unsigned long  xcrc = crc32(0L, Z_NULL, 0);
+  unsigned long  rcrc = crc32(0L, Z_NULL, 0);
+
   int myrank = _processor;
   int ierr;
-  if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
-    MPI_Request xrq;
-    MPI_Request rrq;
-
-    ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
-    ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
-
-    assert(ierr==0);
-    list.push_back(xrq);
-    list.push_back(rrq);
-  } else {
-    // Give the CPU to MPI immediately; can use threads to overlap optionally
-    ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
-                      recv,bytes,MPI_CHAR,from, from,
-                      communicator,MPI_STATUS_IGNORE);
-    assert(ierr==0);
-  }
+
+  // Enforce no UVM in comms, device or host OK
+  int uvm;
+  auto
+  cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) xmit);
+  assert(cuerr == cudaSuccess );
+  assert(uvm==0);
+  cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) recv);
+  assert(cuerr == cudaSuccess );
+  assert(uvm==0);
+
+  // Give the CPU to MPI immediately; can use threads to overlap optionally
+  //  printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
+  ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
+                    recv,bytes,MPI_CHAR,from, from,
+                    communicator,MPI_STATUS_IGNORE);
+  assert(ierr==0);
+
+  //  xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
+  //  rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
+  //  printf("proc %d SendToRecvFrom %d bytes xcrc %lx rcrc %lx\n",_processor,bytes,xcrc,rcrc); fflush
 }
 // Basic Halo comms primitive
 double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 						     int dest,
 						     void *recv,
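
The substantive change in this hunk is the UVM guard: before any buffer reaches MPI, the new code asks the CUDA driver whether the pointer is a managed (Unified Virtual Memory) allocation and asserts that it is not, so only explicit device or host allocations travel through the comms path. Below is a minimal, self-contained sketch of that check, assuming only the CUDA runtime and driver APIs; isManaged is a helper name introduced here, not Grid code.

#include <cuda.h>
#include <cuda_runtime.h>
#include <cassert>
#include <cstdio>

// Query the driver: is this pointer a managed (UVM) allocation?
// Mirrors the attribute check in the diff above; not Grid's actual helper.
static bool isManaged(const void *ptr)
{
  int managed = 0;
  CUresult cuerr = cuPointerGetAttribute(&managed,
                                         CU_POINTER_ATTRIBUTE_IS_MANAGED,
                                         (CUdeviceptr)ptr);
  assert(cuerr == CUDA_SUCCESS);
  return managed != 0;
}

int main(void)
{
  void *dev = nullptr, *uvm = nullptr;
  assert(cudaMalloc(&dev, 1024) == cudaSuccess);        // plain device memory
  assert(cudaMallocManaged(&uvm, 1024) == cudaSuccess); // UVM allocation

  printf("device buffer managed? %d\n", (int)isManaged(dev)); // 0: allowed in comms
  printf("uvm    buffer managed? %d\n", (int)isManaged(uvm)); // 1: rejected by the asserts above

  cudaFree(dev);
  cudaFree(uvm);
  return 0;
}

Build with something like nvcc -o uvmcheck uvmcheck.cu -lcuda; the cudaMalloc call initializes the context that the driver-API query needs.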
@@ -411,15 +386,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
   return off_node_bytes;
 }
-void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
-{
-  SendToRecvFromComplete(waitall);
-}
-void CartesianCommunicator::StencilBarrier(void)
-{
-  MPI_Barrier  (ShmComm);
-}
-void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
 {
   int nreq=list.size();
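
The completion logic this hunk folds directly into StencilSendToRecvFromComplete is the standard request-list pattern: requests posted by the Begin side are drained with a single MPI_Waitall, then the list is cleared for reuse. A self-contained sketch of that pattern, assuming CommsRequest_t is MPI_Request as in Grid's MPI backend (CompleteAll is a name introduced here):

#include <mpi.h>
#include <cassert>
#include <vector>

typedef MPI_Request CommsRequest_t; // assumption: matches Grid's MPI backend

// Wait on every outstanding request in the list, then reset it,
// as the surviving function body above does.
void CompleteAll(std::vector<CommsRequest_t> &list)
{
  int nreq = list.size();
  if (nreq == 0) return;
  std::vector<MPI_Status> status(nreq);
  int ierr = MPI_Waitall(nreq, list.data(), status.data());
  assert(ierr == 0);
  list.resize(0); // ready for the next exchange
}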
@@ -430,6 +397,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
   assert(ierr==0);
   list.resize(0);
 }
+void CartesianCommunicator::StencilBarrier(void)
+{
+  MPI_Barrier  (ShmComm);
+}
+//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+//{
+//}
 void CartesianCommunicator::Barrier(void)
 {
   int ierr = MPI_Barrier(communicator);
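
StencilBarrier synchronizes ShmComm rather than the world communicator, i.e. only the ranks within one shared-memory domain. This diff does not show how ShmComm is built; the conventional construction (and presumably Grid's) is MPI_Comm_split_type with MPI_COMM_TYPE_SHARED, sketched below.

#include <mpi.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  // Group the ranks that share a memory domain (typically one group per node).
  MPI_Comm ShmComm;
  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED,
                      0 /* key: keep rank order */, MPI_INFO_NULL, &ShmComm);

  // Barrier among node-local ranks only; cheaper than a global
  // MPI_Barrier(MPI_COMM_WORLD) when only intra-node ordering matters.
  MPI_Barrier(ShmComm);

  MPI_Comm_free(&ShmComm);
  MPI_Finalize();
  return 0;
}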