1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Overlap cudaMemcpy - the copy stream was not set up correctly

This commit is contained in:
Peter Boyle 2021-10-11 13:31:26 -07:00
parent cda915a345
commit 16c2a99965
3 changed files with 3 additions and 2 deletions

View File

@ -389,7 +389,6 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
acceleratorCopySynchronise(); // MPI prob slower
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
@ -405,6 +404,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
acceleratorCopySynchronise();
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);

View File

@ -95,7 +95,7 @@ void acceleratorInit(void)
#endif
cudaSetDevice(device);
cudaStreamCreate(&copyStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {

View File

@ -95,6 +95,7 @@ void acceleratorInit(void);
//////////////////////////////////////////////
#ifdef GRID_CUDA
#include <cuda.h>
#ifdef __CUDA_ARCH__