diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index b667d32e..38b9f9c6 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -746,26 +746,34 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list,int dir) { - // int nreq=list.size(); + acceleratorCopySynchronise(); // Complete all pending copy transfers D2D - // if (nreq==0) return; - // std::vector status(nreq); - // std::vector MpiRequests(nreq); + std::vector status; + std::vector MpiRequests; + + for(int r=0;r0) { + status.resize(MpiRequests.size()); + int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing. + assert(ierr==0); + } - // int ierr = MPI_Waitall(nreq,&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing. - // assert(ierr==0); - // for(int r=0;rHostBufferFreeAll(); // Clean up the buffer allocs diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index b5aaccb4..28c3aa0a 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -676,7 +676,7 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes) template void acceleratorPut(T& dev,const T&host) { - acceleratorCopyToDevice(&host,&dev,sizeof(T)); + acceleratorCopyToDevice((void *)&host,&dev,sizeof(T)); } template T acceleratorGet(T& dev) { diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 1424667e..feb44645 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -509,7 +509,14 @@ void Grid_init(int *argc,char ***argv) Grid_default_latt, Grid_default_mpi); - + if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){ + std::cout << GridLogMessage <<" Enabling flight recorder " <