mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Overlap cudamemcpy - didn't set up stream right
This commit is contained in:
parent
cda915a345
commit
16c2a99965
@ -389,7 +389,6 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
|
||||
assert(shm!=NULL);
|
||||
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
|
||||
acceleratorCopySynchronise(); // MPI prob slower
|
||||
}
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
@ -405,6 +404,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
|
||||
if (nreq==0) return;
|
||||
|
||||
std::vector<MPI_Status> status(nreq);
|
||||
acceleratorCopySynchronise();
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
assert(ierr==0);
|
||||
list.resize(0);
|
||||
|
@ -95,7 +95,7 @@ void acceleratorInit(void)
|
||||
#endif
|
||||
|
||||
cudaSetDevice(device);
|
||||
|
||||
cudaStreamCreate(&copyStream);
|
||||
const int len=64;
|
||||
char busid[len];
|
||||
if( rank == world_rank ) {
|
||||
|
@ -95,6 +95,7 @@ void acceleratorInit(void);
|
||||
//////////////////////////////////////////////
|
||||
|
||||
#ifdef GRID_CUDA
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
|
Loading…
Reference in New Issue
Block a user