diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index 305a3a9b..162180bc 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -388,6 +388,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorShmBufferTranslate(dest,recv); assert(shm!=NULL); + std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl; acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); } @@ -399,12 +400,13 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list,int dir) { + acceleratorCopySynchronise(); std::cout << "Copy Synchronised\n"< status(nreq); - acceleratorCopySynchronise(); int ierr = MPI_Waitall(nreq,&list[0],&status[0]); assert(ierr==0); list.resize(0); diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index cec0600f..8be712ba 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -306,7 +306,7 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); } -inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); } +inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); std::cout<<"acceleratorCopySynchronise() wait "<memcpy(to,from,bytes); theGridAccelerator->wait();} inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();} diff --git a/benchmarks/Benchmark_dwf_fp32.cc b/benchmarks/Benchmark_dwf_fp32.cc index d48486c0..4edf7c16 100644 --- a/benchmarks/Benchmark_dwf_fp32.cc +++ b/benchmarks/Benchmark_dwf_fp32.cc @@ -126,19 +126,10 @@ int main (int argc, char ** argv) // Naive wilson implementation //////////////////////////////////// // replicate across fifth dimension - LatticeGaugeFieldF Umu5d(FGrid); - std::vector U(4,FGrid); - { - autoView( Umu5d_v, Umu5d, CpuWrite); - autoView( Umu_v , Umu , CpuRead); - for(int ss=0;ssoSites();ss++){ - for(int s=0;s U(4,UGrid); for(int mu=0;mu(Umu5d,mu); + U[mu] = PeekIndex(Umu,mu); } std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl; @@ -147,10 +138,28 @@ int main (int argc, char ** argv) ref = Zero(); for(int mu=0;muoSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;soSites();ss++){ + for(int s=0;s