diff --git a/Grid/cshift/Cshift_mpi.h b/Grid/cshift/Cshift_mpi.h index 4b000035..6f2e2699 100644 --- a/Grid/cshift/Cshift_mpi.h +++ b/Grid/cshift/Cshift_mpi.h @@ -126,8 +126,8 @@ template void Cshift_comms(Lattice &ret,const Lattice &r static deviceVector send_buf; send_buf.resize(buffer_size); static deviceVector recv_buf; recv_buf.resize(buffer_size); #ifndef ACCELERATOR_AWARE_MPI - static std::vector hsend_buf; hsend_buf.resize(buffer_size); - static std::vector hrecv_buf; hrecv_buf.resize(buffer_size); + static hostVector hsend_buf; hsend_buf.resize(buffer_size); + static hostVector hrecv_buf; hrecv_buf.resize(buffer_size); #endif int cb= (cbmask==0x2)? Odd : Even; @@ -244,14 +244,13 @@ template void Cshift_comms_simd(Lattice &ret,const Lattice hsend_buf; hsend_buf.resize(buffer_size); - std::vector hrecv_buf; hrecv_buf.resize(buffer_size); + hostVector hsend_buf; hsend_buf.resize(buffer_size); + hostVector hrecv_buf; hrecv_buf.resize(buffer_size); #endif int bytes = buffer_size*sizeof(scalar_object); diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index 1c6571e1..22f2d8e3 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -485,7 +485,6 @@ public: assert(this->u_comm_offset==this->_unified_buffer_size); accelerator_barrier(); #ifdef NVLINK_GET - #warning "NVLINK_GET" this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Or issue barrier AFTER the DMA is running diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 2a666a04..3613cdbb 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -518,7 +518,6 @@ public: } accelerator_barrier(); // All my local gathers are complete #ifdef NVLINK_GET - #warning "NVLINK_GET" _grid->StencilBarrier(); // He can now get mu local gather, I can get his // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Or issue barrier AFTER the DMA is running