1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-03 18:55:56 +01:00

Use hostVector but remove construct resize

This commit is contained in:
Peter Boyle 2025-03-11 15:02:32 +00:00
parent 19f9378b98
commit 25ab9325e7
3 changed files with 4 additions and 7 deletions

View File

@ -126,8 +126,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
static deviceVector<vobj> send_buf; send_buf.resize(buffer_size);
static deviceVector<vobj> recv_buf; recv_buf.resize(buffer_size);
#ifndef ACCELERATOR_AWARE_MPI
static std::vector<vobj> hsend_buf; hsend_buf.resize(buffer_size);
static std::vector<vobj> hrecv_buf; hrecv_buf.resize(buffer_size);
static hostVector<vobj> hsend_buf; hsend_buf.resize(buffer_size);
static hostVector<vobj> hrecv_buf; hrecv_buf.resize(buffer_size);
#endif
int cb= (cbmask==0x2)? Odd : Even;
@ -244,14 +244,13 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
scalar_object * recv_buf_extract_mpi;
scalar_object * send_buf_extract_mpi;
for(int s=0;s<Nsimd;s++){
send_buf_extract[s].resize(buffer_size);
recv_buf_extract[s].resize(buffer_size);
}
#ifndef ACCELERATOR_AWARE_MPI
std::vector<scalar_object> hsend_buf; hsend_buf.resize(buffer_size);
std::vector<scalar_object> hrecv_buf; hrecv_buf.resize(buffer_size);
hostVector<scalar_object> hsend_buf; hsend_buf.resize(buffer_size);
hostVector<scalar_object> hrecv_buf; hrecv_buf.resize(buffer_size);
#endif
int bytes = buffer_size*sizeof(scalar_object);

View File

@ -485,7 +485,6 @@ public:
assert(this->u_comm_offset==this->_unified_buffer_size);
accelerator_barrier();
#ifdef NVLINK_GET
#warning "NVLINK_GET"
this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his
// Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
// Or issue barrier AFTER the DMA is running

View File

@ -518,7 +518,6 @@ public:
}
accelerator_barrier(); // All my local gathers are complete
#ifdef NVLINK_GET
#warning "NVLINK_GET"
_grid->StencilBarrier(); // He can now get mu local gather, I can get his
// Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
// Or issue barrier AFTER the DMA is running