diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index a32d69db..316f201c 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -69,7 +69,7 @@ public: } // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop - void construct(pointer __p, const _Tp& __val) { assert(0);}; + void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; void destroy(pointer __p) { }; }; diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc index eb8c6d38..09afbcf7 100644 --- a/Grid/allocator/MemoryManagerCache.cc +++ b/Grid/allocator/MemoryManagerCache.cc @@ -234,6 +234,9 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis } void MemoryManager::EvictVictims(uint64_t bytes) { + if(bytes>=DeviceMaxBytes) { + printf("EvictVictims bytes %ld DeviceMaxBytes %ld\n",bytes,DeviceMaxBytes); + } assert(bytes DeviceMaxBytes){ if ( DeviceLRUBytes > 0){ diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index 38b9f9c6..8de29669 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -759,9 +759,6 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector0) { status.resize(MpiRequests.size()); int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing. diff --git a/Grid/cshift/Cshift_mpi.h b/Grid/cshift/Cshift_mpi.h index 710792ee..6f2e2699 100644 --- a/Grid/cshift/Cshift_mpi.h +++ b/Grid/cshift/Cshift_mpi.h @@ -126,8 +126,8 @@ template void Cshift_comms(Lattice &ret,const Lattice &r static deviceVector send_buf; send_buf.resize(buffer_size); static deviceVector recv_buf; recv_buf.resize(buffer_size); #ifndef ACCELERATOR_AWARE_MPI - static hostVector hsend_buf; hsend_buf.resize(buffer_size); - static hostVector hrecv_buf; hrecv_buf.resize(buffer_size); + static hostVector hsend_buf; hsend_buf.resize(buffer_size); + static hostVector hrecv_buf; hrecv_buf.resize(buffer_size); #endif int cb= (cbmask==0x2)? Odd : Even; @@ -244,7 +244,6 @@ template void Cshift_comms_simd(Lattice &ret,const Latticeu_comm_offset==this->_unified_buffer_size); accelerator_barrier(); #ifdef NVLINK_GET - #warning "NVLINK_GET" this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Or issue barrier AFTER the DMA is running diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 2a666a04..3613cdbb 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -518,7 +518,6 @@ public: } accelerator_barrier(); // All my local gathers are complete #ifdef NVLINK_GET - #warning "NVLINK_GET" _grid->StencilBarrier(); // He can now get mu local gather, I can get his // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Or issue barrier AFTER the DMA is running