Mirror of https://github.com/paboyle/Grid.git (synced 2025-04-10 06:00:45 +01:00)
Commit d15a6c5933: Merge branch 'develop' of https://github.com/paboyle/Grid into feature-aurora
@@ -69,7 +69,7 @@ public:
   }

   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
-  void construct(pointer __p, const _Tp& __val) { assert(0);};
+  void construct(pointer __p, const _Tp& __val) { };
   void construct(pointer __p) { };
   void destroy(pointer __p) { };
 };
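For context, construct() and destroy() are the standard allocator hooks that std::vector invokes per element; leaving them empty lets containers built on this aligned allocator skip the single-threaded element-initialisation loop the FIXME mentions, and the hunk relaxes the copy-construct overload from assert(0) to the same no-op. A minimal, stand-alone sketch of that idea (not Grid's actual allocator class; names below are illustrative):

#include <cstdlib>
#include <vector>

// Toy allocator whose construct()/destroy() are deliberate no-ops, so
// std::vector<T, uninitialised_allocator<T>>::resize() leaves the payload
// uninitialised instead of value-constructing every element.
template<typename T>
struct uninitialised_allocator {
  using value_type = T;

  uninitialised_allocator() = default;
  template<typename U>
  uninitialised_allocator(const uninitialised_allocator<U>&) {}

  T*   allocate(std::size_t n)       { return static_cast<T*>(std::malloc(n * sizeof(T))); }
  void deallocate(T* p, std::size_t) { std::free(p); }

  // allocator_traits picks these up and skips per-element initialisation.
  template<typename U, typename... Args> void construct(U*, Args&&...) {}
  template<typename U>                   void destroy(U*) {}
};

template<typename T, typename U>
bool operator==(const uninitialised_allocator<T>&, const uninitialised_allocator<U>&) { return true; }
template<typename T, typename U>
bool operator!=(const uninitialised_allocator<T>&, const uninitialised_allocator<U>&) { return false; }

int main() {
  std::vector<float, uninitialised_allocator<float>> v;
  v.resize(1 << 20);   // no O(N) single-threaded construction loop; contents indeterminate
  return 0;
}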
@@ -234,6 +234,9 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
 }
 void MemoryManager::EvictVictims(uint64_t bytes)
 {
+  if(bytes>=DeviceMaxBytes) {
+    printf("EvictVictims bytes %ld DeviceMaxBytes %ld\n",bytes,DeviceMaxBytes);
+  }
   assert(bytes<DeviceMaxBytes);
   while(bytes+DeviceLRUBytes > DeviceMaxBytes){
     if ( DeviceLRUBytes > 0){
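The added branch only prints a diagnostic before the existing assert fires; the eviction logic itself is the usual LRU loop: keep discarding the least-recently-used device blocks until the requested bytes fit under the device budget. A simplified, self-contained sketch of that pattern (DiscardOldestVictim and the LRU container are illustrative stand-ins, not Grid's MemoryManager internals):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <deque>

static uint64_t DeviceMaxBytes = 8ull << 30;  // device cache budget (example value)
static uint64_t DeviceLRUBytes = 0;           // bytes currently held by evictable blocks
static std::deque<uint64_t> LRU;              // block sizes, least recently used at the back

static void DiscardOldestVictim(void) {
  uint64_t sz = LRU.back();                   // evict the LRU block
  LRU.pop_back();
  DeviceLRUBytes -= sz;
}

static void EvictVictimsSketch(uint64_t bytes) {
  if (bytes >= DeviceMaxBytes) {              // the diagnostic added by the hunk above
    printf("EvictVictims bytes %lu DeviceMaxBytes %lu\n",
           (unsigned long)bytes, (unsigned long)DeviceMaxBytes);
  }
  assert(bytes < DeviceMaxBytes);             // a single request larger than the budget is fatal
  while (bytes + DeviceLRUBytes > DeviceMaxBytes) {
    assert(DeviceLRUBytes > 0 && !LRU.empty());
    DiscardOldestVictim();
  }
}

int main() {
  LRU = {1ull << 30, 2ull << 30, 1ull << 30}; // three resident blocks, 4 GiB total
  DeviceLRUBytes = 4ull << 30;
  EvictVictimsSketch(6ull << 30);             // needs 2 GiB of the budget freed
  printf("resident LRU bytes after eviction: %lu\n", (unsigned long)DeviceLRUBytes);
  return 0;
}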
@@ -759,9 +759,6 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
 
   int nreq=MpiRequests.size();
 
-  std::cout << GridLogMessage << " StencilSendToRevFromComplete "<<nreq<<" Mpi Requests"<<std::endl;
-
-
   if (nreq>0) {
     status.resize(MpiRequests.size());
     int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing.
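The removed line was only a per-call log message; the completion logic is unchanged: the requests accumulated by the non-blocking stencil sends and receives are retired in a single MPI_Waitall. Stripped of Grid's classes, the idiom is just this minimal sketch:

#include <mpi.h>
#include <cassert>
#include <vector>

// Retire every outstanding non-blocking request in one call. The statuses are
// only kept so errors could be inspected; MPI_STATUSES_IGNORE would also do.
void CompleteRequests(std::vector<MPI_Request> &MpiRequests) {
  int nreq = MpiRequests.size();
  if (nreq > 0) {
    std::vector<MPI_Status> status(nreq);
    int ierr = MPI_Waitall(nreq, &MpiRequests[0], &status[0]);
    assert(ierr == MPI_SUCCESS);
    MpiRequests.resize(0);   // every request has completed and been freed by MPI
  }
}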
@@ -126,8 +126,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
   static deviceVector<vobj> send_buf; send_buf.resize(buffer_size);
   static deviceVector<vobj> recv_buf; recv_buf.resize(buffer_size);
 #ifndef ACCELERATOR_AWARE_MPI
   static hostVector<vobj> hsend_buf; hsend_buf.resize(buffer_size);
   static hostVector<vobj> hrecv_buf; hrecv_buf.resize(buffer_size);
 #endif
 
   int cb= (cbmask==0x2)? Odd : Even;
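The #ifndef ACCELERATOR_AWARE_MPI block shows what these buffers are for: with GPU-aware MPI the deviceVector buffers can be handed to MPI directly, otherwise the data has to be staged through the host mirrors (hsend_buf/hrecv_buf) around the MPI call. A generic sketch of that choice, assuming a CUDA-like backend (the cudaMemcpy calls and the function name are illustrative, not Grid's API):

#include <mpi.h>
#include <cuda_runtime.h>
#include <cstddef>
#include <vector>

// Exchange n objects with two neighbours, staging through the host only when
// the MPI library cannot dereference device pointers itself.
template<class vobj>
void halo_exchange(vobj *d_send, vobj *d_recv, std::size_t n,
                   int to_rank, int from_rank, MPI_Comm comm) {
  int bytes = (int)(n * sizeof(vobj));
#ifdef ACCELERATOR_AWARE_MPI
  // GPU-aware MPI: device pointers go straight into the MPI call.
  MPI_Sendrecv(d_send, bytes, MPI_BYTE, to_rank,   0,
               d_recv, bytes, MPI_BYTE, from_rank, 0,
               comm, MPI_STATUS_IGNORE);
#else
  // No GPU-aware MPI: copy out to host, communicate, copy back in
  // (the role played by hsend_buf/hrecv_buf in the hunk above).
  std::vector<char> h_send(bytes), h_recv(bytes);
  cudaMemcpy(h_send.data(), d_send, bytes, cudaMemcpyDeviceToHost);
  MPI_Sendrecv(h_send.data(), bytes, MPI_BYTE, to_rank,   0,
               h_recv.data(), bytes, MPI_BYTE, from_rank, 0,
               comm, MPI_STATUS_IGNORE);
  cudaMemcpy(d_recv, h_recv.data(), bytes, cudaMemcpyHostToDevice);
#endif
}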
@@ -244,7 +244,6 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
   scalar_object * recv_buf_extract_mpi;
   scalar_object * send_buf_extract_mpi;
 
-
   for(int s=0;s<Nsimd;s++){
     send_buf_extract[s].resize(buffer_size);
     recv_buf_extract[s].resize(buffer_size);
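This hunk only drops a stray blank line, but the surrounding code is the SIMD-lane split used by Cshift_comms_simd: a vectorised vobj packs Nsimd lattice sites, and before communication each lane is extracted into its own contiguous scalar buffer (the send_buf_extract[s] being resized here). A toy version of that extraction, with illustrative stand-in types rather than Grid's:

#include <array>
#include <cstddef>
#include <vector>

constexpr int Nsimd = 8;
struct vreal { std::array<double, Nsimd> lane; };  // stand-in for a vectorised object
using sreal = double;                              // stand-in for the scalar object

// Split a vectorised field into Nsimd contiguous per-lane buffers.
void extract_lanes(const std::vector<vreal> &in,
                   std::array<std::vector<sreal>, Nsimd> &out) {
  for (int s = 0; s < Nsimd; s++) out[s].resize(in.size());
  for (std::size_t i = 0; i < in.size(); i++)
    for (int s = 0; s < Nsimd; s++)
      out[s][i] = in[i].lane[s];                   // lane s of site i -> buffer s
}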
@@ -485,7 +485,6 @@ public:
     assert(this->u_comm_offset==this->_unified_buffer_size);
     accelerator_barrier();
 #ifdef NVLINK_GET
-#warning "NVLINK_GET"
     this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his
     // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
     // Or issue barrier AFTER the DMA is running
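Both this hunk and the one below only remove the noisy #warning "NVLINK_GET" compile-time diagnostic; the synchronisation ordering the comments describe is kept: finish the local gathers on the device first, then barrier with the neighbours so that each rank may pull the other's gathered halo over NVLink/shared memory. A generic sketch of that ordering with plain CUDA and MPI calls (accelerator_barrier() and StencilBarrier() presumably wrap backend-specific equivalents of these):

#include <mpi.h>
#include <cuda_runtime.h>

// Publish my gathered halo so that intra-node neighbours can "get" it.
void publish_local_gather(MPI_Comm node_comm) {
  cudaDeviceSynchronize();   // 1. all my local gathers are complete in device memory
  MPI_Barrier(node_comm);    // 2. he can now get my local gather, I can get his
}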
@@ -518,7 +518,6 @@ public:
     }
     accelerator_barrier(); // All my local gathers are complete
 #ifdef NVLINK_GET
-#warning "NVLINK_GET"
     _grid->StencilBarrier(); // He can now get mu local gather, I can get his
     // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
     // Or issue barrier AFTER the DMA is running