Shared Memory test re-enabled on every Grid object creation.

Const improvements in Accelerator.h
2025-06-30 21:47:08 +01:00 · 2025-04-07 11:51:40 -04:00
parent a49fa3f8d0
commit e652fc2825
3 changed files with 18 additions and 20 deletions
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@ -1018,14 +1018,12 @@ void SharedMemory::SharedMemoryTest(void)
  ShmBarrier();
  for(uint64_t r=0;r<ShmSize;r++){
    acceleratorCopyFromDevice(ShmCommBufs[r],check,3*sizeof(uint64_t));
    //      accelerator_barrier();
    //    std::cerr << " ShmRank "<<ShmRank<<" read "<<check[0]<<","<<check[1]<<","<<std::hex<<check[2]<<" from buf "<<ShmCommBufs[r]
    //	      <<std::dec<<std::endl;
    assert(check[0]==GlobalSharedMemory::WorldNode);
    assert(check[1]==r);
    assert(check[2]==magic);
  }
  ShmBarrier();
  std::cout << GridLogDebug << " SharedMemoryTest has passed "<<std::endl;
 }
 void *SharedMemory::ShmBuffer(int rank)
--- a/Grid/stencil/Stencil.h
+++ b/Grid/stencil/Stencil.h
@ -690,7 +690,7 @@ public:
 	}
      }
    }
-    std::cout << "BuildSurfaceList size is "<<surface_list_size<<std::endl;
+    //    std::cout << "BuildSurfaceList size is "<<surface_list_size<<std::endl;
    surface_list.resize(surface_list_size);
    std::vector<int> surface_list_host(surface_list_size);
    int32_t ss=0;
@ -710,7 +710,7 @@ public:
      }
    }
    acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int));
-    std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl;
+    //    std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl;
  }
  /// Introduce a block structure and switch off comms on boundaries
  void DirichletBlock(const Coordinate &dirichlet_block)
@ -802,8 +802,8 @@ public:
    this->_entries_host_p = &_entries[0];
    this->_entries_p = &_entries_device[0];
-    std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
+    //    std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
-	      <<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;
+    //	      <<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;
    for(int ii=0;ii<npoints;ii++){
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@ -250,15 +250,15 @@ inline void acceleratorFreeHost(void *ptr){ cudaFree(ptr);};
 inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes)  { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
 inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
 inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
-inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
+inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
  acceleratorCopyToDevice(to,from,bytes, cudaMemcpyHostToDevice);
  return 0;
 }
-inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
+inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
  acceleratorCopyFromDevice(from,to,bytes);
  return 0;
 }
-inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes) // Asynch
+inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
 {
  cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
  return 0;
@ -373,9 +373,9 @@ inline int acceleratorEventIsComplete(acceleratorEvent_t ev)
  return (ev.get_info<sycl::info::event::command_execution_status>() == sycl::info::event_command_status::complete);
 }
-inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes)  { return theCopyAccelerator->memcpy(to,from,bytes);}
+inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)  { return theCopyAccelerator->memcpy(to,from,bytes);}
-inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from,void *to,size_t bytes)        { return theCopyAccelerator->memcpy(to,from,bytes); }
+inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes)        { return theCopyAccelerator->memcpy(to,from,bytes); }
-inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from,void *to,size_t bytes)      { return theCopyAccelerator->memcpy(to,from,bytes); }
+inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes)      { return theCopyAccelerator->memcpy(to,from,bytes); }
 inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes)  { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
 inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
@ -532,16 +532,16 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto discard=
 typedef int acceleratorEvent_t;
-inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes) // Asynch
+inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
 {
  auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream);
  return 0;
 }
-inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
+inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
  acceleratorCopyToDevice(from,to,bytes);
  return 0;
 }
-inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
+inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
  acceleratorCopyFromDevice(from,to,bytes);
  return 0;
 }
@ -611,11 +611,11 @@ inline void acceleratorMem(void)
 accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
-inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from,void *to,size_t bytes)        { acceleratorCopyToDevice(from,to,bytes); return 0; }
+inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes)        { acceleratorCopyToDevice(from,to,bytes); return 0; }
-inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from,void *to,size_t bytes)      { acceleratorCopyFromDevice(from,to,bytes); return 0; }
+inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes)      { acceleratorCopyFromDevice(from,to,bytes); return 0; }
 inline void acceleratorEventWait(acceleratorEvent_t ev){}
 inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev); return 1;}
-inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes)  { thread_bcopy(from,to,bytes); return 0;}
+inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)  { thread_bcopy(from,to,bytes); return 0;}
 inline void acceleratorCopySynchronise(void) {};
@ -699,7 +699,7 @@ accelerator_inline void acceleratorFence(void)
  return;
 }
-inline void acceleratorCopyDeviceToDevice(const void *from,void *to,size_t bytes)
+inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 {
  acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
  acceleratorCopySynchronise();