1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-24 12:45:56 +01:00

The shared-memory test is re-enabled and now runs on every Grid object creation.

Const-correctness changes to the `from` parameters of the copy helpers in Accelerator.h.
This commit is contained in:
Peter Boyle 2025-04-07 11:51:40 -04:00
parent a49fa3f8d0
commit e652fc2825
3 changed files with 18 additions and 20 deletions

View File

@ -1018,14 +1018,12 @@ void SharedMemory::SharedMemoryTest(void)
ShmBarrier(); ShmBarrier();
for(uint64_t r=0;r<ShmSize;r++){ for(uint64_t r=0;r<ShmSize;r++){
acceleratorCopyFromDevice(ShmCommBufs[r],check,3*sizeof(uint64_t)); acceleratorCopyFromDevice(ShmCommBufs[r],check,3*sizeof(uint64_t));
// accelerator_barrier();
// std::cerr << " ShmRank "<<ShmRank<<" read "<<check[0]<<","<<check[1]<<","<<std::hex<<check[2]<<" from buf "<<ShmCommBufs[r]
// <<std::dec<<std::endl;
assert(check[0]==GlobalSharedMemory::WorldNode); assert(check[0]==GlobalSharedMemory::WorldNode);
assert(check[1]==r); assert(check[1]==r);
assert(check[2]==magic); assert(check[2]==magic);
} }
ShmBarrier(); ShmBarrier();
std::cout << GridLogDebug << " SharedMemoryTest has passed "<<std::endl;
} }
void *SharedMemory::ShmBuffer(int rank) void *SharedMemory::ShmBuffer(int rank)

View File

@ -690,7 +690,7 @@ public:
} }
} }
} }
std::cout << "BuildSurfaceList size is "<<surface_list_size<<std::endl; // std::cout << "BuildSurfaceList size is "<<surface_list_size<<std::endl;
surface_list.resize(surface_list_size); surface_list.resize(surface_list_size);
std::vector<int> surface_list_host(surface_list_size); std::vector<int> surface_list_host(surface_list_size);
int32_t ss=0; int32_t ss=0;
@ -710,7 +710,7 @@ public:
} }
} }
acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int)); acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int));
std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl; // std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl;
} }
/// Introduce a block structure and switch off comms on boundaries /// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block) void DirichletBlock(const Coordinate &dirichlet_block)
@ -802,8 +802,8 @@ public:
this->_entries_host_p = &_entries[0]; this->_entries_host_p = &_entries[0];
this->_entries_p = &_entries_device[0]; this->_entries_p = &_entries_device[0];
std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites // std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
<<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl; // <<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;
for(int ii=0;ii<npoints;ii++){ for(int ii=0;ii<npoints;ii++){

View File

@ -250,15 +250,15 @@ inline void acceleratorFreeHost(void *ptr){ cudaFree(ptr);};
// Copies `bytes` bytes from `from` to `to` with cudaMemcpy using the
// cudaMemcpyHostToDevice direction. The cudaMemcpy return code is discarded.
inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
// Copies `bytes` bytes from `from` to `to` with cudaMemcpy using the
// cudaMemcpyDeviceToHost direction. The cudaMemcpy return code is discarded.
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
// Forwards (base, value, bytes) unchanged to cudaMemset; the return code is
// discarded.
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);} inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
// "Asynch" host-to-device copy entry point for the CUDA branch.
// The body forwards to acceleratorCopyToDevice and always returns 0; the
// `stream` parameter (default copyStream) is not referenced in the body.
// NOTE(review): the call below passes four arguments in the order
// (to, from, bytes, cudaMemcpyHostToDevice), whereas acceleratorCopyToDevice
// as defined just above takes three: (from, to, bytes). The sibling
// acceleratorCopyFromDeviceAsynch forwards (from,to,bytes). This looks like a
// latent argument-count/argument-order bug — confirm and fix at the source.
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
acceleratorCopyToDevice(to,from,bytes, cudaMemcpyHostToDevice); acceleratorCopyToDevice(to,from,bytes, cudaMemcpyHostToDevice);
return 0; return 0;
} }
// "Asynch" device-to-host copy entry point for the CUDA branch.
// The body forwards (from, to, bytes) to acceleratorCopyFromDevice and always
// returns 0; the `stream` parameter (default copyStream) is not referenced in
// the body.
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
acceleratorCopyFromDevice(from,to,bytes); acceleratorCopyFromDevice(from,to,bytes);
return 0; return 0;
} }
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes) // Asynch inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{ {
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream); cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
return 0; return 0;
@ -373,9 +373,9 @@ inline int acceleratorEventIsComplete(acceleratorEvent_t ev)
return (ev.get_info<sycl::info::event::command_execution_status>() == sycl::info::event_command_status::complete); return (ev.get_info<sycl::info::event::command_execution_status>() == sycl::info::event_command_status::complete);
} }
// Copy helpers built on theCopyAccelerator (presumably the SYCL queue used for
// copies, given the sycl:: calls in the preceding function — confirm).
// The three *Asynch variants are identical in body: each issues
// theCopyAccelerator->memcpy(to,from,bytes) and returns its result as the
// acceleratorEvent_t, without waiting.
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes);} inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes);}
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); } inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); }
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); } inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); }
// The non-Asynch variants issue the same memcpy and then call
// theCopyAccelerator->wait() before returning; the memcpy result is discarded.
inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
@ -532,16 +532,16 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto discard=
typedef int acceleratorEvent_t; typedef int acceleratorEvent_t;
// HIP device-to-device copy: enqueues hipMemcpyDtoDAsync(to,from,bytes) on
// copyStream and returns 0 without waiting here.
// The HIP status is captured in `discard` and never inspected, so a failed
// enqueue is not reported to the caller.
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes) // Asynch inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{ {
auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream); auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream);
return 0; return 0;
} }
// "Asynch" host-to-device copy entry point for the HIP branch.
// The body forwards (from, to, bytes) to acceleratorCopyToDevice and always
// returns 0; the `stream` parameter (default copyStream) is not referenced in
// the body.
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
acceleratorCopyToDevice(from,to,bytes); acceleratorCopyToDevice(from,to,bytes);
return 0; return 0;
} }
// "Asynch" device-to-host copy entry point for the HIP branch.
// The body forwards (from, to, bytes) to acceleratorCopyFromDevice and always
// returns 0; the `stream` parameter (default copyStream) is not referenced in
// the body.
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
acceleratorCopyFromDevice(from,to,bytes); acceleratorCopyFromDevice(from,to,bytes);
return 0; return 0;
} }
@ -611,11 +611,11 @@ inline void acceleratorMem(void)
// NOTE(review): presumably the host-only fallback branch (no accelerator
// backend) — the enclosing #if is outside this view; confirm.
// Always returns 0; Nsimd is unused.
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
// The "Asynch" host<->device copies forward to the non-Asynch functions and
// return 0 as the event value.
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(const void *from,void *to,size_t bytes) { acceleratorCopyToDevice(from,to,bytes); return 0; } inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyToDevice(from,to,bytes); return 0; }
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(const void *from,void *to,size_t bytes) { acceleratorCopyFromDevice(from,to,bytes); return 0; } inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyFromDevice(from,to,bytes); return 0; }
// Waiting on an event is a no-op here (empty body).
inline void acceleratorEventWait(acceleratorEvent_t ev){} inline void acceleratorEventWait(acceleratorEvent_t ev){}
// Calls acceleratorEventWait(ev) and then unconditionally reports completion (1).
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev); return 1;} inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev); return 1;}
// Delegates to thread_bcopy(from,to,bytes) and returns 0. thread_bcopy is
// defined elsewhere; note its argument order here is (from, to), not (to, from).
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(const void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); return 0;} inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); return 0;}
// Copy synchronisation is a no-op here (empty body).
inline void acceleratorCopySynchronise(void) {}; inline void acceleratorCopySynchronise(void) {};
@ -699,7 +699,7 @@ accelerator_inline void acceleratorFence(void)
return; return;
} }
inline void acceleratorCopyDeviceToDevice(const void *from,void *to,size_t bytes) inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
{ {
acceleratorCopyDeviceToDeviceAsynch(from,to,bytes); acceleratorCopyDeviceToDeviceAsynch(from,to,bytes);
acceleratorCopySynchronise(); acceleratorCopySynchronise();