1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Intranode asynch hipMemCopy

This commit is contained in:
Peter Boyle 2021-11-22 20:45:12 -05:00
parent 76cde73705
commit 6ceb556684

View File

@ -230,6 +230,7 @@ inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
}
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
inline int acceleratorIsCommunicable(void *ptr)
{
// int uvm=0;
@ -337,6 +338,7 @@ NAMESPACE_BEGIN(Grid);
#define accelerator __host__ __device__
#define accelerator_inline __host__ __device__ inline
extern hipStream_t copyStream;
/*These routines define mapping from thread grid to loop & vector lane indexing */
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
@ -411,10 +413,16 @@ inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
inline void acceleratorCopySynchronise(void) { }
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
//inline void acceleratorCopySynchronise(void) { }
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToDevice,copyStream);
}
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
#endif
//////////////////////////////////////////////
@ -485,18 +493,12 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
///////////////////////////////////////////////////
// Synchronise across local threads for divergence resynch
///////////////////////////////////////////////////
accelerator_inline void acceleratorSynchronise(void)
accelerator_inline void acceleratorSynchronise(void) // Only Nvidia needs
{
#ifdef GRID_SIMT
#ifdef GRID_CUDA
__syncwarp();
#endif
#ifdef GRID_SYCL
//cl::sycl::detail::workGroupBarrier();
#endif
#ifdef GRID_HIP
__syncthreads();
#endif
#endif
return;
}