mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Intranode asynch hipMemCopy
This commit is contained in:
parent
76cde73705
commit
6ceb556684
@ -230,6 +230,7 @@ inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes
|
|||||||
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
|
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
|
||||||
}
|
}
|
||||||
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
|
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
|
||||||
|
|
||||||
inline int acceleratorIsCommunicable(void *ptr)
|
inline int acceleratorIsCommunicable(void *ptr)
|
||||||
{
|
{
|
||||||
// int uvm=0;
|
// int uvm=0;
|
||||||
@ -337,6 +338,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#define accelerator __host__ __device__
|
#define accelerator __host__ __device__
|
||||||
#define accelerator_inline __host__ __device__ inline
|
#define accelerator_inline __host__ __device__ inline
|
||||||
|
|
||||||
|
extern hipStream_t copyStream;
|
||||||
/*These routines define mapping from thread grid to loop & vector lane indexing */
|
/*These routines define mapping from thread grid to loop & vector lane indexing */
|
||||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||||
#ifdef GRID_SIMT
|
#ifdef GRID_SIMT
|
||||||
@ -411,10 +413,16 @@ inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
|
|||||||
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
|
||||||
inline void acceleratorCopySynchronise(void) { }
|
//inline void acceleratorCopySynchronise(void) { }
|
||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
|
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
|
||||||
|
|
||||||
|
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||||
|
{
|
||||||
|
hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToDevice,copyStream);
|
||||||
|
}
|
||||||
|
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
@ -485,18 +493,12 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
|
|||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
// Synchronise across local threads for divergence resynch
|
// Synchronise across local threads for divergence resynch
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
accelerator_inline void acceleratorSynchronise(void)
|
accelerator_inline void acceleratorSynchronise(void) // Only Nvidia needs
|
||||||
{
|
{
|
||||||
#ifdef GRID_SIMT
|
#ifdef GRID_SIMT
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
__syncwarp();
|
__syncwarp();
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
|
||||||
//cl::sycl::detail::workGroupBarrier();
|
|
||||||
#endif
|
|
||||||
#ifdef GRID_HIP
|
|
||||||
__syncthreads();
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user