1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-11 14:40:46 +01:00

Updates for HiP

This commit is contained in:
Peter Boyle 2020-05-24 14:00:55 -04:00
parent 92b342a477
commit 32be2b13d3
2 changed files with 60 additions and 26 deletions

View File

@ -33,17 +33,17 @@ void acceleratorInit(void)
for (int i = 0; i < nDevices; i++) { for (int i = 0; i < nDevices; i++) {
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
cudaGetDeviceProperties(&gpu_props[i], i); cudaGetDeviceProperties(&gpu_props[i], i);
if ( world_rank == 0) { if ( world_rank == 0) {
cudaDeviceProp prop; cudaDeviceProp prop;
prop = gpu_props[i]; prop = gpu_props[i];
printf("GpuInit: ========================\n"); printf("AcceleratorCudaInit: ========================\n");
printf("GpuInit: Device Number : %d\n", i); printf("AcceleratorCudaInit: Device Number : %d\n", i);
printf("GpuInit: ========================\n"); printf("AcceleratorCudaInit: ========================\n");
printf("GpuInit: Device identifier: %s\n", prop.name); printf("AcceleratorCudaInit: Device identifier: %s\n", prop.name);
GPU_PROP(managedMemory); GPU_PROP(managedMemory);
GPU_PROP(isMultiGpuBoard); GPU_PROP(isMultiGpuBoard);
@ -55,12 +55,12 @@ void acceleratorInit(void)
} }
#ifdef GRID_IBM_SUMMIT #ifdef GRID_IBM_SUMMIT
// IBM Jsrun makes cuda Device numbering screwy and not match rank // IBM Jsrun makes cuda Device numbering screwy and not match rank
if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); if ( world_rank == 0 ) printf("AcceleratorCudaInit: IBM Summit or similar - NOT setting device to node rank\n");
#else #else
if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); if ( world_rank == 0 ) printf("AcceleratorCudaInit: setting device to node rank\n");
cudaSetDevice(rank); cudaSetDevice(rank);
#endif #endif
if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n");
} }
#endif #endif
@ -92,17 +92,17 @@ void acceleratorInit(void)
for (int i = 0; i < nDevices; i++) { for (int i = 0; i < nDevices; i++) {
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
hipGetDeviceProperties(&gpu_props[i], i); hipGetDeviceProperties(&gpu_props[i], i);
if ( world_rank == 0) { if ( world_rank == 0) {
hipDeviceProp_t prop; hipDeviceProp_t prop;
prop = gpu_props[i]; prop = gpu_props[i];
printf("GpuInit: ========================\n"); printf("AcceleratorHipInit: ========================\n");
printf("GpuInit: Device Number : %d\n", i); printf("AcceleratorHipInit: Device Number : %d\n", i);
printf("GpuInit: ========================\n"); printf("AcceleratorHipInit: ========================\n");
printf("GpuInit: Device identifier: %s\n", prop.name); printf("AcceleratorHipInit: Device identifier: %s\n", prop.name);
// GPU_PROP(managedMemory); // GPU_PROP(managedMemory);
GPU_PROP(isMultiGpuBoard); GPU_PROP(isMultiGpuBoard);
@ -114,12 +114,12 @@ void acceleratorInit(void)
} }
#ifdef GRID_IBM_SUMMIT #ifdef GRID_IBM_SUMMIT
// IBM Jsrun makes cuda Device numbering screwy and not match rank // IBM Jsrun makes cuda Device numbering screwy and not match rank
if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n"); if ( world_rank == 0 ) printf("AcceleratorHipInit: IBM Summit or similar - NOT setting device to node rank\n");
#else #else
if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n"); if ( world_rank == 0 ) printf("AcceleratorHipInit: setting device to node rank\n");
cudaSetDevice(rank); hipSetDevice(rank);
#endif #endif
if ( world_rank == 0 ) printf("GpuInit: ================================================\n"); if ( world_rank == 0 ) printf("AcceleratorHipInit: ================================================\n");
} }
#endif #endif
@ -159,22 +159,22 @@ void acceleratorInit(void)
/* /*
for (int i = 0; i < nDevices; i++) { for (int i = 0; i < nDevices; i++) {
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorSyclInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
cudaGetDeviceProperties(&gpu_props[i], i); cudaGetDeviceProperties(&gpu_props[i], i);
if ( world_rank == 0) { if ( world_rank == 0) {
cudaDeviceProp prop; cudaDeviceProp prop;
prop = gpu_props[i]; prop = gpu_props[i];
printf("GpuInit: ========================\n"); printf("AcceleratorSyclInit: ========================\n");
printf("GpuInit: Device Number : %d\n", i); printf("AcceleratorSyclInit: Device Number : %d\n", i);
printf("GpuInit: ========================\n"); printf("AcceleratorSyclInit: ========================\n");
printf("GpuInit: Device identifier: %s\n", prop.name); printf("AcceleratorSyclInit: Device identifier: %s\n", prop.name);
} }
} }
*/ */
if ( world_rank == 0 ) { if ( world_rank == 0 ) {
printf("GpuInit: ================================================\n"); printf("AcceleratorSyclInit: ================================================\n");
} }
} }
#endif #endif

View File

@ -284,6 +284,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
inline void *acceleratorAllocShared(size_t bytes) inline void *acceleratorAllocShared(size_t bytes)
{ {
#if 0
void *ptr=NULL; void *ptr=NULL;
auto err = hipMallocManaged((void **)&ptr,bytes); auto err = hipMallocManaged((void **)&ptr,bytes);
if( err != hipSuccess ) { if( err != hipSuccess ) {
@ -291,6 +292,9 @@ inline void *acceleratorAllocShared(size_t bytes)
printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err)); printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err));
} }
return ptr; return ptr;
#else
return malloc(bytes);
#endif
}; };
inline void *acceleratorAllocDevice(size_t bytes) inline void *acceleratorAllocDevice(size_t bytes)
@ -304,10 +308,10 @@ inline void *acceleratorAllocDevice(size_t bytes)
return ptr; return ptr;
}; };
inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);}; inline void acceleratorFreeShared(void *ptr){ free(ptr);};
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
#endif #endif
@ -379,5 +383,35 @@ accelerator_inline void acceleratorSynchronise(void)
#endif #endif
return; return;
} }
accelerator_inline void acceleratorSynchroniseAll(void)
{
#ifdef GRID_SIMT
#ifdef GRID_CUDA
__syncthreads();
#endif
#ifdef GRID_SYCL
// No barrier call on SYCL?? // Option get __spir:: stuff to do warp barrier
#endif
#ifdef GRID_HIP
__syncthreads();
#endif
#endif
return;
}
accelerator_inline void acceleratorFence(void)
{
#ifdef GRID_SIMT
#ifdef GRID_CUDA
__threadfence();
#endif
#ifdef GRID_SYCL
// FIXMEE
#endif
#ifdef GRID_HIP
__threadfence();
#endif
#endif
return;
}
NAMESPACE_END(Grid); NAMESPACE_END(Grid);