mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-11 14:40:46 +01:00
Updates for HiP
This commit is contained in:
parent
92b342a477
commit
32be2b13d3
@ -33,17 +33,17 @@ void acceleratorInit(void)
|
|||||||
|
|
||||||
for (int i = 0; i < nDevices; i++) {
|
for (int i = 0; i < nDevices; i++) {
|
||||||
|
|
||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||||
|
|
||||||
cudaGetDeviceProperties(&gpu_props[i], i);
|
cudaGetDeviceProperties(&gpu_props[i], i);
|
||||||
if ( world_rank == 0) {
|
if ( world_rank == 0) {
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
prop = gpu_props[i];
|
prop = gpu_props[i];
|
||||||
printf("GpuInit: ========================\n");
|
printf("AcceleratorCudaInit: ========================\n");
|
||||||
printf("GpuInit: Device Number : %d\n", i);
|
printf("AcceleratorCudaInit: Device Number : %d\n", i);
|
||||||
printf("GpuInit: ========================\n");
|
printf("AcceleratorCudaInit: ========================\n");
|
||||||
printf("GpuInit: Device identifier: %s\n", prop.name);
|
printf("AcceleratorCudaInit: Device identifier: %s\n", prop.name);
|
||||||
|
|
||||||
GPU_PROP(managedMemory);
|
GPU_PROP(managedMemory);
|
||||||
GPU_PROP(isMultiGpuBoard);
|
GPU_PROP(isMultiGpuBoard);
|
||||||
@ -55,12 +55,12 @@ void acceleratorInit(void)
|
|||||||
}
|
}
|
||||||
#ifdef GRID_IBM_SUMMIT
|
#ifdef GRID_IBM_SUMMIT
|
||||||
// IBM Jsrun makes cuda Device numbering screwy and not match rank
|
// IBM Jsrun makes cuda Device numbering screwy and not match rank
|
||||||
if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n");
|
if ( world_rank == 0 ) printf("AcceleratorCudaInit: IBM Summit or similar - NOT setting device to node rank\n");
|
||||||
#else
|
#else
|
||||||
if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n");
|
if ( world_rank == 0 ) printf("AcceleratorCudaInit: setting device to node rank\n");
|
||||||
cudaSetDevice(rank);
|
cudaSetDevice(rank);
|
||||||
#endif
|
#endif
|
||||||
if ( world_rank == 0 ) printf("GpuInit: ================================================\n");
|
if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -92,17 +92,17 @@ void acceleratorInit(void)
|
|||||||
|
|
||||||
for (int i = 0; i < nDevices; i++) {
|
for (int i = 0; i < nDevices; i++) {
|
||||||
|
|
||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||||
|
|
||||||
hipGetDeviceProperties(&gpu_props[i], i);
|
hipGetDeviceProperties(&gpu_props[i], i);
|
||||||
if ( world_rank == 0) {
|
if ( world_rank == 0) {
|
||||||
hipDeviceProp_t prop;
|
hipDeviceProp_t prop;
|
||||||
prop = gpu_props[i];
|
prop = gpu_props[i];
|
||||||
printf("GpuInit: ========================\n");
|
printf("AcceleratorHipInit: ========================\n");
|
||||||
printf("GpuInit: Device Number : %d\n", i);
|
printf("AcceleratorHipInit: Device Number : %d\n", i);
|
||||||
printf("GpuInit: ========================\n");
|
printf("AcceleratorHipInit: ========================\n");
|
||||||
printf("GpuInit: Device identifier: %s\n", prop.name);
|
printf("AcceleratorHipInit: Device identifier: %s\n", prop.name);
|
||||||
|
|
||||||
// GPU_PROP(managedMemory);
|
// GPU_PROP(managedMemory);
|
||||||
GPU_PROP(isMultiGpuBoard);
|
GPU_PROP(isMultiGpuBoard);
|
||||||
@ -114,12 +114,12 @@ void acceleratorInit(void)
|
|||||||
}
|
}
|
||||||
#ifdef GRID_IBM_SUMMIT
|
#ifdef GRID_IBM_SUMMIT
|
||||||
// IBM Jsrun makes cuda Device numbering screwy and not match rank
|
// IBM Jsrun makes cuda Device numbering screwy and not match rank
|
||||||
if ( world_rank == 0 ) printf("GpuInit: IBM Summit or similar - NOT setting device to node rank\n");
|
if ( world_rank == 0 ) printf("AcceleratorHipInit: IBM Summit or similar - NOT setting device to node rank\n");
|
||||||
#else
|
#else
|
||||||
if ( world_rank == 0 ) printf("GpuInit: setting device to node rank\n");
|
if ( world_rank == 0 ) printf("AcceleratorHipInit: setting device to node rank\n");
|
||||||
cudaSetDevice(rank);
|
hipSetDevice(rank);
|
||||||
#endif
|
#endif
|
||||||
if ( world_rank == 0 ) printf("GpuInit: ================================================\n");
|
if ( world_rank == 0 ) printf("AcceleratorHipInit: ================================================\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -159,22 +159,22 @@ void acceleratorInit(void)
|
|||||||
/*
|
/*
|
||||||
for (int i = 0; i < nDevices; i++) {
|
for (int i = 0; i < nDevices; i++) {
|
||||||
|
|
||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorSyclInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
|
||||||
|
|
||||||
cudaGetDeviceProperties(&gpu_props[i], i);
|
cudaGetDeviceProperties(&gpu_props[i], i);
|
||||||
if ( world_rank == 0) {
|
if ( world_rank == 0) {
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
prop = gpu_props[i];
|
prop = gpu_props[i];
|
||||||
printf("GpuInit: ========================\n");
|
printf("AcceleratorSyclInit: ========================\n");
|
||||||
printf("GpuInit: Device Number : %d\n", i);
|
printf("AcceleratorSyclInit: Device Number : %d\n", i);
|
||||||
printf("GpuInit: ========================\n");
|
printf("AcceleratorSyclInit: ========================\n");
|
||||||
printf("GpuInit: Device identifier: %s\n", prop.name);
|
printf("AcceleratorSyclInit: Device identifier: %s\n", prop.name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
if ( world_rank == 0 ) {
|
if ( world_rank == 0 ) {
|
||||||
printf("GpuInit: ================================================\n");
|
printf("AcceleratorSyclInit: ================================================\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -284,6 +284,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
|||||||
|
|
||||||
inline void *acceleratorAllocShared(size_t bytes)
|
inline void *acceleratorAllocShared(size_t bytes)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
void *ptr=NULL;
|
void *ptr=NULL;
|
||||||
auto err = hipMallocManaged((void **)&ptr,bytes);
|
auto err = hipMallocManaged((void **)&ptr,bytes);
|
||||||
if( err != hipSuccess ) {
|
if( err != hipSuccess ) {
|
||||||
@ -291,6 +292,9 @@ inline void *acceleratorAllocShared(size_t bytes)
|
|||||||
printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err));
|
printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err));
|
||||||
}
|
}
|
||||||
return ptr;
|
return ptr;
|
||||||
|
#else
|
||||||
|
return malloc(bytes);
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void *acceleratorAllocDevice(size_t bytes)
|
inline void *acceleratorAllocDevice(size_t bytes)
|
||||||
@ -304,10 +308,10 @@ inline void *acceleratorAllocDevice(size_t bytes)
|
|||||||
return ptr;
|
return ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeShared(void *ptr){ free(ptr);};
|
||||||
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -379,5 +383,35 @@ accelerator_inline void acceleratorSynchronise(void)
|
|||||||
#endif
|
#endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
accelerator_inline void acceleratorSynchroniseAll(void)
|
||||||
|
{
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
#ifdef GRID_CUDA
|
||||||
|
__syncthreads();
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_SYCL
|
||||||
|
// No barrier call on SYCL?? // Option get __spir:: stuff to do warp barrier
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_HIP
|
||||||
|
__syncthreads();
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
accelerator_inline void acceleratorFence(void)
|
||||||
|
{
|
||||||
|
#ifdef GRID_SIMT
|
||||||
|
#ifdef GRID_CUDA
|
||||||
|
__threadfence();
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_SYCL
|
||||||
|
// FIXMEE
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_HIP
|
||||||
|
__threadfence();
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user