mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Copy stream HIP improvements
This commit is contained in:
parent
806b02bddf
commit
2cb5bedc15
@ -1,6 +1,7 @@
|
|||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
int world_rank; // Use to control world rank for print guarding
|
||||||
int acceleratorAbortOnGpuError=1;
|
int acceleratorAbortOnGpuError=1;
|
||||||
uint32_t accelerator_threads=2;
|
uint32_t accelerator_threads=2;
|
||||||
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
||||||
@ -16,7 +17,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
|||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
cudaDeviceProp *gpu_props;
|
cudaDeviceProp *gpu_props;
|
||||||
cudaStream_t copyStream;
|
cudaStream_t copyStream;
|
||||||
cudaStream_t cpuStream;
|
cudaStream_t computeStream;
|
||||||
void acceleratorInit(void)
|
void acceleratorInit(void)
|
||||||
{
|
{
|
||||||
int nDevices = 1;
|
int nDevices = 1;
|
||||||
@ -24,7 +25,8 @@ void acceleratorInit(void)
|
|||||||
gpu_props = new cudaDeviceProp[nDevices];
|
gpu_props = new cudaDeviceProp[nDevices];
|
||||||
|
|
||||||
char * localRankStr = NULL;
|
char * localRankStr = NULL;
|
||||||
int rank = 0, world_rank=0;
|
int rank = 0;
|
||||||
|
world_rank=0;
|
||||||
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
|
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
@ -99,7 +101,7 @@ void acceleratorInit(void)
|
|||||||
|
|
||||||
cudaSetDevice(device);
|
cudaSetDevice(device);
|
||||||
cudaStreamCreate(©Stream);
|
cudaStreamCreate(©Stream);
|
||||||
cudaStreamCreate(&cpuStream);
|
cudaStreamCreate(&computeStream);
|
||||||
const int len=64;
|
const int len=64;
|
||||||
char busid[len];
|
char busid[len];
|
||||||
if( rank == world_rank ) {
|
if( rank == world_rank ) {
|
||||||
@ -114,7 +116,7 @@ void acceleratorInit(void)
|
|||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
hipDeviceProp_t *gpu_props;
|
hipDeviceProp_t *gpu_props;
|
||||||
hipStream_t copyStream;
|
hipStream_t copyStream;
|
||||||
hipStream_t cpuStream;
|
hipStream_t computeStream;
|
||||||
void acceleratorInit(void)
|
void acceleratorInit(void)
|
||||||
{
|
{
|
||||||
int nDevices = 1;
|
int nDevices = 1;
|
||||||
@ -122,7 +124,8 @@ void acceleratorInit(void)
|
|||||||
gpu_props = new hipDeviceProp_t[nDevices];
|
gpu_props = new hipDeviceProp_t[nDevices];
|
||||||
|
|
||||||
char * localRankStr = NULL;
|
char * localRankStr = NULL;
|
||||||
int rank = 0, world_rank=0;
|
int rank = 0;
|
||||||
|
world_rank=0;
|
||||||
// We extract the local rank initialization using an environment variable
|
// We extract the local rank initialization using an environment variable
|
||||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||||
{
|
{
|
||||||
@ -183,7 +186,7 @@ void acceleratorInit(void)
|
|||||||
#endif
|
#endif
|
||||||
hipSetDevice(device);
|
hipSetDevice(device);
|
||||||
hipStreamCreate(©Stream);
|
hipStreamCreate(©Stream);
|
||||||
hipStreamCreate(&cpuStream);
|
hipStreamCreate(&computeStream);
|
||||||
const int len=64;
|
const int len=64;
|
||||||
char busid[len];
|
char busid[len];
|
||||||
if( rank == world_rank ) {
|
if( rank == world_rank ) {
|
||||||
@ -210,7 +213,8 @@ void acceleratorInit(void)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
char * localRankStr = NULL;
|
char * localRankStr = NULL;
|
||||||
int rank = 0, world_rank=0;
|
int rank = 0;
|
||||||
|
world_rank=0;
|
||||||
|
|
||||||
// We extract the local rank initialization using an environment variable
|
// We extract the local rank initialization using an environment variable
|
||||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||||
|
@ -370,7 +370,8 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
|||||||
num1,num2,nsimd, lambda); \
|
num1,num2,nsimd, lambda); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
// Works with MPI if barrier here
|
||||||
|
// accelerator_barrier();
|
||||||
|
|
||||||
template<typename lambda> __global__
|
template<typename lambda> __global__
|
||||||
__launch_bounds__(64,1)
|
__launch_bounds__(64,1)
|
||||||
@ -400,7 +401,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
|||||||
|
|
||||||
#define accelerator_barrier(dummy) \
|
#define accelerator_barrier(dummy) \
|
||||||
{ \
|
{ \
|
||||||
hipStreamSynchronize(cpuStream); \
|
hipDeviceSynchronize(); \
|
||||||
auto err = hipGetLastError(); \
|
auto err = hipGetLastError(); \
|
||||||
if ( err != hipSuccess ) { \
|
if ( err != hipSuccess ) { \
|
||||||
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
||||||
@ -443,7 +444,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
|
|||||||
|
|
||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||||
{
|
{
|
||||||
hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);
|
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
||||||
}
|
}
|
||||||
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user