mirror of
https://github.com/paboyle/Grid.git
synced 2025-10-24 09:44:47 +01:00
Merge branch 'feature/gpu-port' of https://github.com/paboyle/Grid into feature/gpu-port
This commit is contained in:
@@ -239,48 +239,62 @@ static int Grid_is_initialised;
|
|||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// Reinit guard
|
// Reinit guard
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
#ifdef GRID_NVCC
|
|
||||||
void GridGpuInit(void)
|
void GridGpuInit(void)
|
||||||
{
|
{
|
||||||
int nDevices;
|
#ifdef GRID_NVCC
|
||||||
|
int nDevices = 1;
|
||||||
cudaGetDeviceCount(&nDevices);
|
cudaGetDeviceCount(&nDevices);
|
||||||
|
|
||||||
|
char * localRankStr = NULL;
|
||||||
|
|
||||||
|
int rank = 0, device = 0, world_rank=0;
|
||||||
|
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
|
||||||
|
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
|
||||||
|
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
|
||||||
|
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
|
||||||
|
// We extract the local rank initialization using an environment variable
|
||||||
|
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||||
|
{
|
||||||
|
rank = atoi(localRankStr);
|
||||||
|
device = rank %nDevices;
|
||||||
|
}
|
||||||
|
if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL)
|
||||||
|
{
|
||||||
|
rank = atoi(localRankStr);
|
||||||
|
device = rank %nDevices;
|
||||||
|
}
|
||||||
|
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
|
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
|
|
||||||
|
|
||||||
|
cudaSetDevice(device);
|
||||||
|
|
||||||
for (int i = 0; i < nDevices; i++) {
|
for (int i = 0; i < nDevices; i++) {
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
cudaGetDeviceProperties(&prop, i);
|
cudaGetDeviceProperties(&prop, i);
|
||||||
/*
|
|
||||||
printf("Device Number: %d\n", i);
|
if ( world_rank == 0) {
|
||||||
printf(" Device name: %s\n", prop.name);
|
printf("Device Number: %d\n", i);
|
||||||
printf(" Memory Clock Rate (KHz): %d\n",
|
printf(" Device name: %s\n", prop.name);
|
||||||
prop.memoryClockRate);
|
printf(" Peak Memory Bandwidth (GB/s): %f\n\n",2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
|
||||||
printf(" Memory Bus Width (bits): %d\n",
|
|
||||||
prop.memoryBusWidth);
|
|
||||||
printf(" Peak Memory Bandwidth (GB/s): %f\n\n",
|
|
||||||
2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
|
|
||||||
|
|
||||||
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf(" " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf(" " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
|
||||||
#define GPU_PROP(canMapHostMemory) printf(" " #canMapHostMemory ": %d \n",prop.canMapHostMemory);
|
#define GPU_PROP(canMapHostMemory) printf(" " #canMapHostMemory ": %d \n",prop.canMapHostMemory);
|
||||||
GPU_PROP(canMapHostMemory);
|
|
||||||
GPU_PROP(canUseHostPointerForRegisteredMem);
|
|
||||||
GPU_PROP(globalL1CacheSupported);
|
|
||||||
GPU_PROP(isMultiGpuBoard);
|
GPU_PROP(isMultiGpuBoard);
|
||||||
GPU_PROP(kernelExecTimeoutEnabled);
|
|
||||||
GPU_PROP(l2CacheSize);
|
GPU_PROP(l2CacheSize);
|
||||||
GPU_PROP(managedMemory);
|
GPU_PROP(managedMemory);
|
||||||
GPU_PROP(pageableMemoryAccess);
|
|
||||||
GPU_PROP(regsPerMultiprocessor);
|
|
||||||
GPU_PROP_FMT(sharedMemPerBlock,"%lx");
|
|
||||||
GPU_PROP_FMT(sharedMemPerMultiprocessor,"%lx");
|
|
||||||
GPU_PROP(singleToDoublePrecisionPerfRatio);
|
GPU_PROP(singleToDoublePrecisionPerfRatio);
|
||||||
GPU_PROP(unifiedAddressing);
|
GPU_PROP(unifiedAddressing);
|
||||||
GPU_PROP(warpSize);
|
GPU_PROP(warpSize);
|
||||||
*/
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void Grid_init(int *argc,char ***argv)
|
void Grid_init(int *argc,char ***argv)
|
||||||
{
|
{
|
||||||
|
GridGpuInit(); // Must come first to set device prior to MPI init
|
||||||
|
|
||||||
assert(Grid_is_initialised == 0);
|
assert(Grid_is_initialised == 0);
|
||||||
|
|
||||||
GridLogger::GlobalStopWatch.Start();
|
GridLogger::GlobalStopWatch.Start();
|
||||||
@@ -307,9 +321,6 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
Grid_debug_handler_init();
|
Grid_debug_handler_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GRID_NVCC
|
|
||||||
GridGpuInit();
|
|
||||||
#endif
|
|
||||||
CartesianCommunicator::Init(argc,argv);
|
CartesianCommunicator::Init(argc,argv);
|
||||||
|
|
||||||
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
||||||
|
@@ -471,7 +471,9 @@ esac
|
|||||||
case ${ac_COMMS} in
|
case ${ac_COMMS} in
|
||||||
*-auto)
|
*-auto)
|
||||||
LX_FIND_MPI
|
LX_FIND_MPI
|
||||||
## if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["The configure could not find the MPI compilation flags. N.B. The -auto mode is not supported by Cray wrappers. Use the non -auto version in this case."]); fi
|
echo MPI_CXXFLAGS $MPI_CXXFLAGS
|
||||||
|
echo MPI_CXXLDFLAGS $MPI_CXXLDFLAGS
|
||||||
|
echo MPI_CFLAGS $MPI_CFLAGS
|
||||||
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
||||||
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
||||||
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
||||||
|
Reference in New Issue
Block a user