1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-18 07:47:06 +01:00

Memory verbose and tracking, shrink default cache

Print PCI device IDs on node 0
This commit is contained in:
Peter Boyle
2021-10-05 11:41:03 -04:00
parent cffc736bb3
commit 8ed0b57b09
5 changed files with 98 additions and 25 deletions

View File

@ -74,11 +74,13 @@ void acceleratorInit(void)
// GPU_PROP(singleToDoublePrecisionPerfRatio);
}
}
MemoryManager::DeviceMaxBytes = (8*totalDeviceMem)/10; // Assume 80% ours
#undef GPU_PROP_FMT
#undef GPU_PROP
#ifdef GRID_DEFAULT_GPU
int device = 0;
// IBM jsrun remaps the CUDA device numbering, so device IDs do not match the rank
if ( world_rank == 0 ) {
printf("AcceleratorCudaInit: using default device \n");
@ -87,10 +89,20 @@ void acceleratorInit(void)
printf("AcceleratorCudaInit: Configure options --enable-setdevice=no \n");
}
#else
int device = rank;
printf("AcceleratorCudaInit: rank %d setting device to node rank %d\n",world_rank,rank);
printf("AcceleratorCudaInit: Configure options --enable-setdevice=yes \n");
cudaSetDevice(rank);
#endif
cudaSetDevice(device);
const int len=64;
char busid[len];
if( rank == world_rank ) {
cudaDeviceGetPCIBusId(busid, len, device);
printf("local rank %d device %d bus id: %s\n", rank, device, busid);
}
if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n");
}
#endif

View File

@ -115,6 +115,14 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#endif
} // CUDA specific
inline void cuda_mem(void)
{
size_t free_t,total_t,used_t;
cudaMemGetInfo(&free_t,&total_t);
used_t=total_t-free_t;
std::cout << " MemoryManager : GPU used "<<used_t<<" free "<<free_t<< " total "<<total_t<<std::endl;
}
#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \
{ \
int nt=acceleratorThreads(); \
@ -125,7 +133,11 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
}; \
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
std::cout << "========================== CUDA KERNEL CALL\n"; \
cuda_mem(); \
LambdaApply<<<cu_blocks,cu_threads>>>(num1,num2,nsimd,lambda); \
cuda_mem(); \
std::cout << "========================== CUDA KERNEL DONE\n"; \
}
#define accelerator_for6dNB(iter1, num1, \