1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

UVM check in MPI calls

This commit is contained in:
Peter Boyle 2020-09-03 20:29:26 -04:00
parent 8244caff25
commit a8309638d4
3 changed files with 26 additions and 12 deletions

View File

@ -309,15 +309,8 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
int ierr;
// Enforce no UVM in comms, device or host OK
int uvm;
auto
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) xmit);
assert(cuerr == cudaSuccess );
assert(uvm==0);
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) recv);
assert(cuerr == cudaSuccess );
assert(uvm==0);
assert(acceleratorIsCommunicable(xmit));
assert(acceleratorIsCommunicable(recv));
// Give the CPU to MPI immediately; can use threads to overlap optionally
// printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);

View File

@ -70,6 +70,7 @@ NAMESPACE_BEGIN(Grid);
//
// Memory management:
//
// int acceleratorIsCommunicable(void *pointer);
// void *acceleratorAllocShared(size_t bytes);
// void acceleratorFreeShared(void *ptr);
//
@ -166,6 +167,16 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
inline int acceleratorIsCommunicable(void *ptr)
{
int uvm;
auto
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) ptr);
assert(cuerr == cudaSuccess );
if(uvm) return 0;
else return 1;
}
#endif
//////////////////////////////////////////////
@ -220,6 +231,15 @@ inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
inline int acceleratorIsCommunicable(void *ptr)
{
#if 0
auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
if ( uvm = cl::sycl::usm::alloc::shared ) return 1;
else return 0;
#endif
return 1;
}
#endif
@ -299,6 +319,7 @@ inline void *acceleratorAllocShared(size_t bytes)
return malloc(bytes);
#endif
};
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
inline void *acceleratorAllocDevice(size_t bytes)
{
@ -353,6 +374,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
#ifdef HAVE_MM_MALLOC_H
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};

View File

@ -154,6 +154,7 @@ AC_ARG_ENABLE([accelerator],
case ${ac_ACCELERATOR} in
cuda)
echo CUDA acceleration
LIBS="${LIBS} -lcuda"
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
sycl)
echo SYCL acceleration
@ -323,7 +324,6 @@ case ${CXXTEST} in
# CXXLD="nvcc -v -link"
CXX="${CXXBASE} -x cu "
CXXLD="${CXXBASE} -link"
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
if test $ac_openmp = yes; then
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
@ -483,8 +483,7 @@ case ${ac_SHM} in
LDFLAGS_CPY=$LDFLAGS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_SEARCH_LIBS([shm_unlink], [rt], [],
[AC_MSG_ERROR("no library found for shm_unlink")])
AC_SEARCH_LIBS([shm_unlink], [rt], [],[AC_MSG_ERROR("no library found for shm_unlink")])
CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY
;;