mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
UVM check in MPI calls
This commit is contained in:
parent
8244caff25
commit
a8309638d4
@ -309,15 +309,8 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
|||||||
int ierr;
|
int ierr;
|
||||||
|
|
||||||
// Enforce no UVM in comms, device or host OK
|
// Enforce no UVM in comms, device or host OK
|
||||||
int uvm;
|
assert(acceleratorIsCommunicable(xmit));
|
||||||
auto
|
assert(acceleratorIsCommunicable(recv));
|
||||||
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) xmit);
|
|
||||||
assert(cuerr == cudaSuccess );
|
|
||||||
assert(uvm==0);
|
|
||||||
|
|
||||||
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) recv);
|
|
||||||
assert(cuerr == cudaSuccess );
|
|
||||||
assert(uvm==0);
|
|
||||||
|
|
||||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||||
// printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
|
// printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
|
||||||
|
@ -70,6 +70,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
//
|
//
|
||||||
// Memory management:
|
// Memory management:
|
||||||
//
|
//
|
||||||
|
// int acceleratorIsCommunicable(void *pointer);
|
||||||
// void *acceleratorAllocShared(size_t bytes);
|
// void *acceleratorAllocShared(size_t bytes);
|
||||||
// void acceleratorFreeShared(void *ptr);
|
// void acceleratorFreeShared(void *ptr);
|
||||||
//
|
//
|
||||||
@ -166,6 +167,16 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
|
|||||||
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
|
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
||||||
|
inline int acceleratorIsCommunicable(void *ptr)
|
||||||
|
{
|
||||||
|
int uvm;
|
||||||
|
auto
|
||||||
|
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) ptr);
|
||||||
|
assert(cuerr == cudaSuccess );
|
||||||
|
if(uvm) return 0;
|
||||||
|
else return 1;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
@ -220,6 +231,15 @@ inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
|||||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||||
|
// Report whether a buffer may be handed directly to the communication layer.
//
// On this backend the USM pointer-type query is compiled out (#if 0), so
// every pointer is currently reported as communicable (returns 1).
inline int acceleratorIsCommunicable(void *ptr)
{
#if 0
  // Disabled query, kept for reference.
  // NOTE(review): confirm the namespace/signature of get_pointer_type against
  // the SYCL implementation in use before enabling this branch.
  auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
  // Compare (==), do not assign.  Shared allocations are the ones to reject,
  // in line with the CUDA implementation, which returns 0 for managed memory.
  if ( uvm == cl::sycl::usm::alloc::shared ) return 0;
  else return 1;
#endif
  return 1;
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -299,6 +319,7 @@ inline void *acceleratorAllocShared(size_t bytes)
|
|||||||
return malloc(bytes);
|
return malloc(bytes);
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
// No pointer classification is performed here: every buffer is reported as
// communicable.
inline int acceleratorIsCommunicable(void *ptr)
{
  (void) ptr; // intentionally unused
  return 1;
}
|
||||||
|
|
||||||
inline void *acceleratorAllocDevice(size_t bytes)
|
inline void *acceleratorAllocDevice(size_t bytes)
|
||||||
{
|
{
|
||||||
@ -353,6 +374,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
|
|||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
|
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
|
||||||
|
|
||||||
|
// Always answers "yes": this variant does not inspect the pointer at all.
inline int acceleratorIsCommunicable(void *ptr)
{
  const int communicable = 1;
  (void) ptr; // the argument is accepted only for interface compatibility
  return communicable;
}
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||||
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||||
|
@ -154,6 +154,7 @@ AC_ARG_ENABLE([accelerator],
|
|||||||
case ${ac_ACCELERATOR} in
|
case ${ac_ACCELERATOR} in
|
||||||
cuda)
|
cuda)
|
||||||
echo CUDA acceleration
|
echo CUDA acceleration
|
||||||
|
LIBS="${LIBS} -lcuda"
|
||||||
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
|
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
|
||||||
sycl)
|
sycl)
|
||||||
echo SYCL acceleration
|
echo SYCL acceleration
|
||||||
@ -323,7 +324,6 @@ case ${CXXTEST} in
|
|||||||
# CXXLD="nvcc -v -link"
|
# CXXLD="nvcc -v -link"
|
||||||
CXX="${CXXBASE} -x cu "
|
CXX="${CXXBASE} -x cu "
|
||||||
CXXLD="${CXXBASE} -link"
|
CXXLD="${CXXBASE} -link"
|
||||||
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
|
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||||
if test $ac_openmp = yes; then
|
if test $ac_openmp = yes; then
|
||||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||||
@ -483,8 +483,7 @@ case ${ac_SHM} in
|
|||||||
LDFLAGS_CPY=$LDFLAGS
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
AC_SEARCH_LIBS([shm_unlink], [rt], [],
|
AC_SEARCH_LIBS([shm_unlink], [rt], [],[AC_MSG_ERROR("no library found for shm_unlink")])
|
||||||
[AC_MSG_ERROR("no library found for shm_unlink")])
|
|
||||||
CXXFLAGS=$CXXFLAGS_CPY
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
LDFLAGS=$LDFLAGS_CPY
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
;;
|
;;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user