mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
UVM check in MPI calls
This commit is contained in:
parent
8244caff25
commit
a8309638d4
@ -309,15 +309,8 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int ierr;
|
||||
|
||||
// Enforce no UVM in comms, device or host OK
|
||||
int uvm;
|
||||
auto
|
||||
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) xmit);
|
||||
assert(cuerr == cudaSuccess );
|
||||
assert(uvm==0);
|
||||
|
||||
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) recv);
|
||||
assert(cuerr == cudaSuccess );
|
||||
assert(uvm==0);
|
||||
assert(acceleratorIsCommunicable(xmit));
|
||||
assert(acceleratorIsCommunicable(recv));
|
||||
|
||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||
// printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
|
||||
|
@ -70,6 +70,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
//
|
||||
// Memory management:
|
||||
//
|
||||
// int acceleratorIsCommunicable(void *pointer);
|
||||
// void *acceleratorAllocShared(size_t bytes);
|
||||
// void acceleratorFreeShared(void *ptr);
|
||||
//
|
||||
@ -166,6 +167,16 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
|
||||
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
||||
inline int acceleratorIsCommunicable(void *ptr)
|
||||
{
|
||||
int uvm;
|
||||
auto
|
||||
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) ptr);
|
||||
assert(cuerr == cudaSuccess );
|
||||
if(uvm) return 0;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////
|
||||
@ -220,6 +231,15 @@ inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline int acceleratorIsCommunicable(void *ptr)
|
||||
{
|
||||
#if 0
|
||||
auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
|
||||
if ( uvm = cl::sycl::usm::alloc::shared ) return 1;
|
||||
else return 0;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -299,6 +319,7 @@ inline void *acceleratorAllocShared(size_t bytes)
|
||||
return malloc(bytes);
|
||||
#endif
|
||||
};
|
||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||
|
||||
inline void *acceleratorAllocDevice(size_t bytes)
|
||||
{
|
||||
@ -353,6 +374,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
|
||||
|
||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||
|
@ -154,6 +154,7 @@ AC_ARG_ENABLE([accelerator],
|
||||
case ${ac_ACCELERATOR} in
|
||||
cuda)
|
||||
echo CUDA acceleration
|
||||
LIBS="${LIBS} -lcuda"
|
||||
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
|
||||
sycl)
|
||||
echo SYCL acceleration
|
||||
@ -323,7 +324,6 @@ case ${CXXTEST} in
|
||||
# CXXLD="nvcc -v -link"
|
||||
CXX="${CXXBASE} -x cu "
|
||||
CXXLD="${CXXBASE} -link"
|
||||
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
|
||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||
if test $ac_openmp = yes; then
|
||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||
@ -483,8 +483,7 @@ case ${ac_SHM} in
|
||||
LDFLAGS_CPY=$LDFLAGS
|
||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||
AC_SEARCH_LIBS([shm_unlink], [rt], [],
|
||||
[AC_MSG_ERROR("no library found for shm_unlink")])
|
||||
AC_SEARCH_LIBS([shm_unlink], [rt], [],[AC_MSG_ERROR("no library found for shm_unlink")])
|
||||
CXXFLAGS=$CXXFLAGS_CPY
|
||||
LDFLAGS=$LDFLAGS_CPY
|
||||
;;
|
||||
|
Loading…
Reference in New Issue
Block a user