From b2087f14c48e881ab041bbaef7f6c444c88a895d Mon Sep 17 00:00:00 2001
From: Daniel Richtmann
Date: Mon, 24 Aug 2020 16:54:36 +0200
Subject: [PATCH] Fix CoarsenedMatrix regarding illegal memory accesses

Need a reference to geom since the lambda copies the this pointer
which points to host memory, see
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/#star-this-capture
- https://devblogs.nvidia.com/new-compiler-features-cuda-8/
---
 Grid/algorithms/CoarsenedMatrix.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h
index d18fba43..ba40535c 100644
--- a/Grid/algorithms/CoarsenedMatrix.h
+++ b/Grid/algorithms/CoarsenedMatrix.h
@@ -310,6 +310,8 @@ public:
     Stencil.HaloExchange(in,compressor);
     autoView( in_v , in, AcceleratorRead);
     autoView( out_v , out, AcceleratorWrite);
+    autoView( Stencil_v , Stencil, AcceleratorRead);
+    auto& geom_v = geom;
     typedef LatticeView<Cobj> Aview;
 
     Vector<Aview> AcceleratorViewContainer;
@@ -331,14 +333,14 @@ public:
       int ptype;
       StencilEntry *SE;
 
-      for(int point=0;point<geom.npoint;point++){
+      for(int point=0;point<geom_v.npoint;point++){
 
-        SE=Stencil.GetEntry(ptype,point,ss);
+        SE=Stencil_v.GetEntry(ptype,point,ss);
 
         if(SE->_is_local) {
           nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
         } else {
-          nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
+          nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
         }
         acceleratorSynchronise();
 
@@ -382,6 +384,7 @@ public:
 
     autoView( out_v , out, AcceleratorWrite);
     autoView( in_v , in, AcceleratorRead);
+    autoView( Stencil_v , Stencil, AcceleratorRead);
 
     const int Nsimd = CComplex::Nsimd();
     typedef decltype(coalescedRead(in_v[0])) calcVector;
@@ -395,12 +398,12 @@ public:
       int ptype;
       StencilEntry *SE;
 
-      SE=Stencil.GetEntry(ptype,point,ss);
+      SE=Stencil_v.GetEntry(ptype,point,ss);
 
       if(SE->_is_local) {
         nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
       } else {
-        nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
+        nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
       }
       acceleratorSynchronise();
 