From b8f7bfbb26c5249c96f8c9622e415c1321761293 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 1 Jul 2019 07:30:25 +0100 Subject: [PATCH] Don't stream as poor perf in some cases --- Grid/tensors/Tensor_SIMT.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Grid/tensors/Tensor_SIMT.h b/Grid/tensors/Tensor_SIMT.h index 8ce2d325..2fbc0ccb 100644 --- a/Grid/tensors/Tensor_SIMT.h +++ b/Grid/tensors/Tensor_SIMT.h @@ -35,7 +35,8 @@ NAMESPACE_BEGIN(Grid); accelerator_inline void synchronise(void) { #ifdef __CUDA_ARCH__ - __syncthreads(); +// __syncthreads(); + __syncwarp(); #endif return; } @@ -65,7 +66,8 @@ vobj coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int template<class vobj> accelerator_inline void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0) { - vstream(vec, extracted); + // vstream(vec, extracted); + vec = extracted; } #else accelerator_inline int SIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific