From d6ffadb33b7b59d3966926d2a8c0714d90e6781b Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Tue, 2 Jul 2019 17:25:13 +0100
Subject: [PATCH] Coalesced write

Add coalescedWriteNonTemporal() next to coalescedWrite() in both
preprocessor branches of Grid/tensors/Tensor_SIMT.h:

  * branch before #else: forwards to vstream(vec, extracted); the
    `lane` argument is accepted (default 0) but not used.
  * branch after #else (the one that defines SIMTlane() from
    threadIdx.y): forwards to insertLane(lane, vec, extracted).

NOTE(review): in the #else branch coalescedWrite() declares `extracted`
as `const typename vobj::scalar_object &`, whereas the new function
declares it as `const vobj &` and hands it to the same insertLane()
call. Confirm that parameter type is intended.

---
 Grid/tensors/Tensor_SIMT.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Grid/tensors/Tensor_SIMT.h b/Grid/tensors/Tensor_SIMT.h
index 2fbc0ccb..c18b8484 100644
--- a/Grid/tensors/Tensor_SIMT.h
+++ b/Grid/tensors/Tensor_SIMT.h
@@ -69,6 +69,11 @@ void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,
   // vstream(vec, extracted);
   vec = extracted;
 }
+template<class vobj> accelerator_inline
+void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
+{
+  vstream(vec, extracted);
+}
 #else
 accelerator_inline int SIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific
 
@@ -92,6 +97,11 @@ void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object &
 {
   insertLane(lane,vec,extracted);
 }
+template<class vobj> accelerator_inline
+void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
+{
+  insertLane(lane,vec,extracted);
+}
 #endif
 
 