diff --git a/Grid/tensors/Tensor_SIMT.h b/Grid/tensors/Tensor_SIMT.h
index 2fbc0ccb..c18b8484 100644
--- a/Grid/tensors/Tensor_SIMT.h
+++ b/Grid/tensors/Tensor_SIMT.h
@@ -69,6 +69,11 @@ void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,
   // vstream(vec, extracted);
   vec = extracted;
 }
+template<class vobj> accelerator_inline
+void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
+{
+  vstream(vec, extracted);
+}
 #else
 accelerator_inline int SIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific
@@ -92,6 +97,11 @@ void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object &
 {
   insertLane(lane,vec,extracted);
 }
+template<class vobj> accelerator_inline
+void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
+{
+  insertLane(lane,vec,extracted);
+}
 #endif