From c650bb3f3d56d4336c4f140277a548ed802680ce Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 16 Feb 2016 18:41:53 -0600 Subject: [PATCH] Very small merge speed up. --- lib/cshift/Cshift_common.h | 2 +- lib/tensors/Tensor_extract_merge.h | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/cshift/Cshift_common.h b/lib/cshift/Cshift_common.h index 82c5f124..70febc2e 100644 --- a/lib/cshift/Cshift_common.h +++ b/lib/cshift/Cshift_common.h @@ -103,7 +103,7 @@ PARALLEL_NESTED_LOOP2 for(int n=0;n_slice_stride[dimension]; + int o = n*rhs._grid->_slice_stride[dimension]; int offset = b+n*rhs._grid->_slice_block[dimension]; cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid); diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index 9fd780c5..4eb72254 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -227,17 +227,14 @@ void merge1(vobj &vec,std::vector &extracted,int const int Nsimd=vobj::vector_type::Nsimd(); const int words=sizeof(vobj)/sizeof(vector_type); - scalar_type *pointer; scalar_type *vp = (scalar_type *)&vec; // assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0); + for(int w=0;w inline