diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index a055ae22..13bb1df1 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -31,6 +31,10 @@ Author: Christopher Kelly /* END LEGAL */ #pragma once +#include + +//#pragma GCC optimize("no-strict-aliasing") + NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////// @@ -44,7 +48,6 @@ template using ExtractBuffer = AcceleratorVector<__T ,GRID_MAX //void extract(const vobj &vec,ExtractPointerArray &extracted, int offset); //void merge(vobj &vec,ExtractBuffer &extracted) //void merge(vobj &vec,ExtractPointerArray &extracted) -// //////////////////////////////////////////////////////////////////////// // Extract to contiguous array scalar object @@ -60,12 +63,15 @@ void extract(const vobj &vec,ExtractBuffer &extracted) const int Nsimd=vector_type::Nsimd(); const int Nextr=extracted.size(); const int s=Nsimd/Nextr; - - sobj_scalar_type *sp = (sobj_scalar_type *)&extracted[0]; + sobj_scalar_type *sp = (sobj_scalar_type *) &extracted[0]; scalar_type *vp = (scalar_type *)&vec; + scalar_type vtmp; + sobj_scalar_type stmp; for(int w=0;w &extracted) sobj_scalar_type *sp = (sobj_scalar_type *)&extracted[0]; scalar_type *vp = (scalar_type *)&vec; + scalar_type vtmp; + sobj_scalar_type stmp; for(int w=0;w &extracted, int offset) const int s = Nsimd/Nextr; scalar_type * vp = (scalar_type *)&vec; - + scalar_type vtmp; + sobj_scalar_type stmp; for(int w=0;w &extracted, int offset) const int s = Nsimd/Nextr; scalar_type * vp = (scalar_type *)&vec; + scalar_type vtmp; + sobj_scalar_type stmp; for(int w=0;w