1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-10-24 01:34:47 +01:00

Very small merge speed up.

This commit is contained in:
Peter Boyle
2016-02-16 18:41:53 -06:00
parent 81395e85d1
commit c650bb3f3d
2 changed files with 4 additions and 7 deletions

View File

@@ -103,7 +103,7 @@ PARALLEL_NESTED_LOOP2
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
-int o=n*rhs._grid->_slice_stride[dimension];
+int o = n*rhs._grid->_slice_stride[dimension];
int offset = b+n*rhs._grid->_slice_block[dimension];
cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);

View File

@@ -227,17 +227,14 @@ void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
const int Nsimd=vobj::vector_type::Nsimd();
const int words=sizeof(vobj)/sizeof(vector_type);
-scalar_type *pointer;
scalar_type *vp = (scalar_type *)&vec;
// assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
+for(int w=0;w<words;w++){
for(int i=0;i<Nsimd;i++){
-pointer=(scalar_type *)&extracted[i][offset];
-for(int w=0;w<words;w++){
-vp[w*Nsimd+i] = pointer[w];
-}
-}
+vp[w*Nsimd+i] = ((scalar_type *)&extracted[i][offset])[w];
+}}
}
template<class vobj> inline