mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Very small merge speedup.
This commit is contained in:
		@@ -103,7 +103,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 | 
			
		||||
	int o=n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	int o      =   n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	int offset = b+n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
	cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
 
 | 
			
		||||
@@ -227,17 +227,14 @@ void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
 | 
			
		||||
  const int Nsimd=vobj::vector_type::Nsimd();
 | 
			
		||||
  const int words=sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
  scalar_type *pointer;
 | 
			
		||||
  scalar_type *vp = (scalar_type *)&vec;
 | 
			
		||||
 | 
			
		||||
  //  assert( (((uint64_t)vp)&(sizeof(scalar_type)-1)) == 0);
 | 
			
		||||
 | 
			
		||||
  for(int w=0;w<words;w++){
 | 
			
		||||
  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
    pointer=(scalar_type *)&extracted[i][offset];
 | 
			
		||||
    for(int w=0;w<words;w++){
 | 
			
		||||
      vp[w*Nsimd+i] = pointer[w];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
      vp[w*Nsimd+i] = ((scalar_type *)&extracted[i][offset])[w];
 | 
			
		||||
  }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj> inline 
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user