mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Move excange into the stencil only; keep Cshift fully general
This commit is contained in:
		@@ -139,69 +139,6 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
///////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
// Gather for when there *is* need to SIMD split with compression
 | 
					 | 
				
			||||||
///////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
template<class cobj,class vobj,class compressor> double
 | 
					 | 
				
			||||||
Gather_plane_exchange(const Lattice<vobj> &rhs,
 | 
					 | 
				
			||||||
		      std::vector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
					 | 
				
			||||||
  double t1,t2;
 | 
					 | 
				
			||||||
  if ( !rhs._grid->CheckerBoarded(dimension) ) {
 | 
					 | 
				
			||||||
    cbmask = 0x3;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
					 | 
				
			||||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
					 | 
				
			||||||
  int n1=rhs._grid->_slice_stride[dimension];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Need to switch to a table loop
 | 
					 | 
				
			||||||
  std::vector<std::pair<int,int> > table;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  if ( cbmask ==0x3){
 | 
					 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					 | 
				
			||||||
	int o      =   n*n1;
 | 
					 | 
				
			||||||
	int offset = b+n*e2;
 | 
					 | 
				
			||||||
	table.push_back(std::pair<int,int> (offset,o+b));
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  } else { 
 | 
					 | 
				
			||||||
    // Case of SIMD split AND checker dim cannot currently be hit, except in 
 | 
					 | 
				
			||||||
    // Test_cshift_red_black code.
 | 
					 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					 | 
				
			||||||
	int o=n*n1;
 | 
					 | 
				
			||||||
	int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
 | 
					 | 
				
			||||||
	int offset = b+n*e2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if ( ocb & cbmask ) {
 | 
					 | 
				
			||||||
	  table.push_back(std::pair<int,int> (offset,o+b));
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  assert( (table.size()&0x1)==0);
 | 
					 | 
				
			||||||
  t1=usecond();
 | 
					 | 
				
			||||||
PARALLEL_FOR_LOOP     
 | 
					 | 
				
			||||||
  for(int j=0;j<table.size()/2;j++){
 | 
					 | 
				
			||||||
    //    buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
 | 
					 | 
				
			||||||
    cobj temp1 =compress(rhs._odata[so+table[2*j].second]);
 | 
					 | 
				
			||||||
    cobj temp2 =compress(rhs._odata[so+table[2*j+1].second]);
 | 
					 | 
				
			||||||
    cobj temp3;
 | 
					 | 
				
			||||||
    cobj temp4;
 | 
					 | 
				
			||||||
    exchange(temp3,temp4,temp1,temp2,type);
 | 
					 | 
				
			||||||
    vstream(pointers[0][j],temp3);
 | 
					 | 
				
			||||||
    vstream(pointers[1][j],temp4);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  t2=usecond();
 | 
					 | 
				
			||||||
 return t2-t1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
// Gather for when there is no need to SIMD split
 | 
					// Gather for when there is no need to SIMD split
 | 
				
			||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user