mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Move excange into the stencil only; keep Cshift fully general
This commit is contained in:
parent
37720c4db7
commit
41009cc142
@ -139,69 +139,6 @@ PARALLEL_NESTED_LOOP2
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Gather for when there *is* need to SIMD split with compression
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class cobj,class vobj,class compressor> double
|
||||
Gather_plane_exchange(const Lattice<vobj> &rhs,
|
||||
std::vector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
double t1,t2;
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask = 0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int n1=rhs._grid->_slice_stride[dimension];
|
||||
|
||||
// Need to switch to a table loop
|
||||
std::vector<std::pair<int,int> > table;
|
||||
|
||||
if ( cbmask ==0x3){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*n1;
|
||||
int offset = b+n*e2;
|
||||
table.push_back(std::pair<int,int> (offset,o+b));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||
// Test_cshift_red_black code.
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o=n*n1;
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
int offset = b+n*e2;
|
||||
|
||||
if ( ocb & cbmask ) {
|
||||
table.push_back(std::pair<int,int> (offset,o+b));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert( (table.size()&0x1)==0);
|
||||
t1=usecond();
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int j=0;j<table.size()/2;j++){
|
||||
// buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
||||
cobj temp1 =compress(rhs._odata[so+table[2*j].second]);
|
||||
cobj temp2 =compress(rhs._odata[so+table[2*j+1].second]);
|
||||
cobj temp3;
|
||||
cobj temp4;
|
||||
exchange(temp3,temp4,temp1,temp2,type);
|
||||
vstream(pointers[0][j],temp3);
|
||||
vstream(pointers[1][j],temp4);
|
||||
}
|
||||
t2=usecond();
|
||||
return t2-t1;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Gather for when there is no need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
|
Loading…
Reference in New Issue
Block a user