1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 13:40:46 +01:00

Faster gather exchange

This commit is contained in:
paboyle 2017-02-16 23:52:22 +00:00
parent 5c0adf7bf2
commit 8a29c16bde

View File

@ -142,12 +142,12 @@ PARALLEL_NESTED_LOOP2
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split with compression // Gather for when there *is* need to SIMD split with compression
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> void template<class cobj,class vobj,class compressor> double
Gather_plane_exchange(const Lattice<vobj> &rhs, Gather_plane_exchange(const Lattice<vobj> &rhs,
std::vector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type) std::vector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type)
{ {
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];
double t1,t2;
if ( !rhs._grid->CheckerBoarded(dimension) ) { if ( !rhs._grid->CheckerBoarded(dimension) ) {
cbmask = 0x3; cbmask = 0x3;
} }
@ -186,13 +186,20 @@ Gather_plane_exchange(const Lattice<vobj> &rhs,
} }
assert( (table.size()&0x1)==0); assert( (table.size()&0x1)==0);
t1=usecond();
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int j=0;j<table.size()/2;j++){ for(int j=0;j<table.size()/2;j++){
// buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]); // buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
cobj temp1 =compress(rhs._odata[so+table[2*j].second]); cobj temp1 =compress(rhs._odata[so+table[2*j].second]);
cobj temp2 =compress(rhs._odata[so+table[2*j+1].second]); cobj temp2 =compress(rhs._odata[so+table[2*j+1].second]);
exchange(pointers[0][j],pointers[1][j],temp1,temp2,type); cobj temp3;
cobj temp4;
exchange(temp3,temp4,temp1,temp2,type);
vstream(pointers[0][j],temp3);
vstream(pointers[1][j],temp4);
} }
t2=usecond();
return t2-t1;
} }
////////////////////////////////////////////////////// //////////////////////////////////////////////////////