1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Parallelise the x faces; fix the segv on KNL with comms

This commit is contained in:
paboyle
2016-10-11 22:21:07 +01:00
parent 42cd148f5e
commit 7240d73184
4 changed files with 42 additions and 8 deletions

View File

@ -1,3 +1,4 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
PARALLEL_NESTED_LOOP2
@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2
}
} else {
int bo=0;
std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
if ( ocb &cbmask ) {
buffer[off+bo++]=compress(rhs._odata[so+o+b]);
table.push_back(std::pair<int,int> (bo++,o+b));
}
}
}
PARALLEL_FOR_LOOP
for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
}
}
}