mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Parallelise the x faces; fix the segv on KNL with comms
This commit is contained in:
parent
42cd148f5e
commit
7240d73184
@ -81,11 +81,8 @@ public:
|
|||||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
||||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
||||||
int CheckerBoardFromOindex (int Oindex){
|
virtual int CheckerBoardFromOindex (int Oindex)=0;
|
||||||
std::vector<int> ocoor;
|
virtual int CheckerBoardFromOindexTable (int Oindex)=0;
|
||||||
oCoorFromOindex(ocoor,Oindex);
|
|
||||||
return CheckerBoard(ocoor);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Local layout calculations
|
// Local layout calculations
|
||||||
|
@ -39,6 +39,13 @@ class GridCartesian: public GridBase {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,7 @@ class GridRedBlackCartesian : public GridBase
|
|||||||
public:
|
public:
|
||||||
std::vector<int> _checker_dim_mask;
|
std::vector<int> _checker_dim_mask;
|
||||||
int _checker_dim;
|
int _checker_dim;
|
||||||
|
std::vector<int> _checker_board;
|
||||||
|
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
if( dim==_checker_dim) return 1;
|
if( dim==_checker_dim) return 1;
|
||||||
@ -72,12 +73,20 @@ public:
|
|||||||
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
||||||
|
|
||||||
if ( (source_cb+ocb)&1 ) {
|
if ( (source_cb+ocb)&1 ) {
|
||||||
|
|
||||||
return (shift)/2;
|
return (shift)/2;
|
||||||
} else {
|
} else {
|
||||||
return (shift+1)/2;
|
return (shift+1)/2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return _checker_board[Oindex];
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
std::vector<int> ocoor;
|
||||||
|
oCoorFromOindex(ocoor,Oindex);
|
||||||
|
return CheckerBoard(ocoor);
|
||||||
|
}
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
||||||
|
|
||||||
if(dim != _checker_dim) return shift;
|
if(dim != _checker_dim) return shift;
|
||||||
@ -185,6 +194,8 @@ public:
|
|||||||
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
||||||
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -206,6 +217,18 @@ public:
|
|||||||
block = block*_rdimensions[d];
|
block = block*_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
// Create a checkerboard lookup table
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
int rvol = 1;
|
||||||
|
for(int d=0;d<_ndimension;d++){
|
||||||
|
rvol=rvol * _rdimensions[d];
|
||||||
|
}
|
||||||
|
_checker_board.resize(rvol);
|
||||||
|
for(int osite=0;osite<_osites;osite++){
|
||||||
|
_checker_board[osite] = CheckerBoardFromOindex (osite);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
protected:
|
protected:
|
||||||
virtual int oIndex(std::vector<int> &coor)
|
virtual int oIndex(std::vector<int> &coor)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
int stride=rhs._grid->_slice_stride[dimension];
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int bo=0;
|
int bo=0;
|
||||||
|
std::vector<std::pair<int,int> > table;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*stride;
|
int o = n*stride;
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
|
||||||
if ( ocb &cbmask ) {
|
if ( ocb &cbmask ) {
|
||||||
buffer[off+bo++]=compress(rhs._odata[so+o+b]);
|
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int i=0;i<table.size();i++){
|
||||||
|
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user