From 7240d731843c2f86e75ecfda5de1eb680bf9baca Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 11 Oct 2016 22:21:07 +0100 Subject: [PATCH] Parallelise the x faces; fix the segv on KNL with comms --- lib/cartesian/Cartesian_base.h | 7 ++----- lib/cartesian/Cartesian_full.h | 7 +++++++ lib/cartesian/Cartesian_red_black.h | 25 ++++++++++++++++++++++++- lib/cshift/Cshift_common.h | 11 +++++++++-- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index b69c3435..8a24c87b 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -81,11 +81,8 @@ public: virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; - int CheckerBoardFromOindex (int Oindex){ - std::vector ocoor; - oCoorFromOindex(ocoor,Oindex); - return CheckerBoard(ocoor); - } + virtual int CheckerBoardFromOindex (int Oindex)=0; + virtual int CheckerBoardFromOindexTable (int Oindex)=0; ////////////////////////////////////////////////////////////////////////////////////////////// // Local layout calculations diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index 1f8f7514..14ab8b55 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -39,6 +39,13 @@ class GridCartesian: public GridBase { public: + virtual int CheckerBoardFromOindexTable (int Oindex) { + return 0; + } + virtual int CheckerBoardFromOindex (int Oindex) + { + return 0; + } virtual int CheckerBoarded(int dim){ return 0; } diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 9f5a1103..3c10403f 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -43,6 +43,7 @@ class GridRedBlackCartesian : public GridBase public: std::vector _checker_dim_mask; int _checker_dim; + std::vector _checker_board; virtual int CheckerBoarded(int dim){ if( dim==_checker_dim) return 1; @@ -72,12 +73,20 @@ public: // or by looping over x,y,z and multiply rather than computing checkerboard. if ( (source_cb+ocb)&1 ) { - return (shift)/2; } else { return (shift+1)/2; } } + virtual int CheckerBoardFromOindexTable (int Oindex) { + return _checker_board[Oindex]; + } + virtual int CheckerBoardFromOindex (int Oindex) + { + std::vector ocoor; + oCoorFromOindex(ocoor,Oindex); + return CheckerBoard(ocoor); + } virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ if(dim != _checker_dim) return shift; @@ -185,6 +194,8 @@ public: _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; _istride[d] = _istride[d-1]*_simd_layout[d-1]; } + + } //////////////////////////////////////////////////////////////////////////////////////////// @@ -205,6 +216,18 @@ public: _slice_nblock[d]=nblock; block = block*_rdimensions[d]; } + + //////////////////////////////////////////////// + // Create a checkerboard lookup table + //////////////////////////////////////////////// + int rvol = 1; + for(int d=0;d<_ndimension;d++){ + rvol=rvol * _rdimensions[d]; + } + _checker_board.resize(rvol); + for(int osite=0;osite<_osites;osite++){ + _checker_board[osite] = CheckerBoardFromOindex (osite); + } }; protected: diff --git a/lib/cshift/Cshift_common.h b/lib/cshift/Cshift_common.h index b8e1284a..b0e9b798 100644 --- a/lib/cshift/Cshift_common.h +++ b/lib/cshift/Cshift_common.h @@ -1,3 +1,4 @@ + /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice &rhs,std::vector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; + int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask == 0x3 ) { PARALLEL_NESTED_LOOP2 @@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2 } } else { int bo=0; + std::vector > table; for(int n=0;nCheckerBoardFromOindex(o+b);// Could easily be a table lookup + int ocb=1<CheckerBoardFromOindexTable(o+b); if ( ocb &cbmask ) { - buffer[off+bo++]=compress(rhs._odata[so+o+b]); + table.push_back(std::pair (bo++,o+b)); } } } +PARALLEL_FOR_LOOP + for(int i=0;i