diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index 7e5436b1..7b1b2c1a 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -36,7 +36,7 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); #define LMAX (32) -#define LMIN (4) +#define LMIN (16) #define LINC (4) int64_t Nloop=2000; @@ -204,7 +204,7 @@ int main (int argc, char ** argv) std::cout< &rhs,commVector &buffer,int dimen int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane int e1=rhs._grid->_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; + int ent = 0; + + static std::vector > table; table.resize(e1*e2); int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask == 0x3 ) { - parallel_for_nest2(int n=0;n(off+bo+b,so+o+b); } } } else { int bo=0; - std::vector > table; for(int n=0;nCheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { - table.push_back(std::pair (bo++,o+b)); + table[ent++]=std::pair (off+bo++,so+o+b); } } } - parallel_for(int i=0;i void Scatter_plane_simple (Lattice &rhs,commVector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; int stride=rhs._grid->_slice_stride[dimension]; - + + static std::vector > table; table.resize(e1*e2); + int ent =0; + if ( cbmask ==0x3 ) { - parallel_for_nest2(int n=0;n_slice_stride[dimension]; int bo =n*rhs._grid->_slice_block[dimension]; - rhs._odata[so+o+b]=buffer[bo+b]; + table[ent++] = std::pair(so+o+b,bo); } } + } else { - std::vector > table; int bo=0; for(int n=0;n_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - table.push_back(std::pair (so+o+b,bo++)); + table[ent++]=std::pair (so+o+b,bo++); } } } - parallel_for(int i=0;i void Copy_plane(Lattice& lhs,const Lattice &rhs int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc int e2=rhs._grid->_slice_block[dimension]; int stride = rhs._grid->_slice_stride[dimension]; + static std::vector > table; table.resize(e1*e2); + int ent=0; + if(cbmask == 0x3 ){ - parallel_for_nest2(int n=0;n(lo+o,ro+o); } } } else { - parallel_for_nest2(int n=0;nCheckerBoardFromOindex(o); if ( ocb&cbmask ) { - //lhs._odata[lo+o]=rhs._odata[ro+o]; - vstream(lhs._odata[lo+o],rhs._odata[ro+o]); + table[ent++] = std::pair(lo+o,ro+o); } } } } - + + parallel_for(int i=0;i void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) @@ -269,16 +278,28 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice_slice_block [dimension]; int stride = rhs._grid->_slice_stride[dimension]; - parallel_for_nest2(int n=0;n > table; table.resize(e1*e2); + int ent=0; + double t_tab,t_perm; + if ( cbmask == 0x3 ) { + for(int n=0;n(lo+o+b,ro+o+b); + }} + } else { + for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) { - permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type); - } + if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); + }} + } - }} + parallel_for(int i=0;i void Cshift_local(Lattice& ret,const Lattice &r sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + double t_local; + if ( sshift[0] == sshift[1] ) { Cshift_local(ret,rhs,dimension,shift,0x3); } else { @@ -299,7 +322,7 @@ template void Cshift_local(Lattice& ret,const Lattice &r } } -template Lattice Cshift_local(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) +template void Cshift_local(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) { GridBase *grid = rhs._grid; int fd = grid->_fdimensions[dimension]; @@ -325,11 +348,7 @@ template Lattice Cshift_local(Lattice &ret,const Lattice int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); int sx = (x+sshift)%rd; - - // FIXME : This must change where we have a - // Rotate slice. - // Document how this works ; why didn't I do this when I first wrote it... // wrap is whether sshift > rd. // num is sshift mod rd. // @@ -365,10 +384,8 @@ template Lattice Cshift_local(Lattice &ret,const Lattice if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist); else Copy_plane(ret,rhs,dimension,x,sx,cbmask); - } - return ret; } } #endif diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 98713c14..1169d18f 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -256,7 +256,7 @@ public: _odata[ss]=r._odata[ss]; } } - + Lattice(Lattice&& r){ // move constructor _grid = r._grid; checkerboard = r.checkerboard; @@ -270,6 +270,7 @@ public: _odata =std::move(r._odata); return *this; } + inline Lattice & operator = (const Lattice & r){ _grid = r._grid; checkerboard = r.checkerboard;