// -----------------------------------------------------------------------------
// Cshift(rhs, dimension, shift): single-node ("fake comms") circular shift.
//
// Going by the tokens that are still visible in this file, the active
// definition:
//   - constructs the result `ret` on rhs._grid and returns it by value;
//   - folds `shift` with (shift+fd)%fd, where fd is
//     rhs._grid->_fdimensions[dimension];
//   - sets ret.checkerboard from
//     rhs._grid->CheckerBoardDestination(rhs.checkerboard, shift);
//   - walks each slice as _slice_nblock[dimension] blocks of
//     _slice_block[dimension] entries, advancing the offset by
//     _slice_stride[dimension] per block, and asks
//     rhs._grid->CheckerBoardShift(...) for the per-site shift `sshift`;
//   - when rhs._grid->_simd_layout[dimension] > 1 (permute_dim), a site is
//     either copied straight from rhs._odata[so+o+b] to ret._odata[ro+o+b] or,
//     when permute_slice is set, passed through extract(..., pointers) in a
//     first pass and merge(..., pointers) in a second pass;
//   - otherwise every site of the source slice is copied into comm_buf and
//     then copied back out of comm_buf into ret._odata.
//
// NOTE(review): this file looks damaged by a text-extraction step -- confirm
// against version control and restore from there instead of repairing by hand:
//   - the whole header has been collapsed onto three physical lines, so every
//     `//` comment lexically swallows the remainder of its line, including the
//     #define of the include guard and most of the function body;
//   - text between a '<' and the following '>' appears to be missing, e.g.
//     `std::vector > comm_buf(buffer_size)`, `std::vector pointers(Nsimd)`,
//     `for(int d=0;d_simd_layout[d]>1 )` and `for(int x=0;x_ostride[dimension]`.
//     Template arguments, loop bounds and whole statements were lost.
// NOTE(review): (shift+fd)%fd is only non-negative for shift >= -fd; callers
//   presumably stay inside that range -- TODO confirm.
// NOTE(review): permute_type is computed but not read by any visible
//   statement; presumably its use sat in one of the missing spans -- verify.
// NOTE(review): `friend` implies this text is meant to be included inside a
//   class body that is not visible here (presumably Lattice) -- confirm.
//
// The second Cshift definition, which starts at the block-comment opener on
// the second physical line, appears to be an older, disabled variant that is
// kept for reference only.
// -----------------------------------------------------------------------------
#ifndef _GRID_FAKE_H_ #define _GRID_FAKE_H_ friend Lattice Cshift(Lattice &rhs,int dimension,int shift) { typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; const int Nsimd = vector_type::Nsimd(); Lattice ret(rhs._grid); int fd = rhs._grid->_fdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension]; //int ld = rhs._grid->_ldimensions[dimension]; //int gd = rhs._grid->_gdimensions[dimension]; // Map to always positive shift modulo global full dimension. shift = (shift+fd)%fd; ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift); // the permute type int permute_dim =rhs._grid->_simd_layout[dimension]>1 ; int permute_type=0; for(int d=0;d_simd_layout[d]>1 ) permute_type++; } /////////////////////////////////////////////// // Move via a fake comms buffer // Simd direction uses an extract/merge pair /////////////////////////////////////////////// int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; int words = sizeof(vobj)/sizeof(vector_type); std::vector > comm_buf(buffer_size); std::vector > comm_buf_extract(Nsimd,std::vector(buffer_size*words) ); std::vector pointers(Nsimd); for(int x=0;x_ostride[dimension]; // base offset for result if ( permute_dim ) { int o = 0; // relative offset to base for(int n=0;n_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ int sshift = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,o+b); int sx = (x+sshift)%rd; // base offset for source int so = sx*rhs._grid->_ostride[dimension]; int permute_slice=0; int wrap = sshift/rd; int num = sshift%rd; if ( x< rd-num ) permute_slice=wrap; else permute_slice = 1-wrap; if ( permute_slice ) { extract(rhs._odata[so+o+b],pointers); } else { ret._odata[ro+o+b]=rhs._odata[so+o+b]; } } o +=rhs._grid->_slice_stride[dimension]; } for(int i=0;i_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ int sshift = 
rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,o+b); int sx = (x+sshift)%rd; // base offset for source int so = sx*rhs._grid->_ostride[dimension]; int permute_slice=0; int wrap = sshift/rd; int num = sshift%rd; if ( x< rd-num ) permute_slice=wrap; else permute_slice = 1-wrap; if ( permute_slice ) { merge(ret._odata[ro+o+b],pointers); } } o +=rhs._grid->_slice_stride[dimension]; } } else { int co; // comm offset int o; co=0; o=0; for(int n=0;n_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ // This call in inner loop is annoying but necessary for dimension=0 // in the case of RedBlack grids. Could optimise away with // alternate code paths for all other cases. int sshift = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,o+b); int sx = (x+sshift)%rd; int so = sx*rhs._grid->_ostride[dimension]; comm_buf[co++]=rhs._odata[so+o+b]; } o +=rhs._grid->_slice_stride[dimension]; } // Step through a copy into a comms buffer and pull back in. // Genuine fake implementation could calculate if loops back co=0; o=0; for(int n=0;n_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ ret._odata[ro+o+b]=comm_buf[co++]; } o +=rhs._grid->_slice_stride[dimension]; } } } return ret; } /* friend Lattice Cshift(Lattice &rhs,int dimension,int shift) { Lattice ret(rhs._grid); int rd = rhs._grid->_rdimensions[dimension]; int ld = rhs._grid->_ldimensions[dimension]; int gd = rhs._grid->_gdimensions[dimension]; // Map to always positive shift. 
shift = (shift+gd)%gd; ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift); shift = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift); // Work out whether to permute and the permute type // ABCDEFGH -> AE BF CG DH permute // Shift 0 AE BF CG DH 0 0 0 0 ABCDEFGH // Shift 1 BF CG DH AE 0 0 0 1 BCDEFGHA // Shift 2 CG DH AE BF 0 0 1 1 CDEFGHAB // Shift 3 DH AE BF CG 0 1 1 1 DEFGHABC // Shift 4 AE BF CG DH 1 1 1 1 EFGHABCD // Shift 5 BF CG DH AE 1 1 1 0 FGHACBDE // Shift 6 CG DH AE BF 1 1 0 0 GHABCDEF // Shift 7 DH AE BF CG 1 0 0 0 HABCDEFG int permute_dim =rhs._grid->_simd_layout[dimension]>1 ; int permute_type=0; for(int d=0;d_simd_layout[d]>1 ) permute_type++; // loop over all work int work =rd*rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; // Packed gather sequence is clean int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; typedef typename vobj::scalar_type scalar_t; typedef typename vobj::vector_type vector_t; const int ns=sizeof(vobj)/sizeof(scalar_t); const int nv=sizeof(vobj)/sizeof(vector_t); std::vector > comm_buf(buffer_size); for(int x=0;x_ostride[dimension]; int so =sx*rhs._grid->_ostride[dimension]; int permute_slice=0; if ( permute_dim ) { permute_slice = shift/rd; if ( x_slice_nblock[dimension];n++){ vector_t *optr = (vector_t *)&ret._odata[o]; vector_t *iptr = (vector_t *)&rhs._odata[so]; int skew = buffer_size*ns/2; for(int b=0;b_slice_block[dimension];b++){ for(int n=0;n_slice_stride[dimension]; // bo+=rhs._grid->_slice_stride[dimension]*ns/2; } } else { int bo=0; for(int n=0;n_slice_nblock[dimension];n++){ for(int i=0;i_slice_block[dimension];i++){ comm_buf[bo++] =rhs._odata[so+i]; } so+=rhs._grid->_slice_stride[dimension]; } bo=0; for(int n=0;n_slice_nblock[dimension];n++){ for(int i=0;i_slice_block[dimension];i++){ ret._odata[o+i]=comm_buf[bo++]; } o+=rhs._grid->_slice_stride[dimension]; } } } return ret; }; */ #endif