1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Updates for simd in one dir

This commit is contained in:
paboyle 2016-04-19 15:34:10 -07:00
parent c8a93d6a93
commit b27bac4669

View File

@ -321,6 +321,9 @@ PARALLEL_FOR_LOOP
int simd_layout = _grid->_simd_layout[dimension]; int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ; int comm_dim = _grid->_processors[dimension] >1 ;
int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim);
int rotate_dim = _grid->_simd_layout[dimension]>2;
assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported
int sshift[2]; int sshift[2];
@ -368,7 +371,8 @@ PARALLEL_FOR_LOOP
int rd = _grid->_rdimensions[dimension]; int rd = _grid->_rdimensions[dimension];
int ld = _grid->_ldimensions[dimension]; int ld = _grid->_ldimensions[dimension];
int gd = _grid->_gdimensions[dimension]; int gd = _grid->_gdimensions[dimension];
int ly = _grid->_simd_layout[dimension];
// Map to always positive shift modulo global full dimension. // Map to always positive shift modulo global full dimension.
int shift = (shiftpm+fd)%fd; int shift = (shiftpm+fd)%fd;
@ -398,7 +402,7 @@ PARALLEL_FOR_LOOP
int wrap = sshift/rd; int wrap = sshift/rd;
int num = sshift%rd; int num = sshift%rd;
if ( x< rd-num ) permute_slice=wrap; if ( x< rd-num ) permute_slice=wrap;
else permute_slice = 1-wrap; else permute_slice = (wrap+1)%ly;
} }
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
@ -418,7 +422,6 @@ PARALLEL_FOR_LOOP
int simd_layout = _grid->_simd_layout[dimension]; int simd_layout = _grid->_simd_layout[dimension];
int comm_dim = _grid->_processors[dimension] >1 ; int comm_dim = _grid->_processors[dimension] >1 ;
// assert(simd_layout==1); // Why?
assert(comm_dim==1); assert(comm_dim==1);
int shift = (shiftpm + fd) %fd; int shift = (shiftpm + fd) %fd;
assert(shift>=0); assert(shift>=0);
@ -529,7 +532,7 @@ PARALLEL_FOR_LOOP
_entries[point][lo+o+b]._around_the_world=wrap; _entries[point][lo+o+b]._around_the_world=wrap;
} }
} }
o +=_grid->_slice_stride[dimension]; o +=_grid->_slice_stride[dimension];
} }
@ -749,6 +752,7 @@ PARALLEL_FOR_LOOP
int comm_dim = _grid->_processors[dimension] >1 ; int comm_dim = _grid->_processors[dimension] >1 ;
assert(comm_dim==1); assert(comm_dim==1);
// This will not work with a rotate dimension (simd_layout > 2)
assert(simd_layout==2); assert(simd_layout==2);
assert(shift>=0); assert(shift>=0);
assert(shift<fd); assert(shift<fd);
@ -793,7 +797,9 @@ PARALLEL_FOR_LOOP
gathermtime+=usecond(); gathermtime+=usecond();
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
// FIXME
// This logic is hard-coded for simd_layout == 2 and does not support simd_layout > 2
// std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<<std::endl; // std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<<std::endl;
int inner_bit = (Nsimd>>(permute_type+1)); int inner_bit = (Nsimd>>(permute_type+1));