mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Better openMP for Cshift
This commit is contained in:
parent
3530000ea6
commit
808066fdf1
@ -68,7 +68,8 @@ public:
|
|||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
#if 0
|
|
||||||
|
|
||||||
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
||||||
{
|
{
|
||||||
Lattice<vobj> ret(rhs._grid);
|
Lattice<vobj> ret(rhs._grid);
|
||||||
@ -131,20 +132,24 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ( permute_slice ) {
|
if ( permute_slice ) {
|
||||||
|
|
||||||
int internal=sizeof(vobj)/sizeof(vComplex);
|
int internal=sizeof(vobj)/sizeof(vComplex);
|
||||||
int num =rhs._grid->_slice_block[dimension]*internal;
|
int num =rhs._grid->_slice_block[dimension]*internal;
|
||||||
|
|
||||||
|
#pragma omp parallel for collapse(2)
|
||||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||||
vComplex *optr = (vComplex *)&ret._odata[o];
|
|
||||||
vComplex *iptr = (vComplex *)&rhs._odata[so];
|
|
||||||
for(int b=0;b<num;b++){
|
for(int b=0;b<num;b++){
|
||||||
permute(optr[b],iptr[b],permute_type);
|
vComplex *optr = (vComplex *)&ret._odata[o];
|
||||||
|
vComplex *iptr = (vComplex *)&rhs._odata[so];
|
||||||
|
permute(optr[b],iptr[b],permute_type);
|
||||||
}
|
}
|
||||||
o+=rhs._grid->_slice_stride[dimension];
|
o+=rhs._grid->_slice_stride[dimension];
|
||||||
so+=rhs._grid->_slice_stride[dimension];
|
so+=rhs._grid->_slice_stride[dimension];
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
#pragma omp parallel for collapse(2)
|
||||||
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
for(int n=0;n<rhs._grid->_slice_nblock[dimension];n++){
|
||||||
for(int i=0;i<rhs._grid->_slice_block[dimension];i++){
|
for(int i=0;i<rhs._grid->_slice_block[dimension];i++){
|
||||||
ret._odata[o+i]=rhs._odata[so+i];
|
ret._odata[o+i]=rhs._odata[so+i];
|
||||||
@ -157,78 +162,6 @@ public:
|
|||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
|
|
||||||
{
|
|
||||||
Lattice<vobj> ret(rhs._grid);
|
|
||||||
int sx,so,o;
|
|
||||||
|
|
||||||
ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift);
|
|
||||||
shift = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift);
|
|
||||||
int rd = rhs._grid->_rdimensions[dimension];
|
|
||||||
int ld = rhs._grid->_dimensions[dimension];
|
|
||||||
|
|
||||||
// Map to always positive shift.
|
|
||||||
shift = (shift+ld)%ld;
|
|
||||||
|
|
||||||
// Work out whether to permute and the permute type
|
|
||||||
// ABCDEFGH -> AE BF CG DH permute
|
|
||||||
// Shift 0 AE BF CG DH 0 0 0 0 ABCDEFGH
|
|
||||||
// Shift 1 DH AE BF CG 1 0 0 0 HABCDEFG
|
|
||||||
// Shift 2 CG DH AE BF 1 1 0 0 GHABCDEF
|
|
||||||
// Shift 3 BF CG DH AE 1 1 1 0 FGHACBDE
|
|
||||||
// Shift 4 AE BF CG DH 1 1 1 1 EFGHABCD
|
|
||||||
// Shift 5 DH AE BF CG 0 1 1 1 DEFGHABC
|
|
||||||
// Shift 6 CG DH AE BF 0 0 1 1 CDEFGHAB
|
|
||||||
// Shift 7 BF CG DH AE 0 0 0 1 BCDEFGHA
|
|
||||||
int permute_dim =rhs._grid->_layout[dimension]>1 ;
|
|
||||||
int permute_type=0;
|
|
||||||
for(int d=0;d<dimension;d++)
|
|
||||||
if (rhs._grid->_layout[d]>1 ) permute_type++;
|
|
||||||
|
|
||||||
|
|
||||||
// loop over all work
|
|
||||||
int work =rd*rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
|
|
||||||
//#pragma omp parallel for
|
|
||||||
for(int ww=0;ww<work;ww++){
|
|
||||||
|
|
||||||
|
|
||||||
// can optimise this if know w moves congtiguously for a given thread.
|
|
||||||
// b=(b+1);
|
|
||||||
// if (b==_slice_block) {b=0; n=n+1;}
|
|
||||||
// if (n==_slice_nblock) { n=0; x=x+1}
|
|
||||||
//
|
|
||||||
// Perhaps a five cycle iterator, or so.
|
|
||||||
int w=ww;
|
|
||||||
int b = w%rhs._grid->_slice_block[dimension] ; w=w/rhs._grid->_slice_block[dimension];
|
|
||||||
int n = w%rhs._grid->_slice_nblock[dimension]; w=w/rhs._grid->_slice_nblock[dimension];
|
|
||||||
int x = w;
|
|
||||||
|
|
||||||
sx = (x-shift+ld)%rd;
|
|
||||||
o = x*rhs._grid->_ostride[dimension]+n*rhs._grid->_slice_stride[dimension]; // common sub expression alert.
|
|
||||||
so =sx*rhs._grid->_ostride[dimension]+n*rhs._grid->_slice_stride[dimension];
|
|
||||||
|
|
||||||
int permute_slice=0;
|
|
||||||
if ( permute_dim ) {
|
|
||||||
permute_slice = shift/rd;
|
|
||||||
if ( x<shift%rd ) permute_slice = 1-permute_slice;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( permute_slice ) {
|
|
||||||
|
|
||||||
int internal=sizeof(vobj)/sizeof(vComplex);
|
|
||||||
vComplex *optr = (vComplex *)&ret._odata[o+b];
|
|
||||||
vComplex *iptr = (vComplex *)&rhs._odata[so+b];
|
|
||||||
for(int i=0;i<internal;i++){
|
|
||||||
permute(optr[i],iptr[i],permute_type);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret._odata[o+b]=rhs._odata[so+b];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<class sobj>
|
template<class sobj>
|
||||||
inline Lattice<vobj> & operator = (const sobj & r){
|
inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
|
@ -2,10 +2,10 @@
|
|||||||
/* Grid_config.h.in. Generated from configure.ac by autoheader. */
|
/* Grid_config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
/* AVX */
|
/* AVX */
|
||||||
/* #undef AVX1 */
|
#define AVX1 1
|
||||||
|
|
||||||
/* AVX2 */
|
/* AVX2 */
|
||||||
#define AVX2 1
|
/* #undef AVX2 */
|
||||||
|
|
||||||
/* AVX512 */
|
/* AVX512 */
|
||||||
/* #undef AVX512 */
|
/* #undef AVX512 */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user