1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Wilson perf improvements with Gauge prefetching

This commit is contained in:
Peter Boyle
2015-05-06 06:37:21 +01:00
parent cdd5cdeda2
commit 52403d587c
14 changed files with 2819 additions and 3386 deletions

View File

@ -106,7 +106,9 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
for(int sss=0;sss<grid->oSites();sss++){
int ss = sss;
//int ss = Stencil._LebesgueReorder[sss];
int ssu= sss;
//int ss = 0;
//int ss = Stencil._LebesgueReorder[sss];
// Xp
offset = Stencil._offsets [Xp][ss];
@ -123,7 +125,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Xp),&chi());
mult(&Uchi(),&Umu._odata[ssu](Xp),&chi());
prefetch(Umu._odata[ssu](Yp));
spReconXp(result,Uchi);
// Yp
@ -141,7 +144,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Yp),&chi());
mult(&Uchi(),&Umu._odata[ssu](Yp),&chi());
prefetch(Umu._odata[ssu](Zp));
accumReconYp(result,Uchi);
// Zp
@ -159,7 +163,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Zp),&chi());
mult(&Uchi(),&Umu._odata[ssu](Zp),&chi());
prefetch(Umu._odata[ssu](Tp));
accumReconZp(result,Uchi);
// Tp
@ -177,7 +182,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Tp),&chi());
mult(&Uchi(),&Umu._odata[ssu](Tp),&chi());
prefetch(Umu._odata[ssu](Xm));
accumReconTp(result,Uchi);
// Xm
@ -195,7 +201,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Xm),&chi());
mult(&Uchi(),&Umu._odata[ssu](Xm),&chi());
prefetch(Umu._odata[ssu](Ym));
accumReconXm(result,Uchi);
@ -214,7 +221,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Ym),&chi());
mult(&Uchi(),&Umu._odata[ssu](Ym),&chi());
prefetch(Umu._odata[ssu](Zm));
accumReconYm(result,Uchi);
// Zm
@ -232,7 +240,8 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Zm),&chi());
mult(&Uchi(),&Umu._odata[ssu](Zm),&chi());
prefetch(Umu._odata[ssu](Tm));
accumReconZm(result,Uchi);
// Tm
@ -250,7 +259,7 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
} else {
chi=comm_buf[offset];
}
mult(&Uchi(),&Umu._odata[ss](Tm),&chi());
mult(&Uchi(),&Umu._odata[ssu](Tm),&chi());
accumReconTm(result,Uchi);
vstream(out._odata[ss],result);