1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

clean up HISQSmear with decltypes

This commit is contained in:
david clarke 2023-10-12 12:41:06 -06:00
parent 36600899e2
commit bf4369f72d

View File

@ -40,6 +40,8 @@ directory
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
// TODO: find a way to fold this into the stencil header. Doing so needs access to the grid to get
// Nd, since we don't want to inherit from QCD.h.
/*! @brief append arbitrary shift path to shifts */ /*! @brief append arbitrary shift path to shifts */
template<typename... Args> template<typename... Args>
void appendShift(std::vector<Coordinate>& shifts, int dir, Args... args) { void appendShift(std::vector<Coordinate>& shifts, int dir, Args... args) {
@ -51,16 +53,6 @@ void appendShift(std::vector<Coordinate>& shifts, int dir, Args... args) {
} }
/*! @brief apply, in place, the SIMD permutations selected by the low four bits of perm
 *
 * This exists to optimize the SIMD handling (it will also need to be in the class, at
 * least for now). For every bit d in 0..3 that is set in perm, permute(inout,copy,d) is
 * called, where copy holds the value of inout before that step. Bits are processed from
 * lowest to highest, so each permutation acts on the result of the previous one.
 */
template<class vobj> void gpermute(vobj & inout,int perm) {
  vobj scratch=inout;
  for (int level=0; level<4; level++) {
    // skip permutation levels whose bit is not requested
    if ((perm & (1<<level)) == 0) continue;
    permute(inout,scratch,level);
    // refresh the source copy so the next level sees the updated value
    scratch=inout;
  }
}
/*! @brief figure out the stencil index from mu and nu */ /*! @brief figure out the stencil index from mu and nu */
inline int stencilIndex(int mu, int nu) { inline int stencilIndex(int mu, int nu) {
// Nshifts depends on how you built the stencil // Nshifts depends on how you built the stencil
@ -163,6 +155,12 @@ public:
Ughost_5linkA=Zero(); Ughost_5linkA=Zero();
Ughost_5linkB=Zero(); Ughost_5linkB=Zero();
// We infer some types that will be needed in the calculation.
typedef decltype(gStencil.GetEntry(0,0)) stencilElement;
typedef decltype(coalescedReadGeneralPermute(U_v[0](0),gStencil.GetEntry(0,0)->_permute,Nd)) U3matrix;
stencilElement SE0, SE1, SE2, SE3, SE4;
U3matrix U0, U1, U2, U3, U4, U5, W;
// 3-link // 3-link
for(int site=0;site<U_v.size();site++){ for(int site=0;site<U_v.size();site++){
for(int nu=0;nu<Nd;nu++) { for(int nu=0;nu<Nd;nu++) {
@ -171,25 +169,25 @@ public:
// The stencil gives us support points in the mu-nu plane that we will use to // The stencil gives us support points in the mu-nu plane that we will use to
// grab the links we need. // grab the links we need.
auto SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset; SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
auto SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset; SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
auto SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset; SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset;
auto SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
auto SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset; SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
// When you're deciding whether to take an adjoint, the question is: how is the // When you're deciding whether to take an adjoint, the question is: how is the
// stored link oriented compared to the one you want? If I imagine myself travelling // stored link oriented compared to the one you want? If I imagine myself travelling
// with the to-be-updated link, I have two possible, alternative 3-link paths I can // with the to-be-updated link, I have two possible, alternative 3-link paths I can
// take, one starting by going to the left, the other starting by going to the right. // take, one starting by going to the left, the other starting by going to the right.
auto U0 = U_v[x_p_mu ](nu); gpermute(U0,SE0->_permute); U0 = coalescedReadGeneralPermute(U_v[x_p_mu ](nu),SE0->_permute,Nd);
auto U1 = U_v[x_p_nu ](mu); gpermute(U1,SE1->_permute); U1 = coalescedReadGeneralPermute(U_v[x_p_nu ](mu),SE1->_permute,Nd);
auto U2 = U_v[x ](nu); gpermute(U2,SE2->_permute); U2 = coalescedReadGeneralPermute(U_v[x ](nu),SE2->_permute,Nd);
auto U3 = U_v[x_p_mu_m_nu](nu); gpermute(U3,SE3->_permute); U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd);
auto U4 = U_v[x_m_nu ](mu); gpermute(U4,SE4->_permute); U4 = coalescedReadGeneralPermute(U_v[x_m_nu ](mu),SE4->_permute,Nd);
auto U5 = U_v[x_m_nu ](nu); gpermute(U5,SE4->_permute); U5 = coalescedReadGeneralPermute(U_v[x_m_nu ](nu),SE4->_permute,Nd);
// "left" "right" // "left" "right"
auto W = U2*U1*adj(U0) + adj(U5)*U4*U3; W = U2*U1*adj(U0) + adj(U5)*U4*U3;
U_3link_v[site](nu) = W; U_3link_v[site](nu) = W;
@ -197,7 +195,6 @@ public:
} }
} }
// 5-link // 5-link
for(int site=0;site<U_v.size();site++){ for(int site=0;site<U_v.size();site++){
int sigmaIndex = 0; int sigmaIndex = 0;
@ -207,22 +204,22 @@ public:
for(int rho=0;rho<Nd;rho++) { for(int rho=0;rho<Nd;rho++) {
if (rho == mu || rho == nu) continue; if (rho == mu || rho == nu) continue;
auto SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset; SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
auto SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset; SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
auto SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset; SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset;
auto SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
auto SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset; SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
// gpermutes will be replaced with single line of code, combines load and permute // gpermutes will be replaced with single line of code, combines load and permute
// into one step. still in pull request stage // into one step. still in pull request stage
auto U0 = U_v[x_p_mu ](nu) ; gpermute(U0,SE0->_permute); U0 = coalescedReadGeneralPermute( U_v[x_p_mu ](nu ),SE0->_permute,Nd);
auto U1 = U_3link_v[x_p_nu ](rho); gpermute(U1,SE1->_permute); U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu ](rho),SE1->_permute,Nd);
auto U2 = U_v[x ](nu) ; gpermute(U2,SE2->_permute); U2 = coalescedReadGeneralPermute( U_v[x ](nu ),SE2->_permute,Nd);
auto U3 = U_v[x_p_mu_m_nu](nu) ; gpermute(U3,SE3->_permute); U3 = coalescedReadGeneralPermute( U_v[x_p_mu_m_nu](nu ),SE3->_permute,Nd);
auto U4 = U_3link_v[x_m_nu ](rho); gpermute(U4,SE4->_permute); U4 = coalescedReadGeneralPermute(U_3link_v[x_m_nu ](rho),SE4->_permute,Nd);
auto U5 = U_v[x_m_nu ](nu) ; gpermute(U5,SE4->_permute); U5 = coalescedReadGeneralPermute( U_v[x_m_nu ](nu ),SE4->_permute,Nd);
auto W = U2*U1*adj(U0) + adj(U5)*U4*U3; W = U2*U1*adj(U0) + adj(U5)*U4*U3;
if(sigmaIndex<3) { if(sigmaIndex<3) {
U_5linkA_v[site](rho) = W; U_5linkA_v[site](rho) = W;
@ -246,33 +243,29 @@ public:
for(int rho=0;rho<Nd;rho++) { for(int rho=0;rho<Nd;rho++) {
if (rho == mu || rho == nu) continue; if (rho == mu || rho == nu) continue;
auto SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset; SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
auto SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset; SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
auto SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset; SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset;
auto SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset; SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
auto SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset; SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
auto U0 = U_v[x_p_mu ](nu) ; gpermute(U0,SE0->_permute); U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd);
// decltype, or auto U1 = { ? ... }
auto U1 = U0;
if(sigmaIndex<3) { if(sigmaIndex<3) {
U1 = U_5linkB_v[x_p_nu](rho); gpermute(U1,SE1->_permute); U1 = coalescedReadGeneralPermute(U_5linkB_v[x_p_nu](rho),SE1->_permute,Nd);
} else { } else {
U1 = U_5linkA_v[x_p_nu](rho); gpermute(U1,SE1->_permute); U1 = coalescedReadGeneralPermute(U_5linkA_v[x_p_nu](rho),SE1->_permute,Nd);
} }
auto U2 = U_v[x ](nu) ; gpermute(U2,SE2->_permute); U2 = coalescedReadGeneralPermute(U_v[x](nu),SE2->_permute,Nd);
auto U3 = U_v[x_p_mu_m_nu](nu) ; gpermute(U3,SE3->_permute); U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd);
auto U4 = U0;
if(sigmaIndex<3) { if(sigmaIndex<3) {
U4 = U_5linkB_v[x_m_nu](rho); gpermute(U4,SE4->_permute); U4 = coalescedReadGeneralPermute(U_5linkB_v[x_m_nu](rho),SE4->_permute,Nd);
} else { } else {
U4 = U_5linkA_v[x_m_nu](rho); gpermute(U4,SE4->_permute); U4 = coalescedReadGeneralPermute(U_5linkA_v[x_m_nu](rho),SE4->_permute,Nd);
} }
auto U5 = U_v[x_m_nu ](nu) ; gpermute(U5,SE4->_permute); U5 = coalescedReadGeneralPermute(U_v[x_m_nu](nu),SE4->_permute,Nd);
auto W = U2*U1*adj(U0) + adj(U5)*U4*U3; W = U2*U1*adj(U0) + adj(U5)*U4*U3;
// std::vector<LatticeColorMatrix>(3) ?
U_fat_v[site](mu) = U_fat_v[site](mu) + lt.c_7*W; U_fat_v[site](mu) = U_fat_v[site](mu) + lt.c_7*W;
sigmaIndex++; sigmaIndex++;