From f4033ad8cb32c34debe1623d84eab7c0d79116d5 Mon Sep 17 00:00:00 2001 From: ferben Date: Mon, 27 Apr 2020 17:46:14 +0100 Subject: [PATCH 1/3] baryon speedup by a factor 2 --- Grid/qcd/utils/BaryonUtils.h | 416 ++++++++++++++++++++++++++++++++++- 1 file changed, 407 insertions(+), 9 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index d65b9176..18d6f84b 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -46,7 +46,8 @@ public: typedef typename SpinMatrixField::vector_object sobj; static const int epsilon[6][3] ; - static const Complex epsilon_sgn[6]; + //static const Complex epsilon_sgn[6]; + static const double epsilon_sgn[6]; private: template @@ -60,6 +61,62 @@ public: const int parity, const int * wick_contractions, robj &result); + template + static void baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + template + static void baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + const int * wick_contractions, + robj &result); + template + static inline void baryon_site_template(unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + template + static inline void baryon_site_template(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + + template + struct BaryonSiteHelper + { + template + static inline void function(const unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + }; public: static void ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -151,14 +208,18 @@ public: template const int BaryonUtils::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; -template +/*template const Complex BaryonUtils::epsilon_sgn[6] = {Complex(1), Complex(1), Complex(1), Complex(-1), Complex(-1), Complex(-1)}; +*/ +template +const double BaryonUtils::epsilon_sgn[6] = {1.0,1.0,1.0,-1.0,-1.0,-1.0}; +//This is the old version template template void BaryonUtils::baryon_site(const mobj &D1, @@ -188,13 +249,15 @@ void BaryonUtils::baryon_site(const mobj &D1, int a_right = epsilon[ie_right][0]; //a' int b_right = epsilon[ie_right][1]; //b' int c_right = epsilon[ie_right][2]; //c' + //complex ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; //This is the \delta_{456}^{123} part if (wick_contraction[0]){ auto D2g = D2 * GammaB_left; for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right +template +void BaryonUtils::baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + + Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) + + auto gD1a = GammaA_left * GammaA_right * D1; + auto gD1b = GammaA_left * g4 * GammaA_right * D1; + auto pD1 = 0.5* (gD1a + (double)parity * gD1b); + auto gD3 = GammaB_right * D3; + + auto D2g = D2 * GammaB_left; + auto pD1g = pD1 * GammaB_left; + auto gD3g = gD3 * GammaB_left; + + for (int ie_left=0; ie_left < 6 ; ie_left++){ + int a_left = epsilon[ie_left][0]; //a + int b_left = epsilon[ie_left][1]; //b + int c_left = epsilon[ie_left][2]; //c + for (int ie_right=0; ie_right < 6 ; ie_right++){ + int a_right = epsilon[ie_right][0]; //a' + int b_right = epsilon[ie_right][1]; //b' + int c_right = epsilon[ie_right][2]; //c' + double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + //All parts together + for (int gamma_left=0; gamma_left( D1, D2, D3, GA_l, GB_l, GA_r, GB_r, parity, result );\ +} + +template +template +void BaryonUtils::baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + const int * wick_contraction, + robj &result) +{ +BARYON_SITE( 0 , 0 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); + +} + + +template +template +inline void BaryonUtils::baryon_site_template(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + constexpr bool wick_contraction_0 = ((mask & (1 << 5)) >> 5); + constexpr bool wick_contraction_1 = ((mask & (1 << 4)) >> 4); + constexpr bool wick_contraction_2 = ((mask & (1 << 3)) >> 3); + constexpr bool wick_contraction_3 = ((mask & (1 << 2)) >> 2); + constexpr bool wick_contraction_4 = ((mask & (1 << 1)) >> 1); + constexpr bool wick_contraction_5 = ((mask & (1 << 0)) >> 0); + + Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) + + auto gD1a = GammaA_left * GammaA_right * D1; + auto gD1b = GammaA_left * g4 * GammaA_right * D1; + auto pD1 = 0.5* (gD1a + (double)parity * gD1b); + auto gD3 = GammaB_right * D3; + + auto D2g = D2 * GammaB_left; + auto pD1g = pD1 * GammaB_left; + auto gD3g = gD3 * GammaB_left; + + for (int ie_left=0; ie_left < 6 ; ie_left++){ + int a_left = epsilon[ie_left][0]; //a + int b_left = epsilon[ie_left][1]; //b + int c_left = epsilon[ie_left][2]; //c + for (int ie_right=0; ie_right < 6 ; ie_right++){ + int a_right = epsilon[ie_right][0]; //a' + int b_right = epsilon[ie_right][1]; //b' + int c_right = epsilon[ie_right][2]; //c' + double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + //All parts together + for (int gamma_left=0; gamma_left +template +template +inline void BaryonUtils::BaryonSiteHelper::function(const unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + if (mask == maxMask) + { + baryon_site_template(D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); + } + else + { + BaryonSiteHelper<(maxMask>0) ? maxMask-1 : 0>::function(mask, D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); + } +} + +// top-level function +template +template +inline void BaryonUtils::baryon_site_template(const unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + BaryonSiteHelper<63>::function(mask, D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); +} + + template void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -259,6 +593,10 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const int parity, ComplexField &baryon_corr) { + const std::chrono::system_clock::time_point start{ std::chrono::system_clock::now() }; + std::time_t now = std::chrono::system_clock::to_time_t( start ); + std::cout << "Setup start " << std::ctime( &now ); + std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; @@ -278,6 +616,16 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, auto v2 = q2_left.View(); auto v3 = q3_left.View(); + const std::chrono::system_clock::time_point stop{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop ); + const std::chrono::duration duration_seconds = stop - start; + const double seconds{ ( duration_seconds.count() ) }; + std::cout << "Setup stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds << " seconds." << std::endl; + + const std::chrono::system_clock::time_point start2{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( start2 ); + std::cout << "Normal Loop start " << std::ctime( &now ); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ //for(int ss=0; ss < grid->oSites(); ss++){ @@ -290,6 +638,56 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites + const std::chrono::system_clock::time_point stop2{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop2 ); + const std::chrono::duration duration_seconds2 = stop2 - start2; + const double seconds2{ ( duration_seconds2.count() ) }; + std::cout << "Normal Loop stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds2 << " seconds." << std::endl; + const std::chrono::system_clock::time_point start4{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( start4 ); + std::cout << "Opt-macro Loop start " << std::ctime( &now ); + // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + thread_for(ss,grid->oSites(),{ + //for(int ss=0; ss < grid->oSites(); ss++){ + + auto D1 = v1[ss]; + auto D2 = v2[ss]; + auto D3 = v3[ss]; + + vobj result=Zero(); + baryon_site_macro(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + vbaryon_corr[ss] = result; + } );//end loop over lattice sites + const std::chrono::system_clock::time_point stop4{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop4 ); + const std::chrono::duration duration_seconds4 = stop4 - start4; + const double seconds4{ ( duration_seconds4.count() ) }; + std::cout << "Opt-macro Loop stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds4 << " seconds." << std::endl; + const std::chrono::system_clock::time_point start3{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( start3 ); + int wick_id=32*wick_contraction[0]+16*wick_contraction[1]+8*wick_contraction[2]+4*wick_contraction[3]+2*wick_contraction[4]+wick_contraction[5]; + std::cout << "Opt-template Loop start " << std::ctime( &now ); + // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + thread_for(ss,grid->oSites(),{ + //for(int ss=0; ss < grid->oSites(); ss++){ + + auto D1 = v1[ss]; + auto D2 = v2[ss]; + auto D3 = v3[ss]; + + vobj result=Zero(); + baryon_site_template(wick_id,D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,result); + vbaryon_corr[ss] = result; + } );//end loop over lattice sites + const std::chrono::system_clock::time_point stop3{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop3 ); + const std::chrono::duration duration_seconds3 = stop3 - start3; + const double seconds3{ ( duration_seconds3.count() ) }; + std::cout << "Opt-template Loop stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds3 << " seconds." << std::endl; + } template template @@ -318,7 +716,7 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); } /*********************************************************************** From 6240e02619be0b0c5bda84173c0dc6cba62aef84 Mon Sep 17 00:00:00 2001 From: ferben Date: Mon, 27 Apr 2020 18:50:53 +0100 Subject: [PATCH 2/3] added assertion to avoid potential infinite loop --- Grid/qcd/utils/BaryonUtils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 18d6f84b..a392f223 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -553,6 +553,7 @@ inline void BaryonUtils::BaryonSiteHelper::function(const unsign const int parity, robj &result) { + assert(mask <= maxMask); if (mask == maxMask) { baryon_site_template(D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); From 56e2f7d088aa36af4990742822c8cd47363d2391 Mon Sep 17 00:00:00 2001 From: ferben Date: Thu, 7 May 2020 10:03:45 +0100 Subject: [PATCH 3/3] deleted test routines. cleaned up fast version. assert Ns=4,Nc=3. --- Grid/qcd/utils/BaryonUtils.h | 414 ++++------------------------------- 1 file changed, 40 insertions(+), 374 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index a392f223..241395c3 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -46,44 +46,11 @@ public: typedef typename SpinMatrixField::vector_object sobj; static const int epsilon[6][3] ; - //static const Complex epsilon_sgn[6]; static const double epsilon_sgn[6]; private: template - static void baryon_site(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - const int * wick_contractions, - robj &result); - template - static void baryon_site_macro(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - robj &result); - template - static void baryon_site_macro(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - const int * wick_contractions, - robj &result); - template - static inline void baryon_site_template(unsigned int mask, const mobj &D1, + static inline void baryon_site(unsigned int mask, const mobj &D1, const mobj &D2, const mobj &D3, const Gamma GammaA_left, @@ -93,7 +60,7 @@ public: const int parity, robj &result); template - static inline void baryon_site_template(const mobj &D1, + static inline void baryon_site(const mobj &D1, const mobj &D2, const mobj &D3, const Gamma GammaA_left, @@ -208,266 +175,12 @@ public: template const int BaryonUtils::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; -/*template -const Complex BaryonUtils::epsilon_sgn[6] = {Complex(1), - Complex(1), - Complex(1), - Complex(-1), - Complex(-1), - Complex(-1)}; -*/ template const double BaryonUtils::epsilon_sgn[6] = {1.0,1.0,1.0,-1.0,-1.0,-1.0}; -//This is the old version -template -template -void BaryonUtils::baryon_site(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - const int * wick_contraction, - robj &result) -{ - - Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) - - auto gD1a = GammaA_left * GammaA_right * D1; - auto gD1b = GammaA_left * g4 * GammaA_right * D1; - auto pD1 = 0.5* (gD1a + (double)parity * gD1b); - auto gD3 = GammaB_right * D3; - - for (int ie_left=0; ie_left < 6 ; ie_left++){ - int a_left = epsilon[ie_left][0]; //a - int b_left = epsilon[ie_left][1]; //b - int c_left = epsilon[ie_left][2]; //c - for (int ie_right=0; ie_right < 6 ; ie_right++){ - int a_right = epsilon[ie_right][0]; //a' - int b_right = epsilon[ie_right][1]; //b' - int c_right = epsilon[ie_right][2]; //c' - //complex ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; - double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; - //This is the \delta_{456}^{123} part - if (wick_contraction[0]){ - auto D2g = D2 * GammaB_left; - for (int alpha_right=0; alpha_right -template -void BaryonUtils::baryon_site_macro(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - robj &result) -{ - - Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) - - auto gD1a = GammaA_left * GammaA_right * D1; - auto gD1b = GammaA_left * g4 * GammaA_right * D1; - auto pD1 = 0.5* (gD1a + (double)parity * gD1b); - auto gD3 = GammaB_right * D3; - - auto D2g = D2 * GammaB_left; - auto pD1g = pD1 * GammaB_left; - auto gD3g = gD3 * GammaB_left; - - for (int ie_left=0; ie_left < 6 ; ie_left++){ - int a_left = epsilon[ie_left][0]; //a - int b_left = epsilon[ie_left][1]; //b - int c_left = epsilon[ie_left][2]; //c - for (int ie_right=0; ie_right < 6 ; ie_right++){ - int a_right = epsilon[ie_right][0]; //a' - int b_right = epsilon[ie_right][1]; //b' - int c_right = epsilon[ie_right][2]; //c' - double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; - //All parts together - for (int gamma_left=0; gamma_left( D1, D2, D3, GA_l, GB_l, GA_r, GB_r, parity, result );\ -} - -template -template -void BaryonUtils::baryon_site_macro(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - const int * wick_contraction, - robj &result) -{ -BARYON_SITE( 0 , 0 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 0 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 0 , 1 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 0 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); -BARYON_SITE( 1 , 1 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); - -} - - template template -inline void BaryonUtils::baryon_site_template(const mobj &D1, +inline void BaryonUtils::baryon_site(const mobj &D1, const mobj &D2, const mobj &D3, const Gamma GammaA_left, @@ -556,7 +269,7 @@ inline void BaryonUtils::BaryonSiteHelper::function(const unsign assert(mask <= maxMask); if (mask == maxMask) { - baryon_site_template(D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); + baryon_site(D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); } else { @@ -567,7 +280,7 @@ inline void BaryonUtils::BaryonSiteHelper::function(const unsign // top-level function template template -inline void BaryonUtils::baryon_site_template(const unsigned int mask, const mobj &D1, +inline void BaryonUtils::baryon_site(const unsigned int mask, const mobj &D1, const mobj &D2, const mobj &D3, const Gamma GammaA_left, @@ -594,40 +307,30 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const int parity, ComplexField &baryon_corr) { - const std::chrono::system_clock::time_point start{ std::chrono::system_clock::now() }; - std::time_t now = std::chrono::system_clock::to_time_t( start ); - std::cout << "Setup start " << std::ctime( &now ); + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; + std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; - assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); + assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - GridBase *grid = q1_left.Grid(); + GridBase *grid = q1_left.Grid(); - int wick_contraction[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; + int wick_id; + for (int ie=0; ie < 6 ; ie++) + wick_id = ((quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0) << (5-ie); - auto vbaryon_corr= baryon_corr.View(); - auto v1 = q1_left.View(); - auto v2 = q2_left.View(); - auto v3 = q3_left.View(); + auto vbaryon_corr= baryon_corr.View(); + auto v1 = q1_left.View(); + auto v2 = q2_left.View(); + auto v3 = q3_left.View(); - const std::chrono::system_clock::time_point stop{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( stop ); - const std::chrono::duration duration_seconds = stop - start; - const double seconds{ ( duration_seconds.count() ) }; - std::cout << "Setup stop " << std::ctime( &now ) - << "Total duration " << std::fixed << std::setprecision(5) << seconds << " seconds." << std::endl; - - const std::chrono::system_clock::time_point start2{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( start2 ); - std::cout << "Normal Loop start " << std::ctime( &now ); - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ //for(int ss=0; ss < grid->oSites(); ss++){ @@ -636,58 +339,9 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, auto D3 = v3[ss]; vobj result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + baryon_site(wick_id,D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites - const std::chrono::system_clock::time_point stop2{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( stop2 ); - const std::chrono::duration duration_seconds2 = stop2 - start2; - const double seconds2{ ( duration_seconds2.count() ) }; - std::cout << "Normal Loop stop " << std::ctime( &now ) - << "Total duration " << std::fixed << std::setprecision(5) << seconds2 << " seconds." << std::endl; - const std::chrono::system_clock::time_point start4{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( start4 ); - std::cout << "Opt-macro Loop start " << std::ctime( &now ); - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - thread_for(ss,grid->oSites(),{ - //for(int ss=0; ss < grid->oSites(); ss++){ - - auto D1 = v1[ss]; - auto D2 = v2[ss]; - auto D3 = v3[ss]; - - vobj result=Zero(); - baryon_site_macro(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); - vbaryon_corr[ss] = result; - } );//end loop over lattice sites - const std::chrono::system_clock::time_point stop4{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( stop4 ); - const std::chrono::duration duration_seconds4 = stop4 - start4; - const double seconds4{ ( duration_seconds4.count() ) }; - std::cout << "Opt-macro Loop stop " << std::ctime( &now ) - << "Total duration " << std::fixed << std::setprecision(5) << seconds4 << " seconds." << std::endl; - const std::chrono::system_clock::time_point start3{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( start3 ); - int wick_id=32*wick_contraction[0]+16*wick_contraction[1]+8*wick_contraction[2]+4*wick_contraction[3]+2*wick_contraction[4]+wick_contraction[5]; - std::cout << "Opt-template Loop start " << std::ctime( &now ); - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - thread_for(ss,grid->oSites(),{ - //for(int ss=0; ss < grid->oSites(); ss++){ - - auto D1 = v1[ss]; - auto D2 = v2[ss]; - auto D3 = v3[ss]; - - vobj result=Zero(); - baryon_site_template(wick_id,D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,result); - vbaryon_corr[ss] = result; - } );//end loop over lattice sites - const std::chrono::system_clock::time_point stop3{ std::chrono::system_clock::now() }; - now = std::chrono::system_clock::to_time_t( stop3 ); - const std::chrono::duration duration_seconds3 = stop3 - start3; - const double seconds3{ ( duration_seconds3.count() ) }; - std::cout << "Opt-template Loop stop " << std::ctime( &now ) - << "Total duration " << std::fixed << std::setprecision(5) << seconds3 << " seconds." << std::endl; } template @@ -704,20 +358,24 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const int parity, robj &result) { - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + + std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; - assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); + assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - int wick_contraction[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; - - result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + int wick_id; + for (int ie=0; ie < 6 ; ie++) + wick_id = ((quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0) << (5-ie); + + result=Zero(); + baryon_site(wick_id,D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,result); } /*********************************************************************** @@ -957,6 +615,10 @@ void BaryonUtils::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop, const std::string op, SpinMatrixField &stn_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + GridBase *grid = qs_ti.Grid(); auto vcorr= stn_corr.View(); @@ -994,6 +656,10 @@ void BaryonUtils::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti, const std::string op, SpinMatrixField &stn_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + GridBase *grid = qs_ti.Grid(); auto vcorr= stn_corr.View();