From f4033ad8cb32c34debe1623d84eab7c0d79116d5 Mon Sep 17 00:00:00 2001 From: ferben Date: Mon, 27 Apr 2020 17:46:14 +0100 Subject: [PATCH] baryon speedup by a factor 2 --- Grid/qcd/utils/BaryonUtils.h | 416 ++++++++++++++++++++++++++++++++++- 1 file changed, 407 insertions(+), 9 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index d65b9176..18d6f84b 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -46,7 +46,8 @@ public: typedef typename SpinMatrixField::vector_object sobj; static const int epsilon[6][3] ; - static const Complex epsilon_sgn[6]; + //static const Complex epsilon_sgn[6]; + static const double epsilon_sgn[6]; private: template @@ -60,6 +61,62 @@ public: const int parity, const int * wick_contractions, robj &result); + template + static void baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + template + static void baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + const int * wick_contractions, + robj &result); + template + static inline void baryon_site_template(unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + template + static inline void baryon_site_template(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + + template + struct BaryonSiteHelper + { + template + static inline void function(const unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result); + }; public: static void ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -151,14 +208,18 @@ public: template const int BaryonUtils::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; -template +/*template const Complex BaryonUtils::epsilon_sgn[6] = {Complex(1), Complex(1), Complex(1), Complex(-1), Complex(-1), Complex(-1)}; +*/ +template +const double BaryonUtils::epsilon_sgn[6] = {1.0,1.0,1.0,-1.0,-1.0,-1.0}; +//This is the old version template template void BaryonUtils::baryon_site(const mobj &D1, @@ -188,13 +249,15 @@ void BaryonUtils::baryon_site(const mobj &D1, int a_right = epsilon[ie_right][0]; //a' int b_right = epsilon[ie_right][1]; //b' int c_right = epsilon[ie_right][2]; //c' + //complex ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; //This is the \delta_{456}^{123} part if (wick_contraction[0]){ auto D2g = D2 * GammaB_left; for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right::baryon_site(const mobj &D1, for (int alpha_right=0; alpha_right +template +void BaryonUtils::baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + + Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) + + auto gD1a = GammaA_left * GammaA_right * D1; + auto gD1b = GammaA_left * g4 * GammaA_right * D1; + auto pD1 = 0.5* (gD1a + (double)parity * gD1b); + auto gD3 = GammaB_right * D3; + + auto D2g = D2 * GammaB_left; + auto pD1g = pD1 * GammaB_left; + auto gD3g = gD3 * GammaB_left; + + for (int ie_left=0; ie_left < 6 ; ie_left++){ + int a_left = epsilon[ie_left][0]; //a + int b_left = epsilon[ie_left][1]; //b + int c_left = epsilon[ie_left][2]; //c + for (int ie_right=0; ie_right < 6 ; ie_right++){ + int a_right = epsilon[ie_right][0]; //a' + int b_right = epsilon[ie_right][1]; //b' + int c_right = epsilon[ie_right][2]; //c' + double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + //All parts together + for (int gamma_left=0; gamma_left( D1, D2, D3, GA_l, GB_l, GA_r, GB_r, parity, result );\ +} + +template +template +void BaryonUtils::baryon_site_macro(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + const int * wick_contraction, + robj &result) +{ +BARYON_SITE( 0 , 0 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 0 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 0 , 1 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 0 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 0 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 0 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 0 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 0 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 1 , 0 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); +BARYON_SITE( 1 , 1 , 1 , 1 , 1 , 1 , D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, wick_contraction, result); + +} + + +template +template +inline void BaryonUtils::baryon_site_template(const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + constexpr bool wick_contraction_0 = ((mask & (1 << 5)) >> 5); + constexpr bool wick_contraction_1 = ((mask & (1 << 4)) >> 4); + constexpr bool wick_contraction_2 = ((mask & (1 << 3)) >> 3); + constexpr bool wick_contraction_3 = ((mask & (1 << 2)) >> 2); + constexpr bool wick_contraction_4 = ((mask & (1 << 1)) >> 1); + constexpr bool wick_contraction_5 = ((mask & (1 << 0)) >> 0); + + Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) + + auto gD1a = GammaA_left * GammaA_right * D1; + auto gD1b = GammaA_left * g4 * GammaA_right * D1; + auto pD1 = 0.5* (gD1a + (double)parity * gD1b); + auto gD3 = GammaB_right * D3; + + auto D2g = D2 * GammaB_left; + auto pD1g = pD1 * GammaB_left; + auto gD3g = gD3 * GammaB_left; + + for (int ie_left=0; ie_left < 6 ; ie_left++){ + int a_left = epsilon[ie_left][0]; //a + int b_left = epsilon[ie_left][1]; //b + int c_left = epsilon[ie_left][2]; //c + for (int ie_right=0; ie_right < 6 ; ie_right++){ + int a_right = epsilon[ie_right][0]; //a' + int b_right = epsilon[ie_right][1]; //b' + int c_right = epsilon[ie_right][2]; //c' + double ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + //All parts together + for (int gamma_left=0; gamma_left +template +template +inline void BaryonUtils::BaryonSiteHelper::function(const unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + if (mask == maxMask) + { + baryon_site_template(D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); + } + else + { + BaryonSiteHelper<(maxMask>0) ? maxMask-1 : 0>::function(mask, D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); + } +} + +// top-level function +template +template +inline void BaryonUtils::baryon_site_template(const unsigned int mask, const mobj &D1, + const mobj &D2, + const mobj &D3, + const Gamma GammaA_left, + const Gamma GammaB_left, + const Gamma GammaA_right, + const Gamma GammaB_right, + const int parity, + robj &result) +{ + BaryonSiteHelper<63>::function(mask, D1, D2, D3, GammaA_left, GammaB_left, GammaA_right, GammaB_right, parity, result); +} + + template void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -259,6 +593,10 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const int parity, ComplexField &baryon_corr) { + const std::chrono::system_clock::time_point start{ std::chrono::system_clock::now() }; + std::time_t now = std::chrono::system_clock::to_time_t( start ); + std::cout << "Setup start " << std::ctime( &now ); + std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; @@ -278,6 +616,16 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, auto v2 = q2_left.View(); auto v3 = q3_left.View(); + const std::chrono::system_clock::time_point stop{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop ); + const std::chrono::duration duration_seconds = stop - start; + const double seconds{ ( duration_seconds.count() ) }; + std::cout << "Setup stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds << " seconds." << std::endl; + + const std::chrono::system_clock::time_point start2{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( start2 ); + std::cout << "Normal Loop start " << std::ctime( &now ); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ //for(int ss=0; ss < grid->oSites(); ss++){ @@ -290,6 +638,56 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites + const std::chrono::system_clock::time_point stop2{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop2 ); + const std::chrono::duration duration_seconds2 = stop2 - start2; + const double seconds2{ ( duration_seconds2.count() ) }; + std::cout << "Normal Loop stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds2 << " seconds." << std::endl; + const std::chrono::system_clock::time_point start4{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( start4 ); + std::cout << "Opt-macro Loop start " << std::ctime( &now ); + // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + thread_for(ss,grid->oSites(),{ + //for(int ss=0; ss < grid->oSites(); ss++){ + + auto D1 = v1[ss]; + auto D2 = v2[ss]; + auto D3 = v3[ss]; + + vobj result=Zero(); + baryon_site_macro(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + vbaryon_corr[ss] = result; + } );//end loop over lattice sites + const std::chrono::system_clock::time_point stop4{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop4 ); + const std::chrono::duration duration_seconds4 = stop4 - start4; + const double seconds4{ ( duration_seconds4.count() ) }; + std::cout << "Opt-macro Loop stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds4 << " seconds." << std::endl; + const std::chrono::system_clock::time_point start3{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( start3 ); + int wick_id=32*wick_contraction[0]+16*wick_contraction[1]+8*wick_contraction[2]+4*wick_contraction[3]+2*wick_contraction[4]+wick_contraction[5]; + std::cout << "Opt-template Loop start " << std::ctime( &now ); + // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + thread_for(ss,grid->oSites(),{ + //for(int ss=0; ss < grid->oSites(); ss++){ + + auto D1 = v1[ss]; + auto D2 = v2[ss]; + auto D3 = v3[ss]; + + vobj result=Zero(); + baryon_site_template(wick_id,D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,result); + vbaryon_corr[ss] = result; + } );//end loop over lattice sites + const std::chrono::system_clock::time_point stop3{ std::chrono::system_clock::now() }; + now = std::chrono::system_clock::to_time_t( stop3 ); + const std::chrono::duration duration_seconds3 = stop3 - start3; + const double seconds3{ ( duration_seconds3.count() ) }; + std::cout << "Opt-template Loop stop " << std::ctime( &now ) + << "Total duration " << std::fixed << std::setprecision(5) << seconds3 << " seconds." << std::endl; + } template template @@ -318,7 +716,7 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); } /***********************************************************************