Step size modification in HMC; ICC happy thread pragmas

2025-07-26 17:27:08 +01:00 · 2018-12-20 09:32:33 +00:00
parent 35ed1defac
commit 2fcedb13dd
2 changed files with 38 additions and 7 deletions
--- a/Grid/qcd/action/gauge/GaugeImplTypes.h
+++ b/Grid/qcd/action/gauge/GaugeImplTypes.h
@@ -89,8 +89,39 @@ public:
    // specific for SU gauge fields
    LinkField Pmu(P.Grid());
    Pmu = Zero();
+    //
+    // Zbigniew Srocinsky thesis:
+    // P(p) =  N \Prod_{x\mu}e^-{1/2 Tr (p^2_mux)}
+    // 
+    // p_x,mu = c_x,mu,a T_a
+    //
+    // Tr p^2 =  sum_a,x,mu 1/2 (c_x,mu,a)^2
+    //
+    // Which implies P(p) =  N \Prod_{x,\mu,a} e^-{1/4 c_xmua^2  }
+    //
+    //                    =  N \Prod_{x,\mu,a} e^-{1/2 (c_xmua/sqrt{2})^2  }
+    // 
+    // Expect cx' = cxmua/sqrt(2) to be a unit variance gaussian.
+    //
+    // Expect        cxmua_new variance sqrt(2).
+    // Was variance  cxmua_old variance 1
+    // 
+    // tau_old * Pold = 1 = tau_old/sqrt(2)  * [Pold * sqrt(2)]
+    //                    = tau_new          * Pnew
+    //
+    // Hence tau_new = tau_cps = tau_guido/sqrt(2).
+    //
+    //
+    // Must scale the momentum by sqrt(2) up to invoke CPS and UKQCD conventions
+    //
+    //
+    // Hence expect cxmua = cx'*sqrt(2).
+    //
+    // Seek the scale parameter to be 
    for (int mu = 0; mu < Nd; mu++) {
      SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
+      RealD scale = ::sqrt(2) ;
+      Pmu = Pmu*scale;
      PokeIndex<LorentzIndex>(P, Pmu, mu);
    }
  }
--- a/Grid/threads/Pragmas.h
+++ b/Grid/threads/Pragmas.h
@@ -82,13 +82,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define DO_PRAGMA_(x) _Pragma (#x)
 #define DO_PRAGMA(x) DO_PRAGMA_(x)

-#define thread_loop( range , ... )            DO_PRAGMA(omp parallel for schedule(static))   for range { __VA_ARGS__ ; };
-#define thread_loop_in_region( range , ... )  DO_PRAGMA(omp for schedule(static))            for range  { __VA_ARGS__ ; };
-#define thread_loop_collapse2( range , ... )  DO_PRAGMA(omp parallel for collapse(2))        for range  { __VA_ARGS__ };
-#define thread_loop_collapse( N , range , ... )  DO_PRAGMA(omp parallel for collapse ( N ) ) for range  { __VA_ARGS__ };
-#define thread_loop_collapse_in_region( N , range , ... )  DO_PRAGMA(omp for collapse ( N )) for range  { __VA_ARGS__ };
-#define thread_region                         DO_PRAGMA("omp parallel")
-#define thread_critical                       DO_PRAGMA("omp critical")
+#define thread_loop( range , ... )                        DO_PRAGMA(omp parallel for schedule(static))for range { __VA_ARGS__ };
+#define thread_loop_collapse2( range , ... )              DO_PRAGMA(omp parallel for collapse(2))     for range { __VA_ARGS__ };
+#define thread_loop_collapse( N , range , ... )           DO_PRAGMA(omp parallel for collapse ( N ) ) for range { __VA_ARGS__ };
+#define thread_loop_in_region( range , ... )              DO_PRAGMA(omp for schedule(static))         for range { __VA_ARGS__ };
+#define thread_loop_collapse_in_region( N , range , ... ) DO_PRAGMA(omp for collapse ( N ))           for range { __VA_ARGS__ };
+#define thread_region                                     DO_PRAGMA(omp parallel)
+#define thread_critical                                   DO_PRAGMA(omp critical)
 #define thread_num(a) omp_get_thread_num()
 #define thread_max(a) omp_get_max_threads()
 #else