Threading support rework.

Replaced the OpenMP parallel pragmas with macros; implemented deterministic thread reduction in the style of BFM.
2025-12-20 20:54:30 +00:00 · 2015-05-12 07:51:41 +01:00
parent b1d2c60d07
commit 6103c29ee3
26 changed files with 276 additions and 184 deletions
--- a/lib/lattice/Grid_lattice_local.h
+++ b/lib/lattice/Grid_lattice_local.h
@@ -16,7 +16,7 @@ namespace Grid {
    inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
    {
      Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
-#pragma omp parallel for
+PARALLEL_FOR_LOOP
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 	  ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
        }
@@ -29,7 +29,7 @@ namespace Grid {
      -> Lattice<typename vobj::tensor_reduced>
    {
      Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
-#pragma omp parallel for
+PARALLEL_FOR_LOOP
      for(int ss=0;ss<rhs._grid->oSites(); ss++){
 	ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
      }
@@ -42,7 +42,7 @@ namespace Grid {
    inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
    {
        Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
-#pragma omp parallel for
+PARALLEL_FOR_LOOP
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
            ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
        }