Thread loop constructs changing a little

2026-06-11 14:43:11 +01:00 · 2019-06-15 12:54:11 +01:00
parent 462900b48d
commit cb336aa8f8
8 changed files with 22 additions and 25 deletions
@@ -170,7 +170,7 @@ public:
        if ((MatLeft::Options == Eigen::RowMajor) and
            (MatRight::Options == Eigen::ColMajor))
        {
-  	  thread_loop( (unsigned int r = 0; r < a.rows(); ++r),
+  	  thread_for(r,a.rows(),
            {
                C tmp;
 #ifdef USE_MKL
@@ -186,7 +186,7 @@ public:
        }
        else
 	  {
-            thread_loop( (unsigned int c = 0; c < a.cols(); ++c),
+            thread_for(c,a.cols(),
            {
                C tmp;
 #ifdef USE_MKL 
@@ -646,13 +646,14 @@ void A2AMatrixBlockComputation<T, Field, MetadataType, TIo>
            bytes    += kernel.bytes(N_iii, N_jjj);

            START_TIMER("cache copy");
-            thread_loop_collapse( 5, (int e =0;e<next_;e++),
-            for(int s =0;s< nstr_;s++)
-            for(int t =0;t< nt_;t++)
-            for(int iii=0;iii< N_iii;iii++)
-            for(int jjj=0;jjj< N_jjj;jjj++)
-            {
+            thread_for_collapse( 5,e,next_,{
+              for(int s =0;s< nstr_;s++)
+              for(int t =0;t< nt_;t++)
+              for(int iii=0;iii< N_iii;iii++)
+              for(int jjj=0;jjj< N_jjj;jjj++)
+              {
                mBlock(e,s,t,ii+iii,jj+jjj) = mCacheBlock(e,s,t,iii,jjj);
+              }
            });
            STOP_TIMER("cache copy");
        }
@@ -29,7 +29,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
 #include <Hadrons/Global.hpp>

 using namespace Grid;
-using namespace QCD;
 using namespace Hadrons;

 HadronsLogger Hadrons::HadronsLogError(1,"Error");
@@ -50,7 +50,6 @@ See the full license in the file "LICENSE" in the top level distribution directo

 #define BEGIN_HADRONS_NAMESPACE \
 namespace Grid {\
-using namespace QCD;\
 namespace Hadrons {\
 using Grid::operator<<;\
 using Grid::operator>>;
@@ -140,7 +140,7 @@ void TFourQuark<FImpl1, FImpl2>::tensorprod(LatticeSpinColourSpinColourMatrix &l
 	    auto  lret_v = lret.View();
 	    auto  a_v = a.View();
 	    auto  b_v = b.View();
-            thread_loop( (auto site=lret_v.begin();site<lret_v.end();site++) ,{
+            thread_foreach( site,lret_v,{
 		vTComplex left;
                for(int si=0; si < Ns; ++si){
                for(int sj=0; sj < Ns; ++sj){
@@ -28,7 +28,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
 #include <Hadrons/TimerArray.hpp>

 using namespace Grid;
-using namespace QCD;
 using namespace Hadrons;

 void TimerArray::startTimer(const std::string &name)
@@ -30,7 +30,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
 #include <Hadrons/TimerArray.hpp>

 using namespace Grid;
-using namespace QCD;
 using namespace Hadrons;

 #define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
@@ -353,11 +352,12 @@ int main(int argc, char* argv[])

                tAr.startTimer("Transpose caching");
                lastTerm[t].resize(ref.rows(), ref.cols());
-                thread_loop( (unsigned int j = 0; j < ref.cols(); ++j),
-                for (unsigned int i = 0; i < ref.rows(); ++i)
-                {
-                    lastTerm[t](i, j) = ref(i, j);
-                });
+                thread_for( j,ref.cols(),{
+                  for (unsigned int i = 0; i < ref.rows(); ++i)
+                  {
+                      lastTerm[t](i, j) = ref(i, j);
+                  }
+		});
                tAr.stopTimer("Transpose caching");
            }
            bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
@@ -205,7 +205,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
        auto nr = a.rows(), nc = a.cols();
        
        res = 0.;
-        thread_loop( (unsigned int i = 0; i < nr; ++i),
+        thread_for(i,nr,
        {
            ComplexD tmp = 0.;

@@ -225,7 +225,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
        auto nr = a.rows(), nc = a.cols();
        
        res = 0.;
-        thread_loop( (unsigned int j = 0; j < nc; ++j),
+        thread_for(j,nc,
        {
            ComplexD tmp = 0.;

@@ -248,7 +248,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
    [](ComplexD &res, const MatLeft &a, const MatRight &b)
    {
        res = 0.;
-        thread_loop( (unsigned int r = 0; r < a.rows(); ++r),
+        thread_for(r,a.rows(),
        {
            ComplexD tmp;

@@ -263,7 +263,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
    [](ComplexD &res, const MatLeft &a, const MatRight &b)
    {
        res = 0.;
-        thread_loop( (unsigned int c = 0; c < a.cols(); ++c),
+        thread_for(c,a.cols(),
        {
            ComplexD tmp;

@@ -284,7 +284,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
    [](ComplexD &res, const MatLeft &a, const MatRight &b)
    {
        res = 0.;
-        thread_loop( (unsigned int r = 0; r < a.rows(); ++r),
+        thread_for(r,a.rows(),
        {
            ComplexD tmp;

@@ -299,7 +299,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
    [](ComplexD &res, const MatLeft &a, const MatRight &b)
    {
        res = 0.;
-        thread_loop( (unsigned int c = 0; c < a.cols(); ++c),
+        thread_for(c,a.cols(),
        {
            ComplexD tmp;

@@ -29,7 +29,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
 #include <Hadrons/Environment.hpp>

 using namespace Grid;
-using namespace QCD;
 using namespace Hadrons;

 template <typename FOut, typename FIn>