Hadrons: contractor faster transpose and finer timings

2025-07-27 09:47:08 +01:00 · 2018-11-12 15:59:54 +00:00
parent cfe281f1a4
commit 344e832a4e
1 changed files with 13 additions and 1 deletions
--- a/Hadrons/Utilities/Contractor.cc
+++ b/Hadrons/Utilities/Contractor.cc
@@ -260,6 +260,7 @@ int main(int argc, char* argv[])
        TimerArray                             tAr;
        double                                 fusec, busec, flops, bytes, tusec;

+        tAr.startTimer("Total");
        std::cout << "======== Contraction tr(";
        for (unsigned int g = 0; g < term.size(); ++g)
        {
@@ -276,6 +277,7 @@ int main(int argc, char* argv[])
        {
            times.push_back(parseTimeRange(s, par.global.nt));
        }
+
        translations = parseTimeRange(p.translations, par.global.nt);
        makeTimeSeq(timeSeq, times);
        std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
@@ -285,15 +287,23 @@ int main(int argc, char* argv[])
        std::cout << "* Caching transposed last term" << std::endl;
        for (unsigned int t = 0; t < par.global.nt; ++t)
        {
+            tAr.startTimer("Disk vector overhead");
            const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
+            tAr.stopTimer("Disk vector overhead");

            tAr.startTimer("Transpose caching");
-            lastTerm[t] = ref;
+            lastTerm[t].resize(ref.rows(), ref.cols());
+            parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
+            for (unsigned int i = 0; i < ref.rows(); ++i)
+            {
+                lastTerm[t](i, j) = ref(i, j);
+            }
            tAr.stopTimer("Transpose caching");
        }
        bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
        std::cout << Sec(tAr.getDTimer("Transpose caching")) << " " 
                  << Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
+        std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
        for (unsigned int i = 0; i < timeSeq.size(); ++i)
        {
            unsigned int dti = 0;
@@ -365,6 +375,8 @@ int main(int argc, char* argv[])
                std::cout << tLast << " " << corr[tLast] << std::endl;
            }
        }
+        tAr.stopTimer("Total");
+        printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
    }
    
    return EXIT_SUCCESS;