1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Hadrons: contractor faster transpose and finer timings

This commit is contained in:
Antonin Portelli 2018-11-12 15:59:54 +00:00
parent cfe281f1a4
commit 344e832a4e

View File

@ -260,6 +260,7 @@ int main(int argc, char* argv[])
TimerArray tAr;
double fusec, busec, flops, bytes, tusec;
tAr.startTimer("Total");
std::cout << "======== Contraction tr(";
for (unsigned int g = 0; g < term.size(); ++g)
{
@ -276,6 +277,7 @@ int main(int argc, char* argv[])
{
times.push_back(parseTimeRange(s, par.global.nt));
}
translations = parseTimeRange(p.translations, par.global.nt);
makeTimeSeq(timeSeq, times);
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
@ -285,15 +287,23 @@ int main(int argc, char* argv[])
std::cout << "* Caching transposed last term" << std::endl;
for (unsigned int t = 0; t < par.global.nt; ++t)
{
tAr.startTimer("Disk vector overhead");
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
tAr.stopTimer("Disk vector overhead");
tAr.startTimer("Transpose caching");
lastTerm[t] = ref;
lastTerm[t].resize(ref.rows(), ref.cols());
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
for (unsigned int i = 0; i < ref.rows(); ++i)
{
lastTerm[t](i, j) = ref(i, j);
}
tAr.stopTimer("Transpose caching");
}
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
for (unsigned int i = 0; i < timeSeq.size(); ++i)
{
unsigned int dti = 0;
@ -365,6 +375,8 @@ int main(int argc, char* argv[])
std::cout << tLast << " " << corr[tLast] << std::endl;
}
}
tAr.stopTimer("Total");
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
}
return EXIT_SUCCESS;