1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Hadrons: contractor faster transpose and finer timings

This commit is contained in:
Antonin Portelli 2018-11-12 15:59:54 +00:00
parent cfe281f1a4
commit 344e832a4e

View File

@ -260,6 +260,7 @@ int main(int argc, char* argv[])
TimerArray tAr; TimerArray tAr;
double fusec, busec, flops, bytes, tusec; double fusec, busec, flops, bytes, tusec;
tAr.startTimer("Total");
std::cout << "======== Contraction tr("; std::cout << "======== Contraction tr(";
for (unsigned int g = 0; g < term.size(); ++g) for (unsigned int g = 0; g < term.size(); ++g)
{ {
@ -276,6 +277,7 @@ int main(int argc, char* argv[])
{ {
times.push_back(parseTimeRange(s, par.global.nt)); times.push_back(parseTimeRange(s, par.global.nt));
} }
translations = parseTimeRange(p.translations, par.global.nt); translations = parseTimeRange(p.translations, par.global.nt);
makeTimeSeq(timeSeq, times); makeTimeSeq(timeSeq, times);
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, " std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
@ -285,15 +287,23 @@ int main(int argc, char* argv[])
std::cout << "* Caching transposed last term" << std::endl; std::cout << "* Caching transposed last term" << std::endl;
for (unsigned int t = 0; t < par.global.nt; ++t) for (unsigned int t = 0; t < par.global.nt; ++t)
{ {
tAr.startTimer("Disk vector overhead");
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t]; const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
tAr.stopTimer("Disk vector overhead");
tAr.startTimer("Transpose caching"); tAr.startTimer("Transpose caching");
lastTerm[t] = ref; lastTerm[t].resize(ref.rows(), ref.cols());
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
for (unsigned int i = 0; i < ref.rows(); ++i)
{
lastTerm[t](i, j) = ref(i, j);
}
tAr.stopTimer("Transpose caching"); tAr.stopTimer("Transpose caching");
} }
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD); bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " " std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl; << Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
for (unsigned int i = 0; i < timeSeq.size(); ++i) for (unsigned int i = 0; i < timeSeq.size(); ++i)
{ {
unsigned int dti = 0; unsigned int dti = 0;
@ -365,6 +375,8 @@ int main(int argc, char* argv[])
std::cout << tLast << " " << corr[tLast] << std::endl; std::cout << tLast << " " << corr[tLast] << std::endl;
} }
} }
tAr.stopTimer("Total");
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
} }
return EXIT_SUCCESS; return EXIT_SUCCESS;