mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Hadrons: contractor faster transpose and finer timings
This commit is contained in:
parent
cfe281f1a4
commit
344e832a4e
@ -260,6 +260,7 @@ int main(int argc, char* argv[])
|
|||||||
TimerArray tAr;
|
TimerArray tAr;
|
||||||
double fusec, busec, flops, bytes, tusec;
|
double fusec, busec, flops, bytes, tusec;
|
||||||
|
|
||||||
|
tAr.startTimer("Total");
|
||||||
std::cout << "======== Contraction tr(";
|
std::cout << "======== Contraction tr(";
|
||||||
for (unsigned int g = 0; g < term.size(); ++g)
|
for (unsigned int g = 0; g < term.size(); ++g)
|
||||||
{
|
{
|
||||||
@ -276,6 +277,7 @@ int main(int argc, char* argv[])
|
|||||||
{
|
{
|
||||||
times.push_back(parseTimeRange(s, par.global.nt));
|
times.push_back(parseTimeRange(s, par.global.nt));
|
||||||
}
|
}
|
||||||
|
|
||||||
translations = parseTimeRange(p.translations, par.global.nt);
|
translations = parseTimeRange(p.translations, par.global.nt);
|
||||||
makeTimeSeq(timeSeq, times);
|
makeTimeSeq(timeSeq, times);
|
||||||
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
|
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
|
||||||
@ -285,15 +287,23 @@ int main(int argc, char* argv[])
|
|||||||
std::cout << "* Caching transposed last term" << std::endl;
|
std::cout << "* Caching transposed last term" << std::endl;
|
||||||
for (unsigned int t = 0; t < par.global.nt; ++t)
|
for (unsigned int t = 0; t < par.global.nt; ++t)
|
||||||
{
|
{
|
||||||
|
tAr.startTimer("Disk vector overhead");
|
||||||
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
|
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
|
||||||
|
tAr.stopTimer("Disk vector overhead");
|
||||||
|
|
||||||
tAr.startTimer("Transpose caching");
|
tAr.startTimer("Transpose caching");
|
||||||
lastTerm[t] = ref;
|
lastTerm[t].resize(ref.rows(), ref.cols());
|
||||||
|
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
|
||||||
|
for (unsigned int i = 0; i < ref.rows(); ++i)
|
||||||
|
{
|
||||||
|
lastTerm[t](i, j) = ref(i, j);
|
||||||
|
}
|
||||||
tAr.stopTimer("Transpose caching");
|
tAr.stopTimer("Transpose caching");
|
||||||
}
|
}
|
||||||
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
||||||
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
|
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
|
||||||
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
|
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
|
||||||
|
std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
|
||||||
for (unsigned int i = 0; i < timeSeq.size(); ++i)
|
for (unsigned int i = 0; i < timeSeq.size(); ++i)
|
||||||
{
|
{
|
||||||
unsigned int dti = 0;
|
unsigned int dti = 0;
|
||||||
@ -365,6 +375,8 @@ int main(int argc, char* argv[])
|
|||||||
std::cout << tLast << " " << corr[tLast] << std::endl;
|
std::cout << tLast << " " << corr[tLast] << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
tAr.stopTimer("Total");
|
||||||
|
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
|
||||||
}
|
}
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
|
Loading…
Reference in New Issue
Block a user