1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Hadrons: contractor fixes and improvements

This commit is contained in:
Antonin Portelli 2018-11-08 18:46:28 +00:00
parent 65349b07a7
commit 69b6ba0a73

View File

@ -142,11 +142,38 @@ std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int
return tSet; return tSet;
} }
void printPerf(const double flops, const double fusec, const double bytes, const double busec) struct Flops
{ {
std::cout << std::setw(10) << flops/fusec/1.0e3 << " GFlop/s " Flops(const double flops, const double fusec)
<< std::setw(10) << bytes/busec*1.0e6/1024/1024/1024 << " GB/s" {
<< std::endl; gFlopsPerSec = flops/fusec/1.0e3;
}
double gFlopsPerSec;
};
// Stream a Flops measurement as "<rate> GFlop/s", with the rate printed in a
// field of width 10.
//
// s: output stream to write to; returned so that insertions can be chained.
// f: measurement to print. It is taken by const lvalue reference so that both
//    temporaries (e.g. `std::cout << Flops(flops, fusec)`) and named objects
//    can be streamed; a const rvalue reference would reject the latter.
inline std::ostream & operator<< (std::ostream& s, const Flops &f)
{
    s << std::setw(10) << f.gFlopsPerSec << " GFlop/s";
    return s;
}
// Memory-throughput figure derived from a byte count and an elapsed time.
struct Bytes
{
    // bytes: amount of data moved.
    // busec: elapsed time; presumably in microseconds, as the 1.0e6 factor
    //        suggests (confirm against the caller's timer).
    // The ratio is scaled by 1.0e6 and divided by 1024 three times, i.e. the
    // stored rate is expressed in units of 2^30 bytes per unit time.
    Bytes(const double bytes, const double busec)
    : gBytesPerSec(bytes/busec*1.0e6/1024/1024/1024)
    {}

    // Resulting rate, computed once at construction.
    double gBytesPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Bytes &&b)
{
s << std::setw(10) << b.gBytesPerSec << " GB/s";
return s;
} }
int main(int argc, char* argv[]) int main(int argc, char* argv[])
@ -238,8 +265,9 @@ int main(int argc, char* argv[])
std::cout << "-- caching transposed last term" << std::endl; std::cout << "-- caching transposed last term" << std::endl;
for (unsigned int t = 0; t < par.global.nt; ++t) for (unsigned int t = 0; t < par.global.nt; ++t)
{ {
buf = a2aMat.at(term.back())[t]; const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
lastTerm[t] = buf;
lastTerm[t] = ref;
} }
for (auto &t: timeSeq) for (auto &t: timeSeq)
{ {
@ -252,7 +280,7 @@ int main(int argc, char* argv[])
std::cout << "-- position " << t << ", translation " << dt << std::endl; std::cout << "-- position " << t << ", translation " << dt << std::endl;
if (term.size() > 2) if (term.size() > 2)
{ {
std::cout << "* matrix products "; std::cout << "*" << std::setw(12) << "products";
} }
flops = 0.; flops = 0.;
bytes = 0.; bytes = 0.;
@ -266,16 +294,16 @@ int main(int argc, char* argv[])
busec -= usecond(); busec -= usecond();
A2AContraction::mul(tmp, prod, ref); A2AContraction::mul(tmp, prod, ref);
fusec += usecond(); fusec += usecond();
flops += A2AContraction::mulFlops(tmp, prod, ref); flops += A2AContraction::mulFlops(prod, ref);
prod = tmp; prod = tmp;
busec += usecond(); busec += usecond();
bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD); bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
} }
if (term.size() > 2) if (term.size() > 2)
{ {
printPerf(flops, fusec, bytes, busec); std::cout << Flops(flops, fusec) << " " << Bytes(bytes, busec) << std::endl;
} }
std::cout << "* traces "; std::cout << "*" << std::setw(12) << "traces";
flops = 0.; flops = 0.;
bytes = 0.; bytes = 0.;
fusec = 0.; fusec = 0.;
@ -284,13 +312,13 @@ int main(int argc, char* argv[])
{ {
fusec -= usecond(); fusec -= usecond();
busec -= usecond(); busec -= usecond();
A2AContraction::accTrMul(corr[tLast], prod, lastTerm[tLast]); A2AContraction::accTrMul(corr[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
fusec += usecond(); fusec += usecond();
busec += usecond(); busec += usecond();
flops += A2AContraction::accTrMulFlops(corr[tLast], prod, lastTerm[tLast]); flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD); bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
} }
printPerf(flops, fusec, bytes, busec); std::cout << Flops(flops, fusec) << " " << Bytes(bytes, busec) << std::endl;
} }
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast) for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{ {