mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Hadrons: contractor trajectory loop and file output
This commit is contained in:
parent
f592ec8baa
commit
f22a27d7f9
@ -166,7 +166,13 @@ std::string Hadrons::dirname(const std::string &s)
|
||||
|
||||
void Hadrons::makeFileDir(const std::string filename, GridBase *g)
|
||||
{
|
||||
if (g->IsBoss())
|
||||
bool doIt = true;
|
||||
|
||||
if (g)
|
||||
{
|
||||
doIt = g->IsBoss();
|
||||
}
|
||||
if (doIt)
|
||||
{
|
||||
std::string dir = dirname(filename);
|
||||
int status = mkdir(dir);
|
||||
|
@ -218,15 +218,15 @@ typedef XmlReader ResultReader;
|
||||
typedef XmlWriter ResultWriter;
|
||||
#endif
|
||||
|
||||
#define RESULT_FILE_NAME(name) \
|
||||
name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt
|
||||
#define RESULT_FILE_NAME(name, traj) \
|
||||
name + "." + std::to_string(traj) + "." + resultFileExt
|
||||
|
||||
// recursive mkdir
|
||||
#define MAX_PATH_LENGTH 512u
|
||||
int mkdir(const std::string dirName);
|
||||
std::string basename(const std::string &s);
|
||||
std::string dirname(const std::string &s);
|
||||
void makeFileDir(const std::string filename, GridBase *g);
|
||||
void makeFileDir(const std::string filename, GridBase *g = nullptr);
|
||||
|
||||
// default Schur convention
|
||||
#ifndef HADRONS_DEFAULT_SCHUR
|
||||
|
@ -144,7 +144,7 @@ if (env().getGrid()->IsBoss() and !ioStem.empty())\
|
||||
{\
|
||||
makeFileDir(ioStem, env().getGrid());\
|
||||
{\
|
||||
ResultWriter _writer(RESULT_FILE_NAME(ioStem));\
|
||||
ResultWriter _writer(RESULT_FILE_NAME(ioStem, vm().getTrajectory()));\
|
||||
write(_writer, name, result);\
|
||||
}\
|
||||
}
|
||||
|
@ -146,7 +146,7 @@ void TChargedProp::execute(void)
|
||||
std::vector<int> siteCoor;
|
||||
|
||||
LOG(Message) << "Saving momentum-projected propagator to '"
|
||||
<< RESULT_FILE_NAME(par().output) << "'..."
|
||||
<< RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
|
||||
<< std::endl;
|
||||
result.projection.resize(par().outputMom.size());
|
||||
result.lattice_size = env().getGrid()->_fdimensions;
|
||||
|
@ -462,7 +462,7 @@ void TScalarVP::execute(void)
|
||||
if (!par().output.empty())
|
||||
{
|
||||
LOG(Message) << "Saving momentum-projected HVP to '"
|
||||
<< RESULT_FILE_NAME(par().output) << "'..."
|
||||
<< RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
|
||||
<< std::endl;
|
||||
saveResult(par().output, "HVP", outputData);
|
||||
}
|
||||
|
@ -239,7 +239,7 @@ void TVPCounterTerms::execute(void)
|
||||
if (!par().output.empty())
|
||||
{
|
||||
LOG(Message) << "Saving momentum-projected correlators to '"
|
||||
<< RESULT_FILE_NAME(par().output) << "'..."
|
||||
<< RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
|
||||
<< std::endl;
|
||||
saveResult(par().output, "scalar_loops", outputData);
|
||||
}
|
||||
|
@ -37,10 +37,20 @@ using namespace Hadrons;
|
||||
|
||||
namespace Contractor
|
||||
{
|
||||
class TrajRange: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
|
||||
unsigned int, start,
|
||||
unsigned int, end,
|
||||
unsigned int, step);
|
||||
};
|
||||
|
||||
class GlobalPar: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
|
||||
TrajRange, trajCounter,
|
||||
unsigned int, nt,
|
||||
std::string, diskVectorDir,
|
||||
std::string, output);
|
||||
@ -50,7 +60,7 @@ namespace Contractor
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
|
||||
std::string, file,
|
||||
std::string, fileStem,
|
||||
std::string, dataset,
|
||||
unsigned int, cacheSize,
|
||||
std::string, name);
|
||||
@ -62,7 +72,18 @@ namespace Contractor
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
|
||||
std::string, terms,
|
||||
std::vector<std::string>, times,
|
||||
std::string, translations);
|
||||
std::string, translations,
|
||||
bool, translationAverage);
|
||||
};
|
||||
|
||||
class CorrelatorResult: Serializable
|
||||
{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
|
||||
std::vector<Contractor::A2AMatrixPar>, a2aMatrix,
|
||||
ProductPar, contraction,
|
||||
std::vector<unsigned int>, times,
|
||||
std::vector<ComplexD>, correlator);
|
||||
};
|
||||
}
|
||||
|
||||
@ -100,6 +121,26 @@ void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
|
||||
makeTimeSeq(timeSeq, times, current, times.size());
|
||||
}
|
||||
|
||||
void saveCorrelator(const Contractor::CorrelatorResult &result, const std::string dir,
|
||||
const unsigned int dt, const unsigned int traj)
|
||||
{
|
||||
std::string fileStem = "", us = "_", filename;
|
||||
|
||||
for (unsigned int i = 0; i < result.contraction.terms.size() - 1; i++)
|
||||
{
|
||||
fileStem += result.contraction.terms[i] + us + std::to_string(result.times[i]) + us;
|
||||
}
|
||||
fileStem += result.contraction.terms.back();
|
||||
if (!result.contraction.translationAverage)
|
||||
{
|
||||
fileStem += "_dt_" + std::to_string(dt);
|
||||
}
|
||||
filename = dir + "/" + RESULT_FILE_NAME(fileStem, traj);
|
||||
std::cout << "Saving correlator to '" << filename << "'" << std::endl;
|
||||
makeFileDir(dir);
|
||||
ResultWriter writer(filename);
|
||||
write(writer, fileStem, result);
|
||||
}
|
||||
|
||||
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
|
||||
{
|
||||
@ -107,7 +148,6 @@ std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int
|
||||
std::smatch sm;
|
||||
std::vector<std::string> rstr = strToVec<std::string>(str);
|
||||
std::set<unsigned int> tSet;
|
||||
|
||||
|
||||
for (auto &s: rstr)
|
||||
{
|
||||
@ -230,158 +270,184 @@ int main(int argc, char* argv[])
|
||||
a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
|
||||
}
|
||||
|
||||
// load data
|
||||
for (unsigned int i = 0; i < a2aMat.size(); ++i)
|
||||
// trajectory loop
|
||||
for (unsigned int traj = par.global.trajCounter.start;
|
||||
traj < par.global.trajCounter.end; traj += par.global.trajCounter.step)
|
||||
{
|
||||
auto &p = par.a2aMatrix[i];
|
||||
double t, size;
|
||||
std::cout << ":::::::: Trajectory " << traj << std::endl;
|
||||
|
||||
std::cout << "======== Loading '" << p.file << "'" << std::endl;
|
||||
|
||||
A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(p.file, p.dataset, par.global.nt);
|
||||
|
||||
a2aIo.load(a2aMat.at(p.name), &t);
|
||||
std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6
|
||||
<< " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
|
||||
}
|
||||
|
||||
// contract
|
||||
EigenDiskVector<ComplexD>::Matrix buf;
|
||||
|
||||
for (auto &p: par.product)
|
||||
{
|
||||
std::vector<std::string> term = strToVec<std::string>(p.terms);
|
||||
std::vector<std::set<unsigned int>> times;
|
||||
std::vector<std::vector<unsigned int>> timeSeq;
|
||||
std::set<unsigned int> translations;
|
||||
std::vector<ComplexD> corr(par.global.nt);
|
||||
std::vector<A2AMatrixTr<ComplexD>> lastTerm(par.global.nt);
|
||||
A2AMatrix<ComplexD> prod, buf, tmp;
|
||||
TimerArray tAr;
|
||||
double fusec, busec, flops, bytes, tusec;
|
||||
|
||||
tAr.startTimer("Total");
|
||||
std::cout << "======== Contraction tr(";
|
||||
for (unsigned int g = 0; g < term.size(); ++g)
|
||||
// load data
|
||||
for (auto &p: par.a2aMatrix)
|
||||
{
|
||||
std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
|
||||
}
|
||||
std::cout << std::endl;
|
||||
if (term.size() != p.times.size() + 1)
|
||||
{
|
||||
HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size())
|
||||
+ ") different from number of times ("
|
||||
+ std::to_string(p.times.size() + 1) + ")");
|
||||
}
|
||||
for (auto &s: p.times)
|
||||
{
|
||||
times.push_back(parseTimeRange(s, par.global.nt));
|
||||
std::string filename = RESULT_FILE_NAME(p.fileStem, traj);
|
||||
double t, size;
|
||||
|
||||
std::cout << "======== Loading '" << filename << "'" << std::endl;
|
||||
|
||||
A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
|
||||
|
||||
a2aIo.load(a2aMat.at(p.name), &t);
|
||||
std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6
|
||||
<< " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
|
||||
}
|
||||
|
||||
translations = parseTimeRange(p.translations, par.global.nt);
|
||||
makeTimeSeq(timeSeq, times);
|
||||
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
|
||||
<< timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
|
||||
<< std::endl;
|
||||
// contract
|
||||
EigenDiskVector<ComplexD>::Matrix buf;
|
||||
|
||||
std::cout << "* Caching transposed last term" << std::endl;
|
||||
for (unsigned int t = 0; t < par.global.nt; ++t)
|
||||
for (auto &p: par.product)
|
||||
{
|
||||
tAr.startTimer("Disk vector overhead");
|
||||
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
|
||||
tAr.stopTimer("Disk vector overhead");
|
||||
std::vector<std::string> term = strToVec<std::string>(p.terms);
|
||||
std::vector<std::set<unsigned int>> times;
|
||||
std::vector<std::vector<unsigned int>> timeSeq;
|
||||
std::set<unsigned int> translations;
|
||||
std::vector<A2AMatrixTr<ComplexD>> lastTerm(par.global.nt);
|
||||
A2AMatrix<ComplexD> prod, buf, tmp;
|
||||
TimerArray tAr;
|
||||
double fusec, busec, flops, bytes, tusec;
|
||||
Contractor::CorrelatorResult result;
|
||||
|
||||
tAr.startTimer("Transpose caching");
|
||||
lastTerm[t].resize(ref.rows(), ref.cols());
|
||||
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
|
||||
for (unsigned int i = 0; i < ref.rows(); ++i)
|
||||
tAr.startTimer("Total");
|
||||
std::cout << "======== Contraction tr(";
|
||||
for (unsigned int g = 0; g < term.size(); ++g)
|
||||
{
|
||||
lastTerm[t](i, j) = ref(i, j);
|
||||
std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
|
||||
}
|
||||
tAr.stopTimer("Transpose caching");
|
||||
}
|
||||
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
||||
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
|
||||
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
|
||||
std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
|
||||
for (unsigned int i = 0; i < timeSeq.size(); ++i)
|
||||
{
|
||||
unsigned int dti = 0;
|
||||
auto &t = timeSeq[i];
|
||||
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
std::cout << std::endl;
|
||||
if (term.size() != p.times.size() + 1)
|
||||
{
|
||||
corr[tLast] = 0.;
|
||||
HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size())
|
||||
+ ") different from number of times ("
|
||||
+ std::to_string(p.times.size() + 1) + ")");
|
||||
}
|
||||
for (auto &dt: translations)
|
||||
for (auto &s: p.times)
|
||||
{
|
||||
std::cout << "* Step " << i*translations.size() + dti + 1
|
||||
<< "/" << timeSeq.size()*translations.size()
|
||||
<< " -- positions= " << t << ", dt= " << dt << std::endl;
|
||||
if (term.size() > 2)
|
||||
times.push_back(parseTimeRange(s, par.global.nt));
|
||||
}
|
||||
for (auto &m: par.a2aMatrix)
|
||||
{
|
||||
if (std::find(result.a2aMatrix.begin(), result.a2aMatrix.end(), m) == result.a2aMatrix.end())
|
||||
{
|
||||
std::cout << std::setw(8) << "products";
|
||||
result.a2aMatrix.push_back(m);
|
||||
}
|
||||
flops = 0.;
|
||||
bytes = 0.;
|
||||
fusec = tAr.getDTimer("A*B algebra");
|
||||
busec = tAr.getDTimer("A*B total");
|
||||
tAr.startTimer("Linear algebra");
|
||||
}
|
||||
result.contraction = p;
|
||||
result.correlator.resize(par.global.nt, 0.);
|
||||
|
||||
translations = parseTimeRange(p.translations, par.global.nt);
|
||||
makeTimeSeq(timeSeq, times);
|
||||
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
|
||||
<< timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
|
||||
<< std::endl;
|
||||
|
||||
std::cout << "* Caching transposed last term" << std::endl;
|
||||
for (unsigned int t = 0; t < par.global.nt; ++t)
|
||||
{
|
||||
tAr.startTimer("Disk vector overhead");
|
||||
prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
|
||||
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
|
||||
tAr.stopTimer("Disk vector overhead");
|
||||
for (unsigned int j = 1; j < term.size() - 1; ++j)
|
||||
|
||||
tAr.startTimer("Transpose caching");
|
||||
lastTerm[t].resize(ref.rows(), ref.cols());
|
||||
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
|
||||
for (unsigned int i = 0; i < ref.rows(); ++i)
|
||||
{
|
||||
tAr.startTimer("Disk vector overhead");
|
||||
const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
|
||||
tAr.stopTimer("Disk vector overhead");
|
||||
|
||||
tAr.startTimer("A*B total");
|
||||
tAr.startTimer("A*B algebra");
|
||||
A2AContraction::mul(tmp, prod, ref);
|
||||
tAr.stopTimer("A*B algebra");
|
||||
flops += A2AContraction::mulFlops(prod, ref);
|
||||
prod = tmp;
|
||||
tAr.stopTimer("A*B total");
|
||||
bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
|
||||
lastTerm[t](i, j) = ref(i, j);
|
||||
}
|
||||
if (term.size() > 2)
|
||||
{
|
||||
std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
|
||||
<< Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " "
|
||||
<< Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
|
||||
}
|
||||
std::cout << std::setw(8) << "traces";
|
||||
flops = 0.;
|
||||
bytes = 0.;
|
||||
fusec = tAr.getDTimer("tr(A*B)");
|
||||
busec = tAr.getDTimer("tr(A*B)");
|
||||
tAr.stopTimer("Transpose caching");
|
||||
}
|
||||
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
||||
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
|
||||
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
|
||||
std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
|
||||
for (unsigned int i = 0; i < timeSeq.size(); ++i)
|
||||
{
|
||||
unsigned int dti = 0;
|
||||
auto &t = timeSeq[i];
|
||||
|
||||
result.times = t;
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
{
|
||||
tAr.startTimer("tr(A*B)");
|
||||
A2AContraction::accTrMul(corr[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
|
||||
tAr.stopTimer("tr(A*B)");
|
||||
flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
|
||||
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
|
||||
result.correlator[tLast] = 0.;
|
||||
}
|
||||
for (auto &dt: translations)
|
||||
{
|
||||
std::cout << "* Step " << i*translations.size() + dti + 1
|
||||
<< "/" << timeSeq.size()*translations.size()
|
||||
<< " -- positions= " << t << ", dt= " << dt << std::endl;
|
||||
if (term.size() > 2)
|
||||
{
|
||||
std::cout << std::setw(8) << "products";
|
||||
}
|
||||
flops = 0.;
|
||||
bytes = 0.;
|
||||
fusec = tAr.getDTimer("A*B algebra");
|
||||
busec = tAr.getDTimer("A*B total");
|
||||
tAr.startTimer("Linear algebra");
|
||||
tAr.startTimer("Disk vector overhead");
|
||||
prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
|
||||
tAr.stopTimer("Disk vector overhead");
|
||||
for (unsigned int j = 1; j < term.size() - 1; ++j)
|
||||
{
|
||||
tAr.startTimer("Disk vector overhead");
|
||||
const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
|
||||
tAr.stopTimer("Disk vector overhead");
|
||||
|
||||
tAr.startTimer("A*B total");
|
||||
tAr.startTimer("A*B algebra");
|
||||
A2AContraction::mul(tmp, prod, ref);
|
||||
tAr.stopTimer("A*B algebra");
|
||||
flops += A2AContraction::mulFlops(prod, ref);
|
||||
prod = tmp;
|
||||
tAr.stopTimer("A*B total");
|
||||
bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
|
||||
}
|
||||
if (term.size() > 2)
|
||||
{
|
||||
std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
|
||||
<< Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " "
|
||||
<< Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
|
||||
}
|
||||
std::cout << std::setw(8) << "traces";
|
||||
flops = 0.;
|
||||
bytes = 0.;
|
||||
fusec = tAr.getDTimer("tr(A*B)");
|
||||
busec = tAr.getDTimer("tr(A*B)");
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
{
|
||||
tAr.startTimer("tr(A*B)");
|
||||
A2AContraction::accTrMul(result.correlator[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
|
||||
tAr.stopTimer("tr(A*B)");
|
||||
flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
|
||||
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
|
||||
}
|
||||
tAr.stopTimer("Linear algebra");
|
||||
std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
|
||||
<< Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " "
|
||||
<< Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
|
||||
if (!p.translationAverage)
|
||||
{
|
||||
saveCorrelator(result, par.global.output, dt, traj);
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
{
|
||||
result.correlator[tLast] = 0.;
|
||||
}
|
||||
}
|
||||
dti++;
|
||||
}
|
||||
if (p.translationAverage)
|
||||
{
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
{
|
||||
result.correlator[tLast] /= translations.size();
|
||||
}
|
||||
saveCorrelator(result, par.global.output, 0, traj);
|
||||
}
|
||||
tAr.stopTimer("Linear algebra");
|
||||
std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
|
||||
<< Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " "
|
||||
<< Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
|
||||
dti++;
|
||||
}
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
{
|
||||
corr[tLast] /= translations.size();
|
||||
}
|
||||
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||
{
|
||||
std::cout << tLast << " " << corr[tLast] << std::endl;
|
||||
}
|
||||
tAr.stopTimer("Total");
|
||||
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
|
||||
}
|
||||
tAr.stopTimer("Total");
|
||||
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
|
||||
}
|
||||
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user