mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Hadrons: contractor trajectory loop and file output
This commit is contained in:
		@@ -166,7 +166,13 @@ std::string Hadrons::dirname(const std::string &s)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void Hadrons::makeFileDir(const std::string filename, GridBase *g)
 | 
					void Hadrons::makeFileDir(const std::string filename, GridBase *g)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    if (g->IsBoss())
 | 
					    bool doIt = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (g)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        doIt = g->IsBoss();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (doIt)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        std::string dir    = dirname(filename);
 | 
					        std::string dir    = dirname(filename);
 | 
				
			||||||
        int         status = mkdir(dir);
 | 
					        int         status = mkdir(dir);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -218,15 +218,15 @@ typedef XmlReader ResultReader;
 | 
				
			|||||||
typedef XmlWriter ResultWriter;
 | 
					typedef XmlWriter ResultWriter;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define RESULT_FILE_NAME(name) \
 | 
					#define RESULT_FILE_NAME(name, traj) \
 | 
				
			||||||
name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt
 | 
					name + "." + std::to_string(traj) + "." + resultFileExt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// recursive mkdir
 | 
					// recursive mkdir
 | 
				
			||||||
#define MAX_PATH_LENGTH 512u
 | 
					#define MAX_PATH_LENGTH 512u
 | 
				
			||||||
int         mkdir(const std::string dirName);
 | 
					int         mkdir(const std::string dirName);
 | 
				
			||||||
std::string basename(const std::string &s);
 | 
					std::string basename(const std::string &s);
 | 
				
			||||||
std::string dirname(const std::string &s);
 | 
					std::string dirname(const std::string &s);
 | 
				
			||||||
void        makeFileDir(const std::string filename, GridBase *g);
 | 
					void        makeFileDir(const std::string filename, GridBase *g = nullptr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// default Schur convention
 | 
					// default Schur convention
 | 
				
			||||||
#ifndef HADRONS_DEFAULT_SCHUR 
 | 
					#ifndef HADRONS_DEFAULT_SCHUR 
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -144,7 +144,7 @@ if (env().getGrid()->IsBoss() and !ioStem.empty())\
 | 
				
			|||||||
{\
 | 
					{\
 | 
				
			||||||
    makeFileDir(ioStem, env().getGrid());\
 | 
					    makeFileDir(ioStem, env().getGrid());\
 | 
				
			||||||
    {\
 | 
					    {\
 | 
				
			||||||
        ResultWriter _writer(RESULT_FILE_NAME(ioStem));\
 | 
					        ResultWriter _writer(RESULT_FILE_NAME(ioStem, vm().getTrajectory()));\
 | 
				
			||||||
        write(_writer, name, result);\
 | 
					        write(_writer, name, result);\
 | 
				
			||||||
    }\
 | 
					    }\
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -146,7 +146,7 @@ void TChargedProp::execute(void)
 | 
				
			|||||||
        std::vector<int>    siteCoor;
 | 
					        std::vector<int>    siteCoor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        LOG(Message) << "Saving momentum-projected propagator to '"
 | 
					        LOG(Message) << "Saving momentum-projected propagator to '"
 | 
				
			||||||
                     << RESULT_FILE_NAME(par().output) << "'..."
 | 
					                     << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
 | 
				
			||||||
                     << std::endl;
 | 
					                     << std::endl;
 | 
				
			||||||
        result.projection.resize(par().outputMom.size());
 | 
					        result.projection.resize(par().outputMom.size());
 | 
				
			||||||
        result.lattice_size = env().getGrid()->_fdimensions;
 | 
					        result.lattice_size = env().getGrid()->_fdimensions;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -462,7 +462,7 @@ void TScalarVP::execute(void)
 | 
				
			|||||||
    if (!par().output.empty())
 | 
					    if (!par().output.empty())
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        LOG(Message) << "Saving momentum-projected HVP to '"
 | 
					        LOG(Message) << "Saving momentum-projected HVP to '"
 | 
				
			||||||
                     << RESULT_FILE_NAME(par().output) << "'..."
 | 
					                     << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
 | 
				
			||||||
                     << std::endl;
 | 
					                     << std::endl;
 | 
				
			||||||
        saveResult(par().output, "HVP", outputData);
 | 
					        saveResult(par().output, "HVP", outputData);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -239,7 +239,7 @@ void TVPCounterTerms::execute(void)
 | 
				
			|||||||
    if (!par().output.empty())
 | 
					    if (!par().output.empty())
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        LOG(Message) << "Saving momentum-projected correlators to '"
 | 
					        LOG(Message) << "Saving momentum-projected correlators to '"
 | 
				
			||||||
                     << RESULT_FILE_NAME(par().output) << "'..."
 | 
					                     << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
 | 
				
			||||||
                     << std::endl;
 | 
					                     << std::endl;
 | 
				
			||||||
        saveResult(par().output, "scalar_loops", outputData);
 | 
					        saveResult(par().output, "scalar_loops", outputData);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -37,10 +37,20 @@ using namespace Hadrons;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Contractor
 | 
					namespace Contractor
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    class TrajRange: Serializable
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
 | 
				
			||||||
 | 
					                                        unsigned int, start,
 | 
				
			||||||
 | 
					                                        unsigned int, end,
 | 
				
			||||||
 | 
					                                        unsigned int, step);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    class GlobalPar: Serializable
 | 
					    class GlobalPar: Serializable
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
 | 
				
			||||||
 | 
					                                        TrajRange, trajCounter,
 | 
				
			||||||
                                        unsigned int, nt,
 | 
					                                        unsigned int, nt,
 | 
				
			||||||
                                        std::string, diskVectorDir,
 | 
					                                        std::string, diskVectorDir,
 | 
				
			||||||
                                        std::string, output);
 | 
					                                        std::string, output);
 | 
				
			||||||
@@ -50,7 +60,7 @@ namespace Contractor
 | 
				
			|||||||
    {
 | 
					    {
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
 | 
				
			||||||
                                        std::string, file,
 | 
					                                        std::string, fileStem,
 | 
				
			||||||
                                        std::string, dataset,
 | 
					                                        std::string, dataset,
 | 
				
			||||||
                                        unsigned int, cacheSize,
 | 
					                                        unsigned int, cacheSize,
 | 
				
			||||||
                                        std::string, name);
 | 
					                                        std::string, name);
 | 
				
			||||||
@@ -62,7 +72,18 @@ namespace Contractor
 | 
				
			|||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
 | 
				
			||||||
                                        std::string, terms,
 | 
					                                        std::string, terms,
 | 
				
			||||||
                                        std::vector<std::string>, times,
 | 
					                                        std::vector<std::string>, times,
 | 
				
			||||||
                                        std::string, translations);
 | 
					                                        std::string, translations,
 | 
				
			||||||
 | 
					                                        bool, translationAverage);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    class CorrelatorResult: Serializable
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
 | 
				
			||||||
 | 
					                                        std::vector<Contractor::A2AMatrixPar>,  a2aMatrix,
 | 
				
			||||||
 | 
					                                        ProductPar, contraction,
 | 
				
			||||||
 | 
					                                        std::vector<unsigned int>, times,
 | 
				
			||||||
 | 
					                                        std::vector<ComplexD>, correlator);
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -100,6 +121,26 @@ void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
 | 
				
			|||||||
    makeTimeSeq(timeSeq, times, current, times.size());
 | 
					    makeTimeSeq(timeSeq, times, current, times.size());
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void saveCorrelator(const Contractor::CorrelatorResult &result, const std::string dir, 
 | 
				
			||||||
 | 
					                    const unsigned int dt, const unsigned int traj)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    std::string fileStem = "", us = "_", filename;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (unsigned int i = 0; i < result.contraction.terms.size() - 1; i++)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        fileStem += result.contraction.terms[i] + us + std::to_string(result.times[i]) + us;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fileStem += result.contraction.terms.back();
 | 
				
			||||||
 | 
					    if (!result.contraction.translationAverage)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        fileStem += "_dt_" + std::to_string(dt);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    filename = dir + "/" + RESULT_FILE_NAME(fileStem, traj);
 | 
				
			||||||
 | 
					    std::cout << "Saving correlator to '" << filename << "'" << std::endl;
 | 
				
			||||||
 | 
					    makeFileDir(dir);
 | 
				
			||||||
 | 
					    ResultWriter writer(filename);
 | 
				
			||||||
 | 
					    write(writer, fileStem, result);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
 | 
					std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -107,7 +148,6 @@ std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int
 | 
				
			|||||||
    std::smatch              sm;
 | 
					    std::smatch              sm;
 | 
				
			||||||
    std::vector<std::string> rstr = strToVec<std::string>(str);
 | 
					    std::vector<std::string> rstr = strToVec<std::string>(str);
 | 
				
			||||||
    std::set<unsigned int>   tSet;
 | 
					    std::set<unsigned int>   tSet;
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (auto &s: rstr)
 | 
					    for (auto &s: rstr)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
@@ -230,158 +270,184 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
        a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
 | 
					        a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // load data
 | 
					    // trajectory loop
 | 
				
			||||||
    for (unsigned int i = 0; i < a2aMat.size(); ++i)
 | 
					    for (unsigned int traj = par.global.trajCounter.start; 
 | 
				
			||||||
 | 
					         traj < par.global.trajCounter.end; traj += par.global.trajCounter.step)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        auto   &p = par.a2aMatrix[i];
 | 
					        std::cout << ":::::::: Trajectory " << traj << std::endl;
 | 
				
			||||||
        double t, size;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        std::cout << "======== Loading '" << p.file << "'" << std::endl;
 | 
					        // load data
 | 
				
			||||||
 | 
					        for (auto &p: par.a2aMatrix)
 | 
				
			||||||
        A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(p.file, p.dataset, par.global.nt);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        a2aIo.load(a2aMat.at(p.name), &t);
 | 
					 | 
				
			||||||
        std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6 
 | 
					 | 
				
			||||||
                  << " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // contract
 | 
					 | 
				
			||||||
    EigenDiskVector<ComplexD>::Matrix buf;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (auto &p: par.product)
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        std::vector<std::string>               term = strToVec<std::string>(p.terms);
 | 
					 | 
				
			||||||
        std::vector<std::set<unsigned int>>    times;
 | 
					 | 
				
			||||||
        std::vector<std::vector<unsigned int>> timeSeq;
 | 
					 | 
				
			||||||
        std::set<unsigned int>                 translations;
 | 
					 | 
				
			||||||
        std::vector<ComplexD>                  corr(par.global.nt);
 | 
					 | 
				
			||||||
        std::vector<A2AMatrixTr<ComplexD>>     lastTerm(par.global.nt);
 | 
					 | 
				
			||||||
        A2AMatrix<ComplexD>                    prod, buf, tmp;
 | 
					 | 
				
			||||||
        TimerArray                             tAr;
 | 
					 | 
				
			||||||
        double                                 fusec, busec, flops, bytes, tusec;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        tAr.startTimer("Total");
 | 
					 | 
				
			||||||
        std::cout << "======== Contraction tr(";
 | 
					 | 
				
			||||||
        for (unsigned int g = 0; g < term.size(); ++g)
 | 
					 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
 | 
					            std::string filename = RESULT_FILE_NAME(p.fileStem, traj);
 | 
				
			||||||
        }
 | 
					            double      t, size;
 | 
				
			||||||
        std::cout << std::endl;
 | 
					
 | 
				
			||||||
        if (term.size() != p.times.size() + 1)
 | 
					            std::cout << "======== Loading '" << filename << "'" << std::endl;
 | 
				
			||||||
        {
 | 
					
 | 
				
			||||||
            HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size()) 
 | 
					            A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
 | 
				
			||||||
                          + ") different from number of times (" 
 | 
					
 | 
				
			||||||
                          + std::to_string(p.times.size() + 1) + ")");
 | 
					            a2aIo.load(a2aMat.at(p.name), &t);
 | 
				
			||||||
        }
 | 
					            std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6 
 | 
				
			||||||
        for (auto &s: p.times)
 | 
					                    << " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
 | 
				
			||||||
        {
 | 
					 | 
				
			||||||
            times.push_back(parseTimeRange(s, par.global.nt));
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        translations = parseTimeRange(p.translations, par.global.nt);
 | 
					        // contract
 | 
				
			||||||
        makeTimeSeq(timeSeq, times);
 | 
					        EigenDiskVector<ComplexD>::Matrix buf;
 | 
				
			||||||
        std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
 | 
					 | 
				
			||||||
                  << timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
 | 
					 | 
				
			||||||
                  << std::endl;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        std::cout << "* Caching transposed last term" << std::endl;
 | 
					        for (auto &p: par.product)
 | 
				
			||||||
        for (unsigned int t = 0; t < par.global.nt; ++t)
 | 
					 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            tAr.startTimer("Disk vector overhead");
 | 
					            std::vector<std::string>               term = strToVec<std::string>(p.terms);
 | 
				
			||||||
            const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
 | 
					            std::vector<std::set<unsigned int>>    times;
 | 
				
			||||||
            tAr.stopTimer("Disk vector overhead");
 | 
					            std::vector<std::vector<unsigned int>> timeSeq;
 | 
				
			||||||
 | 
					            std::set<unsigned int>                 translations;
 | 
				
			||||||
 | 
					            std::vector<A2AMatrixTr<ComplexD>>     lastTerm(par.global.nt);
 | 
				
			||||||
 | 
					            A2AMatrix<ComplexD>                    prod, buf, tmp;
 | 
				
			||||||
 | 
					            TimerArray                             tAr;
 | 
				
			||||||
 | 
					            double                                 fusec, busec, flops, bytes, tusec;
 | 
				
			||||||
 | 
					            Contractor::CorrelatorResult           result;             
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            tAr.startTimer("Transpose caching");
 | 
					            tAr.startTimer("Total");
 | 
				
			||||||
            lastTerm[t].resize(ref.rows(), ref.cols());
 | 
					            std::cout << "======== Contraction tr(";
 | 
				
			||||||
            parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
 | 
					            for (unsigned int g = 0; g < term.size(); ++g)
 | 
				
			||||||
            for (unsigned int i = 0; i < ref.rows(); ++i)
 | 
					 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                lastTerm[t](i, j) = ref(i, j);
 | 
					                std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            tAr.stopTimer("Transpose caching");
 | 
					            std::cout << std::endl;
 | 
				
			||||||
        }
 | 
					            if (term.size() != p.times.size() + 1)
 | 
				
			||||||
        bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
 | 
					 | 
				
			||||||
        std::cout << Sec(tAr.getDTimer("Transpose caching")) << " " 
 | 
					 | 
				
			||||||
                  << Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
 | 
					 | 
				
			||||||
        std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
 | 
					 | 
				
			||||||
        for (unsigned int i = 0; i < timeSeq.size(); ++i)
 | 
					 | 
				
			||||||
        {
 | 
					 | 
				
			||||||
            unsigned int dti = 0;
 | 
					 | 
				
			||||||
            auto         &t = timeSeq[i];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
					 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                corr[tLast] = 0.;
 | 
					                HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size()) 
 | 
				
			||||||
 | 
					                            + ") different from number of times (" 
 | 
				
			||||||
 | 
					                            + std::to_string(p.times.size() + 1) + ")");
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            for (auto &dt: translations)
 | 
					            for (auto &s: p.times)
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                std::cout << "* Step " << i*translations.size() + dti + 1
 | 
					                times.push_back(parseTimeRange(s, par.global.nt));
 | 
				
			||||||
                          << "/" << timeSeq.size()*translations.size()
 | 
					            }
 | 
				
			||||||
                          << " -- positions= " << t << ", dt= " << dt << std::endl;
 | 
					            for (auto &m: par.a2aMatrix)
 | 
				
			||||||
                if (term.size() > 2)
 | 
					            {
 | 
				
			||||||
 | 
					                if (std::find(result.a2aMatrix.begin(), result.a2aMatrix.end(), m) == result.a2aMatrix.end())
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                    std::cout << std::setw(8) << "products";
 | 
					                    result.a2aMatrix.push_back(m);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                flops  = 0.;
 | 
					            }
 | 
				
			||||||
                bytes  = 0.;
 | 
					            result.contraction = p;
 | 
				
			||||||
                fusec  = tAr.getDTimer("A*B algebra");
 | 
					            result.correlator.resize(par.global.nt, 0.);
 | 
				
			||||||
                busec  = tAr.getDTimer("A*B total");
 | 
					
 | 
				
			||||||
                tAr.startTimer("Linear algebra");
 | 
					            translations = parseTimeRange(p.translations, par.global.nt);
 | 
				
			||||||
 | 
					            makeTimeSeq(timeSeq, times);
 | 
				
			||||||
 | 
					            std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
 | 
				
			||||||
 | 
					                    << timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
 | 
				
			||||||
 | 
					                    << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            std::cout << "* Caching transposed last term" << std::endl;
 | 
				
			||||||
 | 
					            for (unsigned int t = 0; t < par.global.nt; ++t)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
                tAr.startTimer("Disk vector overhead");
 | 
					                tAr.startTimer("Disk vector overhead");
 | 
				
			||||||
                prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
 | 
					                const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
 | 
				
			||||||
                tAr.stopTimer("Disk vector overhead");
 | 
					                tAr.stopTimer("Disk vector overhead");
 | 
				
			||||||
                for (unsigned int j = 1; j < term.size() - 1; ++j)
 | 
					
 | 
				
			||||||
 | 
					                tAr.startTimer("Transpose caching");
 | 
				
			||||||
 | 
					                lastTerm[t].resize(ref.rows(), ref.cols());
 | 
				
			||||||
 | 
					                parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
 | 
				
			||||||
 | 
					                for (unsigned int i = 0; i < ref.rows(); ++i)
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                    tAr.startTimer("Disk vector overhead");
 | 
					                    lastTerm[t](i, j) = ref(i, j);
 | 
				
			||||||
                    const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
 | 
					 | 
				
			||||||
                    tAr.stopTimer("Disk vector overhead");
 | 
					 | 
				
			||||||
                    
 | 
					 | 
				
			||||||
                    tAr.startTimer("A*B total");
 | 
					 | 
				
			||||||
                    tAr.startTimer("A*B algebra");
 | 
					 | 
				
			||||||
                    A2AContraction::mul(tmp, prod, ref);
 | 
					 | 
				
			||||||
                    tAr.stopTimer("A*B algebra");
 | 
					 | 
				
			||||||
                    flops += A2AContraction::mulFlops(prod, ref);
 | 
					 | 
				
			||||||
                    prod   = tmp;
 | 
					 | 
				
			||||||
                    tAr.stopTimer("A*B total");
 | 
					 | 
				
			||||||
                    bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                if (term.size() > 2)
 | 
					                tAr.stopTimer("Transpose caching");
 | 
				
			||||||
                {
 | 
					            }
 | 
				
			||||||
                    std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
 | 
					            bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
 | 
				
			||||||
                              << Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " " 
 | 
					            std::cout << Sec(tAr.getDTimer("Transpose caching")) << " " 
 | 
				
			||||||
                              << Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
 | 
					                    << Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
 | 
				
			||||||
                }
 | 
					            std::cout << Sec(tAr.getDTimer("Disk vector overhead")) << std::endl;
 | 
				
			||||||
                std::cout << std::setw(8) << "traces";
 | 
					            for (unsigned int i = 0; i < timeSeq.size(); ++i)
 | 
				
			||||||
                flops  = 0.;
 | 
					            {
 | 
				
			||||||
                bytes  = 0.;
 | 
					                unsigned int dti = 0;
 | 
				
			||||||
                fusec  = tAr.getDTimer("tr(A*B)");
 | 
					                auto         &t = timeSeq[i];
 | 
				
			||||||
                busec  = tAr.getDTimer("tr(A*B)");
 | 
					
 | 
				
			||||||
 | 
					                result.times = t;
 | 
				
			||||||
                for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
					                for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                    tAr.startTimer("tr(A*B)");
 | 
					                    result.correlator[tLast] = 0.;
 | 
				
			||||||
                    A2AContraction::accTrMul(corr[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
 | 
					                }
 | 
				
			||||||
                    tAr.stopTimer("tr(A*B)");
 | 
					                for (auto &dt: translations)
 | 
				
			||||||
                    flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
 | 
					                {
 | 
				
			||||||
                    bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
 | 
					                    std::cout << "* Step " << i*translations.size() + dti + 1
 | 
				
			||||||
 | 
					                            << "/" << timeSeq.size()*translations.size()
 | 
				
			||||||
 | 
					                            << " -- positions= " << t << ", dt= " << dt << std::endl;
 | 
				
			||||||
 | 
					                    if (term.size() > 2)
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        std::cout << std::setw(8) << "products";
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    flops  = 0.;
 | 
				
			||||||
 | 
					                    bytes  = 0.;
 | 
				
			||||||
 | 
					                    fusec  = tAr.getDTimer("A*B algebra");
 | 
				
			||||||
 | 
					                    busec  = tAr.getDTimer("A*B total");
 | 
				
			||||||
 | 
					                    tAr.startTimer("Linear algebra");
 | 
				
			||||||
 | 
					                    tAr.startTimer("Disk vector overhead");
 | 
				
			||||||
 | 
					                    prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
 | 
				
			||||||
 | 
					                    tAr.stopTimer("Disk vector overhead");
 | 
				
			||||||
 | 
					                    for (unsigned int j = 1; j < term.size() - 1; ++j)
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        tAr.startTimer("Disk vector overhead");
 | 
				
			||||||
 | 
					                        const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
 | 
				
			||||||
 | 
					                        tAr.stopTimer("Disk vector overhead");
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					                        tAr.startTimer("A*B total");
 | 
				
			||||||
 | 
					                        tAr.startTimer("A*B algebra");
 | 
				
			||||||
 | 
					                        A2AContraction::mul(tmp, prod, ref);
 | 
				
			||||||
 | 
					                        tAr.stopTimer("A*B algebra");
 | 
				
			||||||
 | 
					                        flops += A2AContraction::mulFlops(prod, ref);
 | 
				
			||||||
 | 
					                        prod   = tmp;
 | 
				
			||||||
 | 
					                        tAr.stopTimer("A*B total");
 | 
				
			||||||
 | 
					                        bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    if (term.size() > 2)
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
 | 
				
			||||||
 | 
					                                << Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " " 
 | 
				
			||||||
 | 
					                                << Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    std::cout << std::setw(8) << "traces";
 | 
				
			||||||
 | 
					                    flops  = 0.;
 | 
				
			||||||
 | 
					                    bytes  = 0.;
 | 
				
			||||||
 | 
					                    fusec  = tAr.getDTimer("tr(A*B)");
 | 
				
			||||||
 | 
					                    busec  = tAr.getDTimer("tr(A*B)");
 | 
				
			||||||
 | 
					                    for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        tAr.startTimer("tr(A*B)");
 | 
				
			||||||
 | 
					                        A2AContraction::accTrMul(result.correlator[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
 | 
				
			||||||
 | 
					                        tAr.stopTimer("tr(A*B)");
 | 
				
			||||||
 | 
					                        flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
 | 
				
			||||||
 | 
					                        bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    tAr.stopTimer("Linear algebra");
 | 
				
			||||||
 | 
					                    std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
 | 
				
			||||||
 | 
					                            << Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " " 
 | 
				
			||||||
 | 
					                            << Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
 | 
				
			||||||
 | 
					                    if (!p.translationAverage)
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        saveCorrelator(result, par.global.output, dt, traj);
 | 
				
			||||||
 | 
					                        for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
				
			||||||
 | 
					                        {
 | 
				
			||||||
 | 
					                            result.correlator[tLast] = 0.;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    dti++;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                if (p.translationAverage)
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        result.correlator[tLast] /= translations.size();
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    saveCorrelator(result, par.global.output, 0, traj);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                tAr.stopTimer("Linear algebra");
 | 
					 | 
				
			||||||
                std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
 | 
					 | 
				
			||||||
                          << Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " " 
 | 
					 | 
				
			||||||
                          << Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
 | 
					 | 
				
			||||||
                dti++;
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
                corr[tLast] /= translations.size();
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
                std::cout << tLast << " " << corr[tLast] << std::endl;
 | 
					 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					            tAr.stopTimer("Total");
 | 
				
			||||||
 | 
					            printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        tAr.stopTimer("Total");
 | 
					 | 
				
			||||||
        printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user