From 0887566134b7cd7b1a4fb3af69180c0dd9dbed91 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 13 Dec 2017 16:36:15 +0000 Subject: [PATCH] Hadrons: scheduler back! --- extras/Hadrons/Application.cc | 110 +++-------------------- extras/Hadrons/Application.hpp | 31 +++---- extras/Hadrons/GeneticScheduler.hpp | 66 +++++++------- extras/Hadrons/VirtualMachine.cc | 134 +++++++++++++++++++++++----- extras/Hadrons/VirtualMachine.hpp | 34 ++++--- 5 files changed, 189 insertions(+), 186 deletions(-) diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc index 24618447..9a3366d4 100644 --- a/extras/Hadrons/Application.cc +++ b/extras/Hadrons/Application.cc @@ -94,10 +94,7 @@ void Application::run(void) } vm().printContent(); env().printContent(); - if (!scheduled_) - { - schedule(); - } + schedule(); printSchedule(); configLoop(); } @@ -122,11 +119,13 @@ void Application::parseParameterFile(const std::string parameterFileName) setPar(par); if (!push(reader, "modules")) { - HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" + parameterFileName + "'"); + HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" + + parameterFileName + "'"); } if (!push(reader, "module")) { - HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" + parameterFileName + "'"); + HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" + + parameterFileName + "'"); } do { @@ -160,98 +159,13 @@ void Application::saveParameterFile(const std::string parameterFileName) } // schedule computation //////////////////////////////////////////////////////// -#define MEM_MSG(size)\ -sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)" - -#define DEFINE_MEMPEAK \ -GeneticScheduler::ObjFunc memPeak = \ -[this](const std::vector &program)\ -{\ - unsigned int memPeak;\ - bool msg;\ - \ - msg = HadronsLogMessage.isActive();\ - HadronsLogMessage.Active(false);\ - vm().dryRun(true);\ - memPeak = vm().executeProgram(program);\ - vm().dryRun(false);\ - env().freeAll();\ - HadronsLogMessage.Active(msg);\ - \ - return memPeak;\ -} - void Application::schedule(void) { - //DEFINE_MEMPEAK; - - // build module dependency graph - LOG(Message) << "Building module graph..." << std::endl; - auto graph = vm().getModuleGraph(); - LOG(Debug) << "Module graph:" << std::endl; - LOG(Debug) << graph << std::endl; - auto con = graph.getConnectedComponents(); - - // constrained topological sort using a genetic algorithm - // LOG(Message) << "Scheduling computation..." << std::endl; - // LOG(Message) << " #module= " << graph.size() << std::endl; - // LOG(Message) << " population size= " << par_.genetic.popSize << std::endl; - // LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl; - // LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl; - // LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl; - - // unsigned int k = 0, gen, prevPeak, nCstPeak = 0; - // std::random_device rd; - // GeneticScheduler::Parameters par; - - // par.popSize = par_.genetic.popSize; - // par.mutationRate = par_.genetic.mutationRate; - // par.seed = rd(); - // memPeak_ = 0; - // CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed)); - for (unsigned int i = 0; i < con.size(); ++i) + if (!scheduled_ and !loadedSchedule_) { - // GeneticScheduler scheduler(con[i], memPeak, par); - - // gen = 0; - // do - // { - // LOG(Debug) << "Generation " << gen << ":" << std::endl; - // scheduler.nextGeneration(); - // if (gen != 0) - // { - // if (prevPeak == scheduler.getMinValue()) - // { - // nCstPeak++; - // } - // else - // { - // nCstPeak = 0; - // } - // } - - // prevPeak = scheduler.getMinValue(); - // if (gen % 10 == 0) - // { - // LOG(Iterative) << "Generation " << gen << ": " - // << MEM_MSG(scheduler.getMinValue()) << std::endl; - // } - - // gen++; - // } while ((gen < par_.genetic.maxGen) - // and (nCstPeak < par_.genetic.maxCstGen)); - // auto &t = scheduler.getMinSchedule(); - // if (scheduler.getMinValue() > memPeak_) - // { - // memPeak_ = scheduler.getMinValue(); - // } - auto t = con[i].topoSort(); - for (unsigned int j = 0; j < t.size(); ++j) - { - program_.push_back(t[j]); - } + program_ = vm().schedule(par_.genetic); + scheduled_ = true; } - scheduled_ = true; } void Application::saveSchedule(const std::string filename) @@ -274,8 +188,6 @@ void Application::saveSchedule(const std::string filename) void Application::loadSchedule(const std::string filename) { - //DEFINE_MEMPEAK; - TextReader reader(filename); std::vector program; @@ -287,8 +199,7 @@ void Application::loadSchedule(const std::string filename) { program_.push_back(vm().getModuleAddress(name)); } - scheduled_ = true; - //memPeak_ = memPeak(program_); + loadedSchedule_ = true; } void Application::printSchedule(void) @@ -297,7 +208,8 @@ void Application::printSchedule(void) { HADRON_ERROR(Definition, "Computation not scheduled"); } - LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):" + auto peak = vm().memoryNeeded(program_); + LOG(Message) << "Schedule (memory needed: " << sizeString(peak) << "):" << std::endl; for (unsigned int i = 0; i < program_.size(); ++i) { diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp index 8d2537d0..4b2ce77b 100644 --- a/extras/Hadrons/Application.hpp +++ b/extras/Hadrons/Application.hpp @@ -50,25 +50,13 @@ public: unsigned int, end, unsigned int, step); }; - class GeneticPar: Serializable - { - public: - GeneticPar(void): - popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {}; - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar, - unsigned int, popSize, - unsigned int, maxGen, - unsigned int, maxCstGen, - double , mutationRate); - }; class GlobalPar: Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar, - TrajRange, trajCounter, - GeneticPar, genetic, - std::string, seed); + TrajRange, trajCounter, + VirtualMachine::GeneticPar, genetic, + std::string, seed); }; public: // constructors @@ -103,12 +91,11 @@ private: // virtual machine shortcut DEFINE_VM_ALIAS; private: - long unsigned int locVol_; - std::string parameterFileName_{""}; - GlobalPar par_; - std::vector program_; - Environment::Size memPeak_; - bool scheduled_{false}; + long unsigned int locVol_; + std::string parameterFileName_{""}; + GlobalPar par_; + VirtualMachine::Program program_; + bool scheduled_{false}, loadedSchedule_{false}; }; /****************************************************************************** @@ -119,6 +106,7 @@ template void Application::createModule(const std::string name) { vm().createModule(name); + scheduled_ = false; } template @@ -126,6 +114,7 @@ void Application::createModule(const std::string name, const typename M::Par &par) { vm().createModule(name, par); + scheduled_ = false; } END_HADRONS_NAMESPACE diff --git a/extras/Hadrons/GeneticScheduler.hpp b/extras/Hadrons/GeneticScheduler.hpp index 3b0195e7..f199f1ed 100644 --- a/extras/Hadrons/GeneticScheduler.hpp +++ b/extras/Hadrons/GeneticScheduler.hpp @@ -38,13 +38,13 @@ BEGIN_HADRONS_NAMESPACE /****************************************************************************** * Scheduler based on a genetic algorithm * ******************************************************************************/ -template +template class GeneticScheduler { public: - typedef std::vector Gene; - typedef std::pair GenePair; - typedef std::function ObjFunc; + typedef std::vector Gene; + typedef std::pair GenePair; + typedef std::function ObjFunc; struct Parameters { double mutationRate; @@ -65,7 +65,7 @@ public: void benchmarkCrossover(const unsigned int nIt); // print population friend std::ostream & operator<<(std::ostream &out, - const GeneticScheduler &s) + const GeneticScheduler &s) { out << "["; for (auto &p: s.population_) @@ -87,19 +87,19 @@ private: void mutation(Gene &m, const Gene &c); private: - Graph &graph_; - const ObjFunc &func_; - const Parameters par_; - std::multimap population_; - std::mt19937 gen_; + Graph &graph_; + const ObjFunc &func_; + const Parameters par_; + std::multimap population_; + std::mt19937 gen_; }; /****************************************************************************** * template implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// -template -GeneticScheduler::GeneticScheduler(Graph &graph, const ObjFunc &func, +template +GeneticScheduler::GeneticScheduler(Graph &graph, const ObjFunc &func, const Parameters &par) : graph_(graph) , func_(func) @@ -109,22 +109,22 @@ GeneticScheduler::GeneticScheduler(Graph &graph, const ObjFunc &func, } // access ////////////////////////////////////////////////////////////////////// -template -const typename GeneticScheduler::Gene & -GeneticScheduler::getMinSchedule(void) +template +const typename GeneticScheduler::Gene & +GeneticScheduler::getMinSchedule(void) { return population_.begin()->second; } -template -int GeneticScheduler::getMinValue(void) +template +int GeneticScheduler::getMinValue(void) { return population_.begin()->first; } // breed a new generation ////////////////////////////////////////////////////// -template -void GeneticScheduler::nextGeneration(void) +template +void GeneticScheduler::nextGeneration(void) { // random initialization of the population if necessary if (population_.size() != par_.popSize) @@ -158,8 +158,8 @@ void GeneticScheduler::nextGeneration(void) } // evolution steps ///////////////////////////////////////////////////////////// -template -void GeneticScheduler::initPopulation(void) +template +void GeneticScheduler::initPopulation(void) { population_.clear(); for (unsigned int i = 0; i < par_.popSize; ++i) @@ -170,8 +170,8 @@ void GeneticScheduler::initPopulation(void) } } -template -void GeneticScheduler::doCrossover(void) +template +void GeneticScheduler::doCrossover(void) { auto p = selectPair(); Gene &p1 = *(p.first), &p2 = *(p.second); @@ -185,8 +185,8 @@ void GeneticScheduler::doCrossover(void) } } -template -void GeneticScheduler::doMutation(void) +template +void GeneticScheduler::doMutation(void) { std::uniform_real_distribution mdis(0., 1.); std::uniform_int_distribution pdis(0, population_.size() - 1); @@ -206,8 +206,8 @@ void GeneticScheduler::doMutation(void) } // genetic operators /////////////////////////////////////////////////////////// -template -typename GeneticScheduler::GenePair GeneticScheduler::selectPair(void) +template +typename GeneticScheduler::GenePair GeneticScheduler::selectPair(void) { std::vector prob; unsigned int ind; @@ -233,8 +233,8 @@ typename GeneticScheduler::GenePair GeneticScheduler::selectPair(void) return std::make_pair(p1, p2); } -template -void GeneticScheduler::crossover(Gene &c1, Gene &c2, const Gene &p1, +template +void GeneticScheduler::crossover(Gene &c1, Gene &c2, const Gene &p1, const Gene &p2) { Gene buf; @@ -268,8 +268,8 @@ void GeneticScheduler::crossover(Gene &c1, Gene &c2, const Gene &p1, } } -template -void GeneticScheduler::mutation(Gene &m, const Gene &c) +template +void GeneticScheduler::mutation(Gene &m, const Gene &c) { Gene buf; std::uniform_int_distribution dis(0, c.size() - 1); @@ -298,8 +298,8 @@ void GeneticScheduler::mutation(Gene &m, const Gene &c) } } -template -void GeneticScheduler::benchmarkCrossover(const unsigned int nIt) +template +void GeneticScheduler::benchmarkCrossover(const unsigned int nIt) { Gene p1, p2, c1, c2; double neg = 0., eq = 0., pos = 0., total; diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index 8667a51c..8a6bd149 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -27,6 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution directo /* END LEGAL */ #include +#include #include using namespace Grid; @@ -133,6 +134,8 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } } } + graphOutdated_ = true; + memoryProfileOutdated_ = true; } else { @@ -364,6 +367,7 @@ void VirtualMachine::updateProfile(const unsigned int address) if (env().hasCreatedObject(a) and (profile_.object[a].module == -1)) { profile_.object[a].size = env().getObjectSize(a); + profile_.object[a].storage = env().getObjectStorage(a); profile_.object[a].module = address; profile_.module[address][a] = profile_.object[a].size; if (env().getObjectModule(a) < 0) @@ -419,37 +423,130 @@ void VirtualMachine::memoryProfile(const std::string name) } // garbage collector /////////////////////////////////////////////////////////// -VirtualMachine::GarbageSchedule -VirtualMachine::makeGarbageSchedule(const std::vector &p) const +VirtualMachine::GarbageSchedule +VirtualMachine::makeGarbageSchedule(const Program &p) const { GarbageSchedule freeProg; freeProg.resize(p.size()); - for (unsigned int i = 0; i < env().getMaxAddress(); ++i) + for (unsigned int a = 0; a < env().getMaxAddress(); ++a) { - auto pred = [i, this](const unsigned int j) + if (env().getObjectStorage(a) == Environment::Storage::temporary) { - auto &in = module_[j].input; - auto it = std::find(in.begin(), in.end(), i); - - return (it != in.end()) or (j == env().getObjectModule(i)); - }; - auto it = std::find_if(p.rbegin(), p.rend(), pred); - if (it != p.rend()) + auto it = std::find(p.begin(), p.end(), env().getObjectModule(a)); + + if (it != p.end()) + { + freeProg[std::distance(p.begin(), it)].insert(a); + } + } + else if (env().getObjectStorage(a) == Environment::Storage::object) { - freeProg[std::distance(it, p.rend()) - 1].insert(i); + auto pred = [a, this](const unsigned int b) + { + auto &in = module_[b].input; + auto it = std::find(in.begin(), in.end(), a); + + return (it != in.end()) or (b == env().getObjectModule(a)); + }; + auto it = std::find_if(p.rbegin(), p.rend(), pred); + if (it != p.rend()) + { + freeProg[std::distance(it, p.rend()) - 1].insert(a); + } } } return freeProg; } +// high-water memory function ////////////////////////////////////////////////// +VirtualMachine::Size VirtualMachine::memoryNeeded(const Program &p) +{ + const MemoryProfile &profile = getMemoryProfile(); + GarbageSchedule freep = makeGarbageSchedule(p); + Size current = 0, max = 0; + + for (unsigned int i = 0; i < p.size(); ++i) + { + for (auto &o: profile.module[p[i]]) + { + current += o.second; + } + max = std::max(current, max); + for (auto &o: freep[i]) + { + current -= profile.object[o].size; + } + } + + return max; +} + +// genetic scheduler /////////////////////////////////////////////////////////// +VirtualMachine::Program VirtualMachine::schedule(const GeneticPar &par) +{ + typedef GeneticScheduler Scheduler; + + auto graph = getModuleGraph(); + + //constrained topological sort using a genetic algorithm + LOG(Message) << "Scheduling computation..." << std::endl; + LOG(Message) << " #module= " << graph.size() << std::endl; + LOG(Message) << " population size= " << par.popSize << std::endl; + LOG(Message) << " max. generation= " << par.maxGen << std::endl; + LOG(Message) << " max. cst. generation= " << par.maxCstGen << std::endl; + LOG(Message) << " mutation rate= " << par.mutationRate << std::endl; + + unsigned int k = 0, gen, prevPeak, nCstPeak = 0; + std::random_device rd; + Scheduler::Parameters gpar; + + gpar.popSize = par.popSize; + gpar.mutationRate = par.mutationRate; + gpar.seed = rd(); + CartesianCommunicator::BroadcastWorld(0, &(gpar.seed), sizeof(gpar.seed)); + Scheduler::ObjFunc memPeak = [this](const Program &p)->Size + { + return memoryNeeded(p); + }; + Scheduler scheduler(graph, memPeak, gpar); + gen = 0; + do + { + LOG(Debug) << "Generation " << gen << ":" << std::endl; + scheduler.nextGeneration(); + if (gen != 0) + { + if (prevPeak == scheduler.getMinValue()) + { + nCstPeak++; + } + else + { + nCstPeak = 0; + } + } + + prevPeak = scheduler.getMinValue(); + if (gen % 10 == 0) + { + LOG(Iterative) << "Generation " << gen << ": " + << sizeString(scheduler.getMinValue()) << std::endl; + } + + gen++; + } while ((gen < par.maxGen) and (nCstPeak < par.maxCstGen)); + + return scheduler.getMinSchedule(); +} + // general execution /////////////////////////////////////////////////////////// #define BIG_SEP "===============" #define SEP "---------------" #define MEM_MSG(size) sizeString(size) -void VirtualMachine::executeProgram(const std::vector &p) const +void VirtualMachine::executeProgram(const Program &p) const { Size memPeak = 0, sizeBefore, sizeAfter; GarbageSchedule freeProg; @@ -481,15 +578,6 @@ void VirtualMachine::executeProgram(const std::vector &p) const { env().freeObject(j); } - // free temporaries - for (unsigned int i = 0; i < env().getMaxAddress(); ++i) - { - if ((env().getObjectStorage(i) == Environment::Storage::temporary) - and env().hasCreatedObject(i)) - { - env().freeObject(i); - } - } // print used memory after garbage collection if necessary sizeAfter = env().getTotalSize(); if (sizeBefore != sizeAfter) @@ -506,7 +594,7 @@ void VirtualMachine::executeProgram(const std::vector &p) const void VirtualMachine::executeProgram(const std::vector &p) const { - std::vector pAddress; + Program pAddress; for (auto &n: p) { diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index 88e70b55..a411c108 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -51,19 +51,33 @@ class VirtualMachine { SINGLETON_DEFCTOR(VirtualMachine); public: - typedef SITE_SIZE_TYPE Size; - typedef std::unique_ptr ModPt; - typedef std::vector> GarbageSchedule; + typedef SITE_SIZE_TYPE Size; + typedef std::unique_ptr ModPt; + typedef std::vector> GarbageSchedule; + typedef std::vector Program; struct MemoryPrint { - Size size; - unsigned int module; + Size size; + Environment::Storage storage; + unsigned int module; }; struct MemoryProfile { std::vector> module; std::vector object; }; + class GeneticPar: Serializable + { + public: + GeneticPar(void): + popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {}; + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar, + unsigned int, popSize, + unsigned int, maxGen, + unsigned int, maxCstGen, + double , mutationRate); + }; private: struct ModuleInfo { @@ -109,13 +123,13 @@ public: // memory profile const MemoryProfile &getMemoryProfile(void); // garbage collector - GarbageSchedule makeGarbageSchedule(const std::vector &p) const; + GarbageSchedule makeGarbageSchedule(const Program &p) const; // high-water memory function - Size memoryNeeded(const std::vector &p, - const GarbageSchedule &g); - Size memoryNeeded(const std::vector &p); + Size memoryNeeded(const Program &p); + // genetic scheduler + Program schedule(const GeneticPar &par); // general execution - void executeProgram(const std::vector &p) const; + void executeProgram(const Program &p) const; void executeProgram(const std::vector &p) const; private: // environment shortcut