1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Hadrons: scheduler back!

This commit is contained in:
Antonin Portelli 2017-12-13 16:36:15 +00:00
parent 61fc50d616
commit 0887566134
5 changed files with 189 additions and 186 deletions

View File

@ -94,10 +94,7 @@ void Application::run(void)
}
vm().printContent();
env().printContent();
if (!scheduled_)
{
schedule();
}
schedule();
printSchedule();
configLoop();
}
@ -122,11 +119,13 @@ void Application::parseParameterFile(const std::string parameterFileName)
setPar(par);
if (!push(reader, "modules"))
{
HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '" + parameterFileName + "'");
HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '"
+ parameterFileName + "'");
}
if (!push(reader, "module"))
{
HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '" + parameterFileName + "'");
HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '"
+ parameterFileName + "'");
}
do
{
@ -160,98 +159,13 @@ void Application::saveParameterFile(const std::string parameterFileName)
}
// schedule computation ////////////////////////////////////////////////////////
#define MEM_MSG(size)\
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \
GeneticScheduler<unsigned int>::ObjFunc memPeak = \
[this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\
\
msg = HadronsLogMessage.isActive();\
HadronsLogMessage.Active(false);\
vm().dryRun(true);\
memPeak = vm().executeProgram(program);\
vm().dryRun(false);\
env().freeAll();\
HadronsLogMessage.Active(msg);\
\
return memPeak;\
}
void Application::schedule(void)
{
//DEFINE_MEMPEAK;
// build module dependency graph
LOG(Message) << "Building module graph..." << std::endl;
auto graph = vm().getModuleGraph();
LOG(Debug) << "Module graph:" << std::endl;
LOG(Debug) << graph << std::endl;
auto con = graph.getConnectedComponents();
// constrained topological sort using a genetic algorithm
// LOG(Message) << "Scheduling computation..." << std::endl;
// LOG(Message) << " #module= " << graph.size() << std::endl;
// LOG(Message) << " population size= " << par_.genetic.popSize << std::endl;
// LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl;
// LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl;
// LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl;
// unsigned int k = 0, gen, prevPeak, nCstPeak = 0;
// std::random_device rd;
// GeneticScheduler<unsigned int>::Parameters par;
// par.popSize = par_.genetic.popSize;
// par.mutationRate = par_.genetic.mutationRate;
// par.seed = rd();
// memPeak_ = 0;
// CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed));
for (unsigned int i = 0; i < con.size(); ++i)
if (!scheduled_ and !loadedSchedule_)
{
// GeneticScheduler<unsigned int> scheduler(con[i], memPeak, par);
// gen = 0;
// do
// {
// LOG(Debug) << "Generation " << gen << ":" << std::endl;
// scheduler.nextGeneration();
// if (gen != 0)
// {
// if (prevPeak == scheduler.getMinValue())
// {
// nCstPeak++;
// }
// else
// {
// nCstPeak = 0;
// }
// }
// prevPeak = scheduler.getMinValue();
// if (gen % 10 == 0)
// {
// LOG(Iterative) << "Generation " << gen << ": "
// << MEM_MSG(scheduler.getMinValue()) << std::endl;
// }
// gen++;
// } while ((gen < par_.genetic.maxGen)
// and (nCstPeak < par_.genetic.maxCstGen));
// auto &t = scheduler.getMinSchedule();
// if (scheduler.getMinValue() > memPeak_)
// {
// memPeak_ = scheduler.getMinValue();
// }
auto t = con[i].topoSort();
for (unsigned int j = 0; j < t.size(); ++j)
{
program_.push_back(t[j]);
}
program_ = vm().schedule(par_.genetic);
scheduled_ = true;
}
scheduled_ = true;
}
void Application::saveSchedule(const std::string filename)
@ -274,8 +188,6 @@ void Application::saveSchedule(const std::string filename)
void Application::loadSchedule(const std::string filename)
{
//DEFINE_MEMPEAK;
TextReader reader(filename);
std::vector<std::string> program;
@ -287,8 +199,7 @@ void Application::loadSchedule(const std::string filename)
{
program_.push_back(vm().getModuleAddress(name));
}
scheduled_ = true;
//memPeak_ = memPeak(program_);
loadedSchedule_ = true;
}
void Application::printSchedule(void)
@ -297,7 +208,8 @@ void Application::printSchedule(void)
{
HADRON_ERROR(Definition, "Computation not scheduled");
}
LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):"
auto peak = vm().memoryNeeded(program_);
LOG(Message) << "Schedule (memory needed: " << sizeString(peak) << "):"
<< std::endl;
for (unsigned int i = 0; i < program_.size(); ++i)
{

View File

@ -50,25 +50,13 @@ public:
unsigned int, end,
unsigned int, step);
};
// Tunable parameters of the genetic scheduling algorithm.
// The data members are declared through GRID_SERIALIZABLE_CLASS_MEMBERS
// (presumably this also generates the serialisation code -- confirm against
// the macro definition). The constructor supplies the defaults:
//   popSize      = 20    population size handed to the scheduler
//   maxGen       = 1000  upper bound on the number of generations
//   maxCstGen    = 100   upper bound on generations with an unchanged minimum
//   mutationRate = 0.1   mutation rate handed to the scheduler
class GeneticPar: Serializable
{
public:
GeneticPar(void):
popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
unsigned int, popSize,
unsigned int, maxGen,
unsigned int, maxCstGen,
double , mutationRate);
};
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
GeneticPar, genetic,
std::string, seed);
TrajRange, trajCounter,
VirtualMachine::GeneticPar, genetic,
std::string, seed);
};
public:
// constructors
@ -103,12 +91,11 @@ private:
// virtual machine shortcut
DEFINE_VM_ALIAS;
private:
long unsigned int locVol_;
std::string parameterFileName_{""};
GlobalPar par_;
std::vector<unsigned int> program_;
Environment::Size memPeak_;
bool scheduled_{false};
long unsigned int locVol_;
std::string parameterFileName_{""};
GlobalPar par_;
VirtualMachine::Program program_;
bool scheduled_{false}, loadedSchedule_{false};
};
/******************************************************************************
@ -119,6 +106,7 @@ template <typename M>
void Application::createModule(const std::string name)
{
vm().createModule<M>(name);
scheduled_ = false;
}
template <typename M>
@ -126,6 +114,7 @@ void Application::createModule(const std::string name,
const typename M::Par &par)
{
vm().createModule<M>(name, par);
scheduled_ = false;
}
END_HADRONS_NAMESPACE

View File

@ -38,13 +38,13 @@ BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Scheduler based on a genetic algorithm *
******************************************************************************/
template <typename T>
template <typename V, typename T>
class GeneticScheduler
{
public:
typedef std::vector<T> Gene;
typedef std::pair<Gene *, Gene *> GenePair;
typedef std::function<int(const Gene &)> ObjFunc;
typedef std::vector<T> Gene;
typedef std::pair<Gene *, Gene *> GenePair;
typedef std::function<V(const Gene &)> ObjFunc;
struct Parameters
{
double mutationRate;
@ -65,7 +65,7 @@ public:
void benchmarkCrossover(const unsigned int nIt);
// print population
friend std::ostream & operator<<(std::ostream &out,
const GeneticScheduler<T> &s)
const GeneticScheduler<V, T> &s)
{
out << "[";
for (auto &p: s.population_)
@ -87,19 +87,19 @@ private:
void mutation(Gene &m, const Gene &c);
private:
Graph<T> &graph_;
const ObjFunc &func_;
const Parameters par_;
std::multimap<int, Gene> population_;
std::mt19937 gen_;
Graph<T> &graph_;
const ObjFunc &func_;
const Parameters par_;
std::multimap<V, Gene> population_;
std::mt19937 gen_;
};
/******************************************************************************
* template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
GeneticScheduler<T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
template <typename V, typename T>
GeneticScheduler<V, T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
const Parameters &par)
: graph_(graph)
, func_(func)
@ -109,22 +109,22 @@ GeneticScheduler<T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
}
// access //////////////////////////////////////////////////////////////////////
template <typename T>
const typename GeneticScheduler<T>::Gene &
GeneticScheduler<T>::getMinSchedule(void)
// Returns the gene stored in the first entry of the population. The
// population is a std::multimap keyed on the objective value, so the first
// entry carries the smallest key.
// NOTE(review): the population is assumed to be non-empty here -- confirm
// that callers always run nextGeneration() first.
template <typename V, typename T>
const typename GeneticScheduler<V, T>::Gene &
GeneticScheduler<V, T>::getMinSchedule(void)
{
    const auto best = population_.begin();

    return best->second;
}
template <typename T>
int GeneticScheduler<T>::getMinValue(void)
// Returns the key of the first entry of the population, i.e. the smallest
// objective value currently held in the multimap.
// NOTE(review): keys are of type V but the return type is int, so a V wider
// than int is narrowed on return. Changing it requires updating the in-class
// declaration as well, which is why it is left untouched here.
template <typename V, typename T>
int GeneticScheduler<V, T>::getMinValue(void)
{
    const auto best = population_.begin();

    return best->first;
}
// breed a new generation //////////////////////////////////////////////////////
template <typename T>
void GeneticScheduler<T>::nextGeneration(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::nextGeneration(void)
{
// random initialization of the population if necessary
if (population_.size() != par_.popSize)
@ -158,8 +158,8 @@ void GeneticScheduler<T>::nextGeneration(void)
}
// evolution steps /////////////////////////////////////////////////////////////
template <typename T>
void GeneticScheduler<T>::initPopulation(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::initPopulation(void)
{
population_.clear();
for (unsigned int i = 0; i < par_.popSize; ++i)
@ -170,8 +170,8 @@ void GeneticScheduler<T>::initPopulation(void)
}
}
template <typename T>
void GeneticScheduler<T>::doCrossover(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::doCrossover(void)
{
auto p = selectPair();
Gene &p1 = *(p.first), &p2 = *(p.second);
@ -185,8 +185,8 @@ void GeneticScheduler<T>::doCrossover(void)
}
}
template <typename T>
void GeneticScheduler<T>::doMutation(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::doMutation(void)
{
std::uniform_real_distribution<double> mdis(0., 1.);
std::uniform_int_distribution<unsigned int> pdis(0, population_.size() - 1);
@ -206,8 +206,8 @@ void GeneticScheduler<T>::doMutation(void)
}
// genetic operators ///////////////////////////////////////////////////////////
template <typename T>
typename GeneticScheduler<T>::GenePair GeneticScheduler<T>::selectPair(void)
template <typename V, typename T>
typename GeneticScheduler<V, T>::GenePair GeneticScheduler<V, T>::selectPair(void)
{
std::vector<double> prob;
unsigned int ind;
@ -233,8 +233,8 @@ typename GeneticScheduler<T>::GenePair GeneticScheduler<T>::selectPair(void)
return std::make_pair(p1, p2);
}
template <typename T>
void GeneticScheduler<T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
template <typename V, typename T>
void GeneticScheduler<V, T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
const Gene &p2)
{
Gene buf;
@ -268,8 +268,8 @@ void GeneticScheduler<T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
}
}
template <typename T>
void GeneticScheduler<T>::mutation(Gene &m, const Gene &c)
template <typename V, typename T>
void GeneticScheduler<V, T>::mutation(Gene &m, const Gene &c)
{
Gene buf;
std::uniform_int_distribution<unsigned int> dis(0, c.size() - 1);
@ -298,8 +298,8 @@ void GeneticScheduler<T>::mutation(Gene &m, const Gene &c)
}
}
template <typename T>
void GeneticScheduler<T>::benchmarkCrossover(const unsigned int nIt)
template <typename V, typename T>
void GeneticScheduler<V, T>::benchmarkCrossover(const unsigned int nIt)
{
Gene p1, p2, c1, c2;
double neg = 0., eq = 0., pos = 0., total;

View File

@ -27,6 +27,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
/* END LEGAL */
#include <Grid/Hadrons/VirtualMachine.hpp>
#include <Grid/Hadrons/GeneticScheduler.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
using namespace Grid;
@ -133,6 +134,8 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt)
}
}
}
graphOutdated_ = true;
memoryProfileOutdated_ = true;
}
else
{
@ -364,6 +367,7 @@ void VirtualMachine::updateProfile(const unsigned int address)
if (env().hasCreatedObject(a) and (profile_.object[a].module == -1))
{
profile_.object[a].size = env().getObjectSize(a);
profile_.object[a].storage = env().getObjectStorage(a);
profile_.object[a].module = address;
profile_.module[address][a] = profile_.object[a].size;
if (env().getObjectModule(a) < 0)
@ -419,37 +423,130 @@ void VirtualMachine::memoryProfile(const std::string name)
}
// garbage collector ///////////////////////////////////////////////////////////
VirtualMachine::GarbageSchedule
VirtualMachine::makeGarbageSchedule(const std::vector<unsigned int> &p) const
VirtualMachine::GarbageSchedule
VirtualMachine::makeGarbageSchedule(const Program &p) const
{
GarbageSchedule freeProg;
freeProg.resize(p.size());
for (unsigned int i = 0; i < env().getMaxAddress(); ++i)
for (unsigned int a = 0; a < env().getMaxAddress(); ++a)
{
auto pred = [i, this](const unsigned int j)
if (env().getObjectStorage(a) == Environment::Storage::temporary)
{
auto &in = module_[j].input;
auto it = std::find(in.begin(), in.end(), i);
return (it != in.end()) or (j == env().getObjectModule(i));
};
auto it = std::find_if(p.rbegin(), p.rend(), pred);
if (it != p.rend())
auto it = std::find(p.begin(), p.end(), env().getObjectModule(a));
if (it != p.end())
{
freeProg[std::distance(p.begin(), it)].insert(a);
}
}
else if (env().getObjectStorage(a) == Environment::Storage::object)
{
freeProg[std::distance(it, p.rend()) - 1].insert(i);
auto pred = [a, this](const unsigned int b)
{
auto &in = module_[b].input;
auto it = std::find(in.begin(), in.end(), a);
return (it != in.end()) or (b == env().getObjectModule(a));
};
auto it = std::find_if(p.rbegin(), p.rend(), pred);
if (it != p.rend())
{
freeProg[std::distance(it, p.rend()) - 1].insert(a);
}
}
}
return freeProg;
}
// high-water memory function //////////////////////////////////////////////////
// Replays program `p` against the memory profile and returns the largest
// running total observed. For each step, the sizes recorded in the profile
// for that module are added, the high-water mark is updated, and then the
// sizes of the objects the garbage schedule frees at that step are
// subtracted.
VirtualMachine::Size VirtualMachine::memoryNeeded(const Program &p)
{
    const MemoryProfile   &prof   = getMemoryProfile();
    const GarbageSchedule  toFree = makeGarbageSchedule(p);
    Size                   inUse = 0, highWater = 0;

    for (unsigned int step = 0; step < p.size(); ++step)
    {
        // add every object size the profile records for this module
        for (const auto &entry: prof.module[p[step]])
        {
            inUse += entry.second;
        }
        if (highWater < inUse)
        {
            highWater = inUse;
        }
        // subtract what the garbage schedule frees at this step
        for (const auto &address: toFree[step])
        {
            inUse -= prof.object[address].size;
        }
    }

    return highWater;
}
// genetic scheduler ///////////////////////////////////////////////////////////
// Builds an execution order for the registered modules with the genetic
// scheduler, using memoryNeeded() as the objective function.
//
// par: popSize and mutationRate are forwarded to the scheduler; maxGen bounds
//      the number of generations; maxCstGen bounds the number of consecutive
//      generations whose minimum objective value did not change.
//
// Returns the schedule with the smallest objective value held by the
// scheduler when the loop stops.
VirtualMachine::Program VirtualMachine::schedule(const GeneticPar &par)
{
    typedef GeneticScheduler<Size, unsigned int> Scheduler;

    // local copy: the scheduler takes the graph by non-const reference
    auto graph = getModuleGraph();

    //constrained topological sort using a genetic algorithm
    LOG(Message) << "Scheduling computation..." << std::endl;
    LOG(Message) << " #module= " << graph.size() << std::endl;
    LOG(Message) << " population size= " << par.popSize << std::endl;
    LOG(Message) << " max. generation= " << par.maxGen << std::endl;
    LOG(Message) << " max. cst. generation= " << par.maxCstGen << std::endl;
    LOG(Message) << " mutation rate= " << par.mutationRate << std::endl;

    unsigned int          gen = 0, nCstPeak = 0;
    // kept in the objective's own type (Size) and initialised, so the
    // comparison below never reads an indeterminate or narrowed value
    Size                  prevPeak = 0;
    std::random_device    rd;
    Scheduler::Parameters gpar;

    gpar.popSize      = par.popSize;
    gpar.mutationRate = par.mutationRate;
    gpar.seed         = rd();
    // overwrite the locally drawn seed with the one broadcast from rank 0
    CartesianCommunicator::BroadcastWorld(0, &(gpar.seed), sizeof(gpar.seed));

    // the scheduler keeps a reference to this functor, so it must be declared
    // before the scheduler and outlive it
    Scheduler::ObjFunc memPeak = [this](const Program &p)->Size
    {
        return memoryNeeded(p);
    };
    Scheduler scheduler(graph, memPeak, gpar);

    do
    {
        LOG(Debug) << "Generation " << gen << ":" << std::endl;
        scheduler.nextGeneration();

        // query the current best value once per generation
        const auto minPeak = scheduler.getMinValue();

        if (gen != 0)
        {
            if (prevPeak == minPeak)
            {
                nCstPeak++;
            }
            else
            {
                nCstPeak = 0;
            }
        }
        prevPeak = minPeak;
        if (gen % 10 == 0)
        {
            LOG(Iterative) << "Generation " << gen << ": "
                           << sizeString(minPeak) << std::endl;
        }
        gen++;
    } while ((gen < par.maxGen) and (nCstPeak < par.maxCstGen));

    return scheduler.getMinSchedule();
}
// general execution ///////////////////////////////////////////////////////////
#define BIG_SEP "==============="
#define SEP "---------------"
#define MEM_MSG(size) sizeString(size)
void VirtualMachine::executeProgram(const std::vector<unsigned int> &p) const
void VirtualMachine::executeProgram(const Program &p) const
{
Size memPeak = 0, sizeBefore, sizeAfter;
GarbageSchedule freeProg;
@ -481,15 +578,6 @@ void VirtualMachine::executeProgram(const std::vector<unsigned int> &p) const
{
env().freeObject(j);
}
// free temporaries
for (unsigned int i = 0; i < env().getMaxAddress(); ++i)
{
if ((env().getObjectStorage(i) == Environment::Storage::temporary)
and env().hasCreatedObject(i))
{
env().freeObject(i);
}
}
// print used memory after garbage collection if necessary
sizeAfter = env().getTotalSize();
if (sizeBefore != sizeAfter)
@ -506,7 +594,7 @@ void VirtualMachine::executeProgram(const std::vector<unsigned int> &p) const
void VirtualMachine::executeProgram(const std::vector<std::string> &p) const
{
std::vector<unsigned int> pAddress;
Program pAddress;
for (auto &n: p)
{

View File

@ -51,19 +51,33 @@ class VirtualMachine
{
SINGLETON_DEFCTOR(VirtualMachine);
public:
typedef SITE_SIZE_TYPE Size;
typedef std::unique_ptr<ModuleBase> ModPt;
typedef std::vector<std::set<unsigned int>> GarbageSchedule;
typedef SITE_SIZE_TYPE Size;
typedef std::unique_ptr<ModuleBase> ModPt;
typedef std::vector<std::set<unsigned int>> GarbageSchedule;
typedef std::vector<unsigned int> Program;
struct MemoryPrint
{
Size size;
unsigned int module;
Size size;
Environment::Storage storage;
unsigned int module;
};
struct MemoryProfile
{
std::vector<std::map<unsigned int, Size>> module;
std::vector<MemoryPrint> object;
};
// Tunable parameters of the genetic scheduling algorithm.
// The data members are declared through GRID_SERIALIZABLE_CLASS_MEMBERS
// (presumably this also generates the serialisation code -- confirm against
// the macro definition). The constructor supplies the defaults:
//   popSize      = 20    population size handed to the scheduler
//   maxGen       = 1000  upper bound on the number of generations
//   maxCstGen    = 100   upper bound on generations with an unchanged minimum
//   mutationRate = 0.1   mutation rate handed to the scheduler
class GeneticPar: Serializable
{
public:
GeneticPar(void):
popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
unsigned int, popSize,
unsigned int, maxGen,
unsigned int, maxCstGen,
double , mutationRate);
};
private:
struct ModuleInfo
{
@ -109,13 +123,13 @@ public:
// memory profile
const MemoryProfile &getMemoryProfile(void);
// garbage collector
GarbageSchedule makeGarbageSchedule(const std::vector<unsigned int> &p) const;
GarbageSchedule makeGarbageSchedule(const Program &p) const;
// high-water memory function
Size memoryNeeded(const std::vector<unsigned int> &p,
const GarbageSchedule &g);
Size memoryNeeded(const std::vector<unsigned int> &p);
Size memoryNeeded(const Program &p);
// genetic scheduler
Program schedule(const GeneticPar &par);
// general execution
void executeProgram(const std::vector<unsigned int> &p) const;
void executeProgram(const Program &p) const;
void executeProgram(const std::vector<std::string> &p) const;
private:
// environment shortcut