1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Updating the feature/clover branch with the newest Hadron package

This commit is contained in:
Guido Cossu 2018-01-12 13:35:51 +00:00
commit 3923683e9b
158 changed files with 9663 additions and 5976 deletions

3
.gitignore vendored
View File

@ -93,6 +93,7 @@ build*/*
*.xcodeproj/*
build.sh
.vscode
*.code-workspace
# Eigen source #
################
@ -123,6 +124,8 @@ make-bin-BUCK.sh
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc
# vs code editor files #
########################
.vscode/
.vscode/settings.json
settings.json

36
TODO
View File

@ -1,20 +1,36 @@
TODO:
---------------
Large item work list:
Code item work list
a) namespaces & indentation
GRID_BEGIN_NAMESPACE();
GRID_END_NAMESPACE();
-- delete QCD namespace
b) GPU branch
- start branch
- Increase Macro use in core library support; prepare for change
- Audit volume of "device" code
- Virtual function audit
- Start port once Nvidia box is up
- Cut down volume of code for first port? How?
Physics item work list:
1)- BG/Q port and check ; Andrew says ok.
2)- Christoph's local basis expansion Lanczos
--
3a)- RNG I/O in ILDG/SciDAC (minor)
3b)- Precision conversion and sort out localConvert <-- partial/easy
3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
4)- Physical propagator interface
5)- Conserved currents
6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
7)- HDCR resume
2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
3)- Physical propagator interface
4)- Multigrid Wilson and DWF, compare to other Multigrid implementations
5)- HDCR resume
----------------------------
Recent DONE
-- RNG I/O in ILDG/SciDAC (minor)
-- Precision conversion and sort out localConvert <-- partial/easy
-- Conserved currents (Andrew)
-- Split grid
-- Christoph's local basis expansion Lanczos
-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori
-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
-- GaugeFix into central location <-- DONE

View File

@ -106,7 +106,7 @@ int main (int argc, char ** argv)
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
std::vector<CommsRequest_t> requests;
ncomm=0;
for(int mu=0;mu<4;mu++){
@ -202,7 +202,7 @@ int main (int argc, char ** argv)
int recv_from_rank;
{
std::vector<CartesianCommunicator::CommsRequest_t> requests;
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
@ -215,7 +215,7 @@ int main (int argc, char ** argv)
comm_proc = mpi_layout[mu]-1;
{
std::vector<CartesianCommunicator::CommsRequest_t> requests;
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
@ -290,7 +290,7 @@ int main (int argc, char ** argv)
dbytes=0;
ncomm=0;
std::vector<CartesianCommunicator::CommsRequest_t> requests;
std::vector<CommsRequest_t> requests;
for(int mu=0;mu<4;mu++){
@ -383,7 +383,7 @@ int main (int argc, char ** argv)
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
std::vector<CommsRequest_t> requests;
dbytes=0;
ncomm=0;
for(int mu=0;mu<4;mu++){
@ -481,7 +481,7 @@ int main (int argc, char ** argv)
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
std::vector<CommsRequest_t> requests;
dbytes=0;
ncomm=0;

View File

@ -3,9 +3,7 @@
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
echo "-- deploying Eigen source..."
wget ${EIGEN_URL} --no-check-certificate
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
rm `basename ${EIGEN_URL}`
wget ${EIGEN_URL} --no-check-certificate && ./scripts/update_eigen.sh `basename ${EIGEN_URL}` && rm `basename ${EIGEN_URL}`
echo '-- generating Make.inc files...'
./scripts/filelist

View File

@ -337,15 +337,11 @@ case ${ac_PRECISION} in
esac
###################### Shared memory allocation technique under MPI3
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs],
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
case ${ac_SHM} in
shmget)
AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
;;
shmopen)
AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
;;
@ -367,7 +363,7 @@ AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
############### communication type selection
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto],
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
case ${ac_COMMS} in
@ -375,22 +371,10 @@ case ${ac_COMMS} in
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
comms_type='none'
;;
mpi3*)
mpi*)
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
comms_type='mpi3'
;;
mpit)
AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] )
comms_type='mpit'
;;
mpi*)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
comms_type='mpi'
;;
shmem)
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
comms_type='shmem'
;;
*)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Application.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -43,6 +42,7 @@ using namespace Hadrons;
// constructors ////////////////////////////////////////////////////////////////
Application::Application(void)
{
initLogger();
LOG(Message) << "Modules available:" << std::endl;
auto list = ModuleFactory::getInstance().getBuilderList();
for (auto &m: list)
@ -73,12 +73,6 @@ Application::Application(const std::string parameterFileName)
parameterFileName_ = parameterFileName;
}
// environment shortcut ////////////////////////////////////////////////////////
Environment & Application::env(void) const
{
return Environment::getInstance();
}
// access //////////////////////////////////////////////////////////////////////
void Application::setPar(const Application::GlobalPar &par)
{
@ -94,14 +88,13 @@ const Application::GlobalPar & Application::getPar(void)
// execute /////////////////////////////////////////////////////////////////////
void Application::run(void)
{
if (!parameterFileName_.empty() and (env().getNModule() == 0))
if (!parameterFileName_.empty() and (vm().getNModule() == 0))
{
parseParameterFile(parameterFileName_);
}
if (!scheduled_)
{
schedule();
}
vm().printContent();
env().printContent();
schedule();
printSchedule();
configLoop();
}
@ -124,12 +117,20 @@ void Application::parseParameterFile(const std::string parameterFileName)
LOG(Message) << "Building application from '" << parameterFileName << "'..." << std::endl;
read(reader, "parameters", par);
setPar(par);
push(reader, "modules");
push(reader, "module");
if (!push(reader, "modules"))
{
HADRON_ERROR(Parsing, "Cannot open node 'modules' in parameter file '"
+ parameterFileName + "'");
}
if (!push(reader, "module"))
{
HADRON_ERROR(Parsing, "Cannot open node 'modules/module' in parameter file '"
+ parameterFileName + "'");
}
do
{
read(reader, "id", id);
env().createModule(id.name, id.type, reader);
vm().createModule(id.name, id.type, reader);
} while (reader.nextElement("module"));
pop(reader);
pop(reader);
@ -139,7 +140,7 @@ void Application::saveParameterFile(const std::string parameterFileName)
{
XmlWriter writer(parameterFileName);
ObjectId id;
const unsigned int nMod = env().getNModule();
const unsigned int nMod = vm().getNModule();
LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
write(writer, "parameters", getPar());
@ -147,10 +148,10 @@ void Application::saveParameterFile(const std::string parameterFileName)
for (unsigned int i = 0; i < nMod; ++i)
{
push(writer, "module");
id.name = env().getModuleName(i);
id.type = env().getModule(i)->getRegisteredName();
id.name = vm().getModuleName(i);
id.type = vm().getModule(i)->getRegisteredName();
write(writer, "id", id);
env().getModule(i)->saveParameters(writer, "options");
vm().getModule(i)->saveParameters(writer, "options");
pop(writer);
}
pop(writer);
@ -158,95 +159,13 @@ void Application::saveParameterFile(const std::string parameterFileName)
}
// schedule computation ////////////////////////////////////////////////////////
#define MEM_MSG(size)\
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \
GeneticScheduler<unsigned int>::ObjFunc memPeak = \
[this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\
\
msg = HadronsLogMessage.isActive();\
HadronsLogMessage.Active(false);\
env().dryRun(true);\
memPeak = env().executeProgram(program);\
env().dryRun(false);\
env().freeAll();\
HadronsLogMessage.Active(true);\
\
return memPeak;\
}
void Application::schedule(void)
{
DEFINE_MEMPEAK;
// build module dependency graph
LOG(Message) << "Building module graph..." << std::endl;
auto graph = env().makeModuleGraph();
auto con = graph.getConnectedComponents();
// constrained topological sort using a genetic algorithm
LOG(Message) << "Scheduling computation..." << std::endl;
LOG(Message) << " #module= " << graph.size() << std::endl;
LOG(Message) << " population size= " << par_.genetic.popSize << std::endl;
LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl;
LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl;
LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl;
unsigned int k = 0, gen, prevPeak, nCstPeak = 0;
std::random_device rd;
GeneticScheduler<unsigned int>::Parameters par;
par.popSize = par_.genetic.popSize;
par.mutationRate = par_.genetic.mutationRate;
par.seed = rd();
memPeak_ = 0;
CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed));
for (unsigned int i = 0; i < con.size(); ++i)
if (!scheduled_ and !loadedSchedule_)
{
GeneticScheduler<unsigned int> scheduler(con[i], memPeak, par);
gen = 0;
do
{
LOG(Debug) << "Generation " << gen << ":" << std::endl;
scheduler.nextGeneration();
if (gen != 0)
{
if (prevPeak == scheduler.getMinValue())
{
nCstPeak++;
}
else
{
nCstPeak = 0;
}
}
prevPeak = scheduler.getMinValue();
if (gen % 10 == 0)
{
LOG(Iterative) << "Generation " << gen << ": "
<< MEM_MSG(scheduler.getMinValue()) << std::endl;
}
gen++;
} while ((gen < par_.genetic.maxGen)
and (nCstPeak < par_.genetic.maxCstGen));
auto &t = scheduler.getMinSchedule();
if (scheduler.getMinValue() > memPeak_)
{
memPeak_ = scheduler.getMinValue();
}
for (unsigned int j = 0; j < t.size(); ++j)
{
program_.push_back(t[j]);
}
program_ = vm().schedule(par_.genetic);
scheduled_ = true;
}
scheduled_ = true;
}
void Application::saveSchedule(const std::string filename)
@ -256,21 +175,19 @@ void Application::saveSchedule(const std::string filename)
if (!scheduled_)
{
HADRON_ERROR("Computation not scheduled");
HADRON_ERROR(Definition, "Computation not scheduled");
}
LOG(Message) << "Saving current schedule to '" << filename << "'..."
<< std::endl;
for (auto address: program_)
{
program.push_back(env().getModuleName(address));
program.push_back(vm().getModuleName(address));
}
write(writer, "schedule", program);
}
void Application::loadSchedule(const std::string filename)
{
DEFINE_MEMPEAK;
TextReader reader(filename);
std::vector<std::string> program;
@ -280,24 +197,24 @@ void Application::loadSchedule(const std::string filename)
program_.clear();
for (auto &name: program)
{
program_.push_back(env().getModuleAddress(name));
program_.push_back(vm().getModuleAddress(name));
}
scheduled_ = true;
memPeak_ = memPeak(program_);
loadedSchedule_ = true;
}
void Application::printSchedule(void)
{
if (!scheduled_)
{
HADRON_ERROR("Computation not scheduled");
HADRON_ERROR(Definition, "Computation not scheduled");
}
LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):"
auto peak = vm().memoryNeeded(program_);
LOG(Message) << "Schedule (memory needed: " << sizeString(peak) << "):"
<< std::endl;
for (unsigned int i = 0; i < program_.size(); ++i)
{
LOG(Message) << std::setw(4) << i + 1 << ": "
<< env().getModuleName(program_[i]) << std::endl;
<< vm().getModuleName(program_[i]) << std::endl;
}
}
@ -310,8 +227,8 @@ void Application::configLoop(void)
{
LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
<< " " << BIG_SEP << std::endl;
env().setTrajectory(t);
env().executeProgram(program_);
vm().setTrajectory(t);
vm().executeProgram(program_);
}
LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
env().freeAll();

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Application.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -31,8 +30,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define Hadrons_Application_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Environment.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
#include <Grid/Hadrons/VirtualMachine.hpp>
#include <Grid/Hadrons/Modules.hpp>
BEGIN_HADRONS_NAMESPACE
@ -51,25 +49,13 @@ public:
unsigned int, end,
unsigned int, step);
};
class GeneticPar: Serializable
{
public:
GeneticPar(void):
popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
unsigned int, popSize,
unsigned int, maxGen,
unsigned int, maxCstGen,
double , mutationRate);
};
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
GeneticPar, genetic,
std::string, seed);
TrajRange, trajCounter,
VirtualMachine::GeneticPar, genetic,
std::string, seed);
};
public:
// constructors
@ -100,14 +86,15 @@ public:
void configLoop(void);
private:
// environment shortcut
Environment & env(void) const;
DEFINE_ENV_ALIAS;
// virtual machine shortcut
DEFINE_VM_ALIAS;
private:
long unsigned int locVol_;
std::string parameterFileName_{""};
GlobalPar par_;
std::vector<unsigned int> program_;
Environment::Size memPeak_;
bool scheduled_{false};
long unsigned int locVol_;
std::string parameterFileName_{""};
GlobalPar par_;
VirtualMachine::Program program_;
bool scheduled_{false}, loadedSchedule_{false};
};
/******************************************************************************
@ -117,14 +104,16 @@ private:
template <typename M>
void Application::createModule(const std::string name)
{
env().createModule<M>(name);
vm().createModule<M>(name);
scheduled_ = false;
}
template <typename M>
void Application::createModule(const std::string name,
const typename M::Par &par)
{
env().createModule<M>(name, par);
vm().createModule<M>(name, par);
scheduled_ = false;
}
END_HADRONS_NAMESPACE

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Environment.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -35,6 +34,9 @@ using namespace Grid;
using namespace QCD;
using namespace Hadrons;
#define ERROR_NO_ADDRESS(address)\
HADRON_ERROR(Definition, "no object with address " + std::to_string(address));
/******************************************************************************
* Environment implementation *
******************************************************************************/
@ -56,28 +58,6 @@ Environment::Environment(void)
rng4d_.reset(new GridParallelRNG(grid4d_.get()));
}
// dry run /////////////////////////////////////////////////////////////////////
void Environment::dryRun(const bool isDry)
{
dryRun_ = isDry;
}
bool Environment::isDryRun(void) const
{
return dryRun_;
}
// trajectory number ///////////////////////////////////////////////////////////
void Environment::setTrajectory(const unsigned int traj)
{
traj_ = traj;
}
unsigned int Environment::getTrajectory(void) const
{
return traj_;
}
// grids ///////////////////////////////////////////////////////////////////////
void Environment::createGrid(const unsigned int Ls)
{
@ -105,7 +85,7 @@ GridCartesian * Environment::getGrid(const unsigned int Ls) const
}
catch(std::out_of_range &)
{
HADRON_ERROR("no grid with Ls= " << Ls);
HADRON_ERROR(Definition, "no grid with Ls= " + std::to_string(Ls));
}
}
@ -124,7 +104,7 @@ GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const
}
catch(std::out_of_range &)
{
HADRON_ERROR("no red-black 5D grid with Ls= " << Ls);
HADRON_ERROR(Definition, "no red-black 5D grid with Ls= " + std::to_string(Ls));
}
}
@ -143,6 +123,11 @@ int Environment::getDim(const unsigned int mu) const
return dim_[mu];
}
unsigned long int Environment::getLocalVolume(void) const
{
return locVol_;
}
// random number generator /////////////////////////////////////////////////////
void Environment::setSeed(const std::vector<int> &seed)
{
@ -154,291 +139,6 @@ GridParallelRNG * Environment::get4dRng(void) const
return rng4d_.get();
}
// module management ///////////////////////////////////////////////////////////
void Environment::pushModule(Environment::ModPt &pt)
{
std::string name = pt->getName();
if (!hasModule(name))
{
std::vector<unsigned int> inputAddress;
unsigned int address;
ModuleInfo m;
m.data = std::move(pt);
m.type = typeIdPt(*m.data.get());
m.name = name;
auto input = m.data->getInput();
for (auto &in: input)
{
if (!hasObject(in))
{
addObject(in , -1);
}
m.input.push_back(objectAddress_[in]);
}
auto output = m.data->getOutput();
module_.push_back(std::move(m));
address = static_cast<unsigned int>(module_.size() - 1);
moduleAddress_[name] = address;
for (auto &out: output)
{
if (!hasObject(out))
{
addObject(out, address);
}
else
{
if (object_[objectAddress_[out]].module < 0)
{
object_[objectAddress_[out]].module = address;
}
else
{
HADRON_ERROR("object '" + out
+ "' is already produced by module '"
+ module_[object_[getObjectAddress(out)].module].name
+ "' (while pushing module '" + name + "')");
}
}
}
}
else
{
HADRON_ERROR("module '" + name + "' already exists");
}
}
unsigned int Environment::getNModule(void) const
{
return module_.size();
}
void Environment::createModule(const std::string name, const std::string type,
XmlReader &reader)
{
auto &factory = ModuleFactory::getInstance();
auto pt = factory.create(type, name);
pt->parseParameters(reader, "options");
pushModule(pt);
}
ModuleBase * Environment::getModule(const unsigned int address) const
{
if (hasModule(address))
{
return module_[address].data.get();
}
else
{
HADRON_ERROR("no module with address " + std::to_string(address));
}
}
ModuleBase * Environment::getModule(const std::string name) const
{
return getModule(getModuleAddress(name));
}
unsigned int Environment::getModuleAddress(const std::string name) const
{
if (hasModule(name))
{
return moduleAddress_.at(name);
}
else
{
HADRON_ERROR("no module with name '" + name + "'");
}
}
std::string Environment::getModuleName(const unsigned int address) const
{
if (hasModule(address))
{
return module_[address].name;
}
else
{
HADRON_ERROR("no module with address " + std::to_string(address));
}
}
std::string Environment::getModuleType(const unsigned int address) const
{
if (hasModule(address))
{
return typeName(module_[address].type);
}
else
{
HADRON_ERROR("no module with address " + std::to_string(address));
}
}
std::string Environment::getModuleType(const std::string name) const
{
return getModuleType(getModuleAddress(name));
}
std::string Environment::getModuleNamespace(const unsigned int address) const
{
std::string type = getModuleType(address), ns;
auto pos2 = type.rfind("::");
auto pos1 = type.rfind("::", pos2 - 2);
return type.substr(pos1 + 2, pos2 - pos1 - 2);
}
std::string Environment::getModuleNamespace(const std::string name) const
{
return getModuleNamespace(getModuleAddress(name));
}
bool Environment::hasModule(const unsigned int address) const
{
return (address < module_.size());
}
bool Environment::hasModule(const std::string name) const
{
return (moduleAddress_.find(name) != moduleAddress_.end());
}
Graph<unsigned int> Environment::makeModuleGraph(void) const
{
Graph<unsigned int> moduleGraph;
for (unsigned int i = 0; i < module_.size(); ++i)
{
moduleGraph.addVertex(i);
for (auto &j: module_[i].input)
{
moduleGraph.addEdge(object_[j].module, i);
}
}
return moduleGraph;
}
#define BIG_SEP "==============="
#define SEP "---------------"
#define MEM_MSG(size)\
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
Environment::Size
Environment::executeProgram(const std::vector<unsigned int> &p)
{
Size memPeak = 0, sizeBefore, sizeAfter;
std::vector<std::set<unsigned int>> freeProg;
bool continueCollect, nothingFreed;
// build garbage collection schedule
freeProg.resize(p.size());
for (unsigned int i = 0; i < object_.size(); ++i)
{
auto pred = [i, this](const unsigned int j)
{
auto &in = module_[j].input;
auto it = std::find(in.begin(), in.end(), i);
return (it != in.end()) or (j == object_[i].module);
};
auto it = std::find_if(p.rbegin(), p.rend(), pred);
if (it != p.rend())
{
freeProg[p.rend() - it - 1].insert(i);
}
}
// program execution
for (unsigned int i = 0; i < p.size(); ++i)
{
// execute module
if (!isDryRun())
{
LOG(Message) << SEP << " Measurement step " << i+1 << "/"
<< p.size() << " (module '" << module_[p[i]].name
<< "') " << SEP << std::endl;
}
(*module_[p[i]].data)();
sizeBefore = getTotalSize();
// print used memory after execution
if (!isDryRun())
{
LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore)
<< std::endl;
}
if (sizeBefore > memPeak)
{
memPeak = sizeBefore;
}
// garbage collection for step i
if (!isDryRun())
{
LOG(Message) << "Garbage collection..." << std::endl;
}
nothingFreed = true;
do
{
continueCollect = false;
auto toFree = freeProg[i];
for (auto &j: toFree)
{
// continue garbage collection while there are still
// objects without owners
continueCollect = continueCollect or !hasOwners(j);
if(freeObject(j))
{
// if an object has been freed, remove it from
// the garbage collection schedule
freeProg[i].erase(j);
nothingFreed = false;
}
}
} while (continueCollect);
// any remaining objects in step i garbage collection schedule
// is scheduled for step i + 1
if (i + 1 < p.size())
{
for (auto &j: freeProg[i])
{
freeProg[i + 1].insert(j);
}
}
// print used memory after garbage collection if necessary
if (!isDryRun())
{
sizeAfter = getTotalSize();
if (sizeBefore != sizeAfter)
{
LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter)
<< std::endl;
}
else
{
LOG(Message) << "Nothing to free" << std::endl;
}
}
}
return memPeak;
}
Environment::Size Environment::executeProgram(const std::vector<std::string> &p)
{
std::vector<unsigned int> pAddress;
for (auto &n: p)
{
pAddress.push_back(getModuleAddress(n));
}
return executeProgram(pAddress);
}
// general memory management ///////////////////////////////////////////////////
void Environment::addObject(const std::string name, const int moduleAddress)
{
@ -448,46 +148,25 @@ void Environment::addObject(const std::string name, const int moduleAddress)
info.name = name;
info.module = moduleAddress;
info.data = nullptr;
object_.push_back(std::move(info));
objectAddress_[name] = static_cast<unsigned int>(object_.size() - 1);
}
else
{
HADRON_ERROR("object '" + name + "' already exists");
HADRON_ERROR(Definition, "object '" + name + "' already exists");
}
}
void Environment::registerObject(const unsigned int address,
const unsigned int size, const unsigned int Ls)
void Environment::setObjectModule(const unsigned int objAddress,
const int modAddress)
{
if (!hasRegisteredObject(address))
{
if (hasObject(address))
{
object_[address].size = size;
object_[address].Ls = Ls;
object_[address].isRegistered = true;
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
else
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " already registered");
}
object_[objAddress].module = modAddress;
}
void Environment::registerObject(const std::string name,
const unsigned int size, const unsigned int Ls)
unsigned int Environment::getMaxAddress(void) const
{
if (!hasObject(name))
{
addObject(name);
}
registerObject(getObjectAddress(name), size, Ls);
return object_.size();
}
unsigned int Environment::getObjectAddress(const std::string name) const
@ -498,7 +177,7 @@ unsigned int Environment::getObjectAddress(const std::string name) const
}
else
{
HADRON_ERROR("no object with name '" + name + "'");
HADRON_ERROR(Definition, "no object with name '" + name + "'");
}
}
@ -510,13 +189,13 @@ std::string Environment::getObjectName(const unsigned int address) const
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
ERROR_NO_ADDRESS(address);
}
}
std::string Environment::getObjectType(const unsigned int address) const
{
if (hasRegisteredObject(address))
if (hasObject(address))
{
if (object_[address].type)
{
@ -527,14 +206,9 @@ std::string Environment::getObjectType(const unsigned int address) const
return "<no type>";
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
ERROR_NO_ADDRESS(address);
}
}
@ -545,18 +219,13 @@ std::string Environment::getObjectType(const std::string name) const
Environment::Size Environment::getObjectSize(const unsigned int address) const
{
if (hasRegisteredObject(address))
if (hasObject(address))
{
return object_[address].size;
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
ERROR_NO_ADDRESS(address);
}
}
@ -565,7 +234,24 @@ Environment::Size Environment::getObjectSize(const std::string name) const
return getObjectSize(getObjectAddress(name));
}
unsigned int Environment::getObjectModule(const unsigned int address) const
Environment::Storage Environment::getObjectStorage(const unsigned int address) const
{
if (hasObject(address))
{
return object_[address].storage;
}
else
{
ERROR_NO_ADDRESS(address);
}
}
Environment::Storage Environment::getObjectStorage(const std::string name) const
{
return getObjectStorage(getObjectAddress(name));
}
int Environment::getObjectModule(const unsigned int address) const
{
if (hasObject(address))
{
@ -573,29 +259,24 @@ unsigned int Environment::getObjectModule(const unsigned int address) const
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
ERROR_NO_ADDRESS(address);
}
}
unsigned int Environment::getObjectModule(const std::string name) const
int Environment::getObjectModule(const std::string name) const
{
return getObjectModule(getObjectAddress(name));
}
unsigned int Environment::getObjectLs(const unsigned int address) const
{
if (hasRegisteredObject(address))
if (hasObject(address))
{
return object_[address].Ls;
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address)
+ " exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
ERROR_NO_ADDRESS(address);
}
}
@ -616,30 +297,6 @@ bool Environment::hasObject(const std::string name) const
return ((it != objectAddress_.end()) and hasObject(it->second));
}
bool Environment::hasRegisteredObject(const unsigned int address) const
{
if (hasObject(address))
{
return object_[address].isRegistered;
}
else
{
return false;
}
}
bool Environment::hasRegisteredObject(const std::string name) const
{
if (hasObject(name))
{
return hasRegisteredObject(getObjectAddress(name));
}
else
{
return false;
}
}
bool Environment::hasCreatedObject(const unsigned int address) const
{
if (hasObject(address))
@ -680,92 +337,27 @@ Environment::Size Environment::getTotalSize(void) const
for (auto &o: object_)
{
if (o.isRegistered)
{
size += o.size;
}
size += o.size;
}
return size;
}
void Environment::addOwnership(const unsigned int owner,
const unsigned int property)
void Environment::freeObject(const unsigned int address)
{
if (hasObject(property))
if (hasCreatedObject(address))
{
object_[property].owners.insert(owner);
}
else
{
HADRON_ERROR("no object with address " + std::to_string(property));
}
if (hasObject(owner))
{
object_[owner].properties.insert(property);
}
else
{
HADRON_ERROR("no object with address " + std::to_string(owner));
LOG(Message) << "Destroying object '" << object_[address].name
<< "'" << std::endl;
}
object_[address].size = 0;
object_[address].type = nullptr;
object_[address].data.reset(nullptr);
}
void Environment::addOwnership(const std::string owner,
const std::string property)
void Environment::freeObject(const std::string name)
{
addOwnership(getObjectAddress(owner), getObjectAddress(property));
}
bool Environment::hasOwners(const unsigned int address) const
{
if (hasObject(address))
{
return (!object_[address].owners.empty());
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
bool Environment::hasOwners(const std::string name) const
{
return hasOwners(getObjectAddress(name));
}
bool Environment::freeObject(const unsigned int address)
{
if (!hasOwners(address))
{
if (!isDryRun() and object_[address].isRegistered)
{
LOG(Message) << "Destroying object '" << object_[address].name
<< "'" << std::endl;
}
for (auto &p: object_[address].properties)
{
object_[p].owners.erase(address);
}
object_[address].size = 0;
object_[address].Ls = 0;
object_[address].isRegistered = false;
object_[address].type = nullptr;
object_[address].owners.clear();
object_[address].properties.clear();
object_[address].data.reset(nullptr);
return true;
}
else
{
return false;
}
}
bool Environment::freeObject(const std::string name)
{
return freeObject(getObjectAddress(name));
freeObject(getObjectAddress(name));
}
void Environment::freeAll(void)
@ -776,18 +368,24 @@ void Environment::freeAll(void)
}
}
void Environment::printContent(void)
void Environment::protectObjects(const bool protect)
{
LOG(Message) << "Modules: " << std::endl;
for (unsigned int i = 0; i < module_.size(); ++i)
{
LOG(Message) << std::setw(4) << i << ": "
<< getModuleName(i) << std::endl;
}
LOG(Message) << "Objects: " << std::endl;
protect_ = protect;
}
bool Environment::objectsProtected(void) const
{
return protect_;
}
// print environment content ///////////////////////////////////////////////////
void Environment::printContent(void) const
{
LOG(Debug) << "Objects: " << std::endl;
for (unsigned int i = 0; i < object_.size(); ++i)
{
LOG(Message) << std::setw(4) << i << ": "
<< getObjectName(i) << std::endl;
LOG(Debug) << std::setw(4) << i << ": "
<< getObjectName(i) << " ("
<< sizeString(getObjectSize(i)) << ")" << std::endl;
}
}

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Environment.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -31,20 +30,12 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define Hadrons_Environment_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Graph.hpp>
#ifndef SITE_SIZE_TYPE
#define SITE_SIZE_TYPE unsigned int
#endif
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Global environment *
******************************************************************************/
// forward declaration of Module
class ModuleBase;
class Object
{
public:
@ -66,123 +57,78 @@ private:
std::unique_ptr<T> objPt_{nullptr};
};
#define DEFINE_ENV_ALIAS \
inline Environment & env(void) const\
{\
return Environment::getInstance();\
}
class Environment
{
SINGLETON(Environment);
public:
typedef SITE_SIZE_TYPE Size;
typedef std::unique_ptr<ModuleBase> ModPt;
typedef std::unique_ptr<GridCartesian> GridPt;
typedef std::unique_ptr<GridRedBlackCartesian> GridRbPt;
typedef std::unique_ptr<GridParallelRNG> RngPt;
typedef std::unique_ptr<LatticeBase> LatticePt;
enum class Storage {object, cache, temporary};
private:
struct ModuleInfo
{
const std::type_info *type{nullptr};
std::string name;
ModPt data{nullptr};
std::vector<unsigned int> input;
};
struct ObjInfo
{
Size size{0};
Storage storage{Storage::object};
unsigned int Ls{0};
bool isRegistered{false};
const std::type_info *type{nullptr};
std::string name;
int module{-1};
std::set<unsigned int> owners, properties;
std::unique_ptr<Object> data{nullptr};
};
public:
// dry run
void dryRun(const bool isDry);
bool isDryRun(void) const;
// trajectory number
void setTrajectory(const unsigned int traj);
unsigned int getTrajectory(void) const;
// grids
void createGrid(const unsigned int Ls);
GridCartesian * getGrid(const unsigned int Ls = 1) const;
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
std::vector<int> getDim(void) const;
int getDim(const unsigned int mu) const;
unsigned long int getLocalVolume(void) const;
unsigned int getNd(void) const;
// random number generator
void setSeed(const std::vector<int> &seed);
GridParallelRNG * get4dRng(void) const;
// module management
void pushModule(ModPt &pt);
template <typename M>
void createModule(const std::string name);
template <typename M>
void createModule(const std::string name,
const typename M::Par &par);
void createModule(const std::string name,
const std::string type,
XmlReader &reader);
unsigned int getNModule(void) const;
ModuleBase * getModule(const unsigned int address) const;
ModuleBase * getModule(const std::string name) const;
template <typename M>
M * getModule(const unsigned int address) const;
template <typename M>
M * getModule(const std::string name) const;
unsigned int getModuleAddress(const std::string name) const;
std::string getModuleName(const unsigned int address) const;
std::string getModuleType(const unsigned int address) const;
std::string getModuleType(const std::string name) const;
std::string getModuleNamespace(const unsigned int address) const;
std::string getModuleNamespace(const std::string name) const;
bool hasModule(const unsigned int address) const;
bool hasModule(const std::string name) const;
Graph<unsigned int> makeModuleGraph(void) const;
Size executeProgram(const std::vector<unsigned int> &p);
Size executeProgram(const std::vector<std::string> &p);
// general memory management
void addObject(const std::string name,
const int moduleAddress = -1);
void registerObject(const unsigned int address,
const unsigned int size,
const unsigned int Ls = 1);
void registerObject(const std::string name,
const unsigned int size,
const unsigned int Ls = 1);
template <typename T>
unsigned int lattice4dSize(void) const;
template <typename T>
void registerLattice(const unsigned int address,
const unsigned int Ls = 1);
template <typename T>
void registerLattice(const std::string name,
const unsigned int Ls = 1);
template <typename T>
void setObject(const unsigned int address, T *object);
template <typename T>
void setObject(const std::string name, T *object);
template <typename B, typename T, typename ... Ts>
void createDerivedObject(const std::string name,
const Environment::Storage storage,
const unsigned int Ls,
Ts && ... args);
template <typename T, typename ... Ts>
void createObject(const std::string name,
const Environment::Storage storage,
const unsigned int Ls,
Ts && ... args);
void setObjectModule(const unsigned int objAddress,
const int modAddress);
template <typename T>
T * getObject(const unsigned int address) const;
template <typename T>
T * getObject(const std::string name) const;
template <typename T>
T * createLattice(const unsigned int address);
template <typename T>
T * createLattice(const std::string name);
unsigned int getMaxAddress(void) const;
unsigned int getObjectAddress(const std::string name) const;
std::string getObjectName(const unsigned int address) const;
std::string getObjectType(const unsigned int address) const;
std::string getObjectType(const std::string name) const;
Size getObjectSize(const unsigned int address) const;
Size getObjectSize(const std::string name) const;
unsigned int getObjectModule(const unsigned int address) const;
unsigned int getObjectModule(const std::string name) const;
Storage getObjectStorage(const unsigned int address) const;
Storage getObjectStorage(const std::string name) const;
int getObjectModule(const unsigned int address) const;
int getObjectModule(const std::string name) const;
unsigned int getObjectLs(const unsigned int address) const;
unsigned int getObjectLs(const std::string name) const;
bool hasObject(const unsigned int address) const;
bool hasObject(const std::string name) const;
bool hasRegisteredObject(const unsigned int address) const;
bool hasRegisteredObject(const std::string name) const;
bool hasCreatedObject(const unsigned int address) const;
bool hasCreatedObject(const std::string name) const;
bool isObject5d(const unsigned int address) const;
@ -192,20 +138,17 @@ public:
template <typename T>
bool isObjectOfType(const std::string name) const;
Environment::Size getTotalSize(void) const;
void addOwnership(const unsigned int owner,
const unsigned int property);
void addOwnership(const std::string owner,
const std::string property);
bool hasOwners(const unsigned int address) const;
bool hasOwners(const std::string name) const;
bool freeObject(const unsigned int address);
bool freeObject(const std::string name);
void freeObject(const unsigned int address);
void freeObject(const std::string name);
void freeAll(void);
void printContent(void);
void protectObjects(const bool protect);
bool objectsProtected(void) const;
// print environment content
void printContent(void) const;
private:
// general
bool dryRun_{false};
unsigned int traj_, locVol_;
unsigned long int locVol_;
bool protect_{true};
// grids
std::vector<int> dim_;
GridPt grid4d_;
@ -215,11 +158,6 @@ private:
unsigned int nd_;
// random number generator
RngPt rng4d_;
// module and related maps
std::vector<ModuleInfo> module_;
std::map<std::string, unsigned int> moduleAddress_;
// lattice store
std::map<unsigned int, LatticePt> lattice_;
// object store
std::vector<ObjInfo> object_;
std::map<std::string, unsigned int> objectAddress_;
@ -256,116 +194,85 @@ void Holder<T>::reset(T *pt)
/******************************************************************************
* Environment template implementation *
******************************************************************************/
// module management ///////////////////////////////////////////////////////////
template <typename M>
void Environment::createModule(const std::string name)
// general memory management ///////////////////////////////////////////////////
template <typename B, typename T, typename ... Ts>
void Environment::createDerivedObject(const std::string name,
const Environment::Storage storage,
const unsigned int Ls,
Ts && ... args)
{
ModPt pt(new M(name));
if (!hasObject(name))
{
addObject(name);
}
pushModule(pt);
}
template <typename M>
void Environment::createModule(const std::string name,
const typename M::Par &par)
{
ModPt pt(new M(name));
unsigned int address = getObjectAddress(name);
static_cast<M *>(pt.get())->setPar(par);
pushModule(pt);
}
template <typename M>
M * Environment::getModule(const unsigned int address) const
{
if (auto *pt = dynamic_cast<M *>(getModule(address)))
if (!object_[address].data or !objectsProtected())
{
return pt;
MemoryStats memStats;
if (!MemoryProfiler::stats)
{
MemoryProfiler::stats = &memStats;
}
size_t initMem = MemoryProfiler::stats->currentlyAllocated;
object_[address].storage = storage;
object_[address].Ls = Ls;
object_[address].data.reset(new Holder<B>(new T(std::forward<Ts>(args)...)));
object_[address].size = MemoryProfiler::stats->maxAllocated - initMem;
object_[address].type = &typeid(T);
if (MemoryProfiler::stats == &memStats)
{
MemoryProfiler::stats = nullptr;
}
}
else
// object already exists, no error if it is a cache, error otherwise
else if ((object_[address].storage != Storage::cache) or
(object_[address].storage != storage) or
(object_[address].name != name) or
(object_[address].type != &typeid(T)))
{
HADRON_ERROR("module '" + module_[address].name
+ "' does not have type " + typeid(M).name()
+ "(object type: " + getModuleType(address) + ")");
HADRON_ERROR(Definition, "object '" + name + "' already allocated");
}
}
template <typename M>
M * Environment::getModule(const std::string name) const
template <typename T, typename ... Ts>
void Environment::createObject(const std::string name,
const Environment::Storage storage,
const unsigned int Ls,
Ts && ... args)
{
return getModule<M>(getModuleAddress(name));
}
template <typename T>
unsigned int Environment::lattice4dSize(void) const
{
return sizeof(typename T::vector_object)/getGrid()->Nsimd();
}
template <typename T>
void Environment::registerLattice(const unsigned int address,
const unsigned int Ls)
{
createGrid(Ls);
registerObject(address, Ls*lattice4dSize<T>(), Ls);
}
template <typename T>
void Environment::registerLattice(const std::string name, const unsigned int Ls)
{
createGrid(Ls);
registerObject(name, Ls*lattice4dSize<T>(), Ls);
}
template <typename T>
void Environment::setObject(const unsigned int address, T *object)
{
if (hasRegisteredObject(address))
{
object_[address].data.reset(new Holder<T>(object));
object_[address].type = &typeid(T);
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
template <typename T>
void Environment::setObject(const std::string name, T *object)
{
setObject(getObjectAddress(name), object);
createDerivedObject<T, T>(name, storage, Ls, std::forward<Ts>(args)...);
}
template <typename T>
T * Environment::getObject(const unsigned int address) const
{
if (hasRegisteredObject(address))
if (hasObject(address))
{
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
if (hasCreatedObject(address))
{
return h->getPt();
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
{
return h->getPt();
}
else
{
HADRON_ERROR(Definition, "object with address " + std::to_string(address) +
" does not have type '" + typeName(&typeid(T)) +
"' (has type '" + getObjectType(address) + "')");
}
}
else
{
HADRON_ERROR("object with address " + std::to_string(address) +
" does not have type '" + typeName(&typeid(T)) +
"' (has type '" + getObjectType(address) + "')");
HADRON_ERROR(Definition, "object with address " + std::to_string(address) +
" is empty");
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
HADRON_ERROR(Definition, "no object with address " + std::to_string(address));
}
}
@ -375,26 +282,10 @@ T * Environment::getObject(const std::string name) const
return getObject<T>(getObjectAddress(name));
}
template <typename T>
T * Environment::createLattice(const unsigned int address)
{
GridCartesian *g = getGrid(getObjectLs(address));
setObject(address, new T(g));
return getObject<T>(address);
}
template <typename T>
T * Environment::createLattice(const std::string name)
{
return createLattice<T>(getObjectAddress(name));
}
template <typename T>
bool Environment::isObjectOfType(const unsigned int address) const
{
if (hasRegisteredObject(address))
if (hasObject(address))
{
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
{
@ -405,14 +296,9 @@ bool Environment::isObjectOfType(const unsigned int address) const
return false;
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
HADRON_ERROR(Definition, "no object with address " + std::to_string(address));
}
}

View File

@ -0,0 +1,57 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Exceptions.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Exceptions.hpp>
#ifndef ERR_SUFF
#define ERR_SUFF " (" + loc + ")"
#endif
#define CONST_EXC(name, init) \
name::name(std::string msg, std::string loc)\
:init\
{}
using namespace Grid;
using namespace Hadrons;
using namespace Exceptions;
// logic errors
CONST_EXC(Logic, logic_error(msg + ERR_SUFF))
CONST_EXC(Definition, Logic("definition error: " + msg, loc))
CONST_EXC(Implementation, Logic("implementation error: " + msg, loc))
CONST_EXC(Range, Logic("range error: " + msg, loc))
CONST_EXC(Size, Logic("size error: " + msg, loc))
// runtime errors
CONST_EXC(Runtime, runtime_error(msg + ERR_SUFF))
CONST_EXC(Argument, Runtime("argument error: " + msg, loc))
CONST_EXC(Io, Runtime("IO error: " + msg, loc))
CONST_EXC(Memory, Runtime("memory error: " + msg, loc))
CONST_EXC(Parsing, Runtime("parsing error: " + msg, loc))
CONST_EXC(Program, Runtime("program error: " + msg, loc))
CONST_EXC(System, Runtime("system error: " + msg, loc))

View File

@ -0,0 +1,72 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Exceptions.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Exceptions_hpp_
#define Hadrons_Exceptions_hpp_
#include <stdexcept>
#ifndef Hadrons_Global_hpp_
#include <Grid/Hadrons/Global.hpp>
#endif
#define SRC_LOC std::string(__FUNCTION__) + " at " + std::string(__FILE__) + ":"\
+ std::to_string(__LINE__)
#define HADRON_ERROR(exc, msg)\
LOG(Error) << msg << std::endl;\
throw(Exceptions::exc(msg, SRC_LOC));
#define DECL_EXC(name, base) \
class name: public base\
{\
public:\
name(std::string msg, std::string loc);\
}
BEGIN_HADRONS_NAMESPACE
namespace Exceptions
{
// logic errors
DECL_EXC(Logic, std::logic_error);
DECL_EXC(Definition, Logic);
DECL_EXC(Implementation, Logic);
DECL_EXC(Range, Logic);
DECL_EXC(Size, Logic);
// runtime errors
DECL_EXC(Runtime, std::runtime_error);
DECL_EXC(Argument, Runtime);
DECL_EXC(Io, Runtime);
DECL_EXC(Memory, Runtime);
DECL_EXC(Parsing, Runtime);
DECL_EXC(Program, Runtime);
DECL_EXC(System, Runtime);
}
END_HADRONS_NAMESPACE
#endif // Hadrons_Exceptions_hpp_

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Factory.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -95,7 +94,7 @@ std::unique_ptr<T> Factory<T>::create(const std::string type,
}
catch (std::out_of_range &)
{
HADRON_ERROR("object of type '" + type + "' unknown");
HADRON_ERROR(Argument, "object of type '" + type + "' unknown");
}
return func(name);

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/GeneticScheduler.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -38,13 +37,13 @@ BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Scheduler based on a genetic algorithm *
******************************************************************************/
template <typename T>
template <typename V, typename T>
class GeneticScheduler
{
public:
typedef std::vector<T> Gene;
typedef std::pair<Gene *, Gene *> GenePair;
typedef std::function<int(const Gene &)> ObjFunc;
typedef std::vector<T> Gene;
typedef std::pair<Gene *, Gene *> GenePair;
typedef std::function<V(const Gene &)> ObjFunc;
struct Parameters
{
double mutationRate;
@ -65,7 +64,7 @@ public:
void benchmarkCrossover(const unsigned int nIt);
// print population
friend std::ostream & operator<<(std::ostream &out,
const GeneticScheduler<T> &s)
const GeneticScheduler<V, T> &s)
{
out << "[";
for (auto &p: s.population_)
@ -87,19 +86,19 @@ private:
void mutation(Gene &m, const Gene &c);
private:
Graph<T> &graph_;
const ObjFunc &func_;
const Parameters par_;
std::multimap<int, Gene> population_;
std::mt19937 gen_;
Graph<T> &graph_;
const ObjFunc &func_;
const Parameters par_;
std::multimap<V, Gene> population_;
std::mt19937 gen_;
};
/******************************************************************************
* template implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename T>
GeneticScheduler<T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
template <typename V, typename T>
GeneticScheduler<V, T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
const Parameters &par)
: graph_(graph)
, func_(func)
@ -109,22 +108,22 @@ GeneticScheduler<T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
}
// access //////////////////////////////////////////////////////////////////////
template <typename T>
const typename GeneticScheduler<T>::Gene &
GeneticScheduler<T>::getMinSchedule(void)
template <typename V, typename T>
const typename GeneticScheduler<V, T>::Gene &
GeneticScheduler<V, T>::getMinSchedule(void)
{
return population_.begin()->second;
}
template <typename T>
int GeneticScheduler<T>::getMinValue(void)
template <typename V, typename T>
int GeneticScheduler<V, T>::getMinValue(void)
{
return population_.begin()->first;
}
// breed a new generation //////////////////////////////////////////////////////
template <typename T>
void GeneticScheduler<T>::nextGeneration(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::nextGeneration(void)
{
// random initialization of the population if necessary
if (population_.size() != par_.popSize)
@ -158,8 +157,8 @@ void GeneticScheduler<T>::nextGeneration(void)
}
// evolution steps /////////////////////////////////////////////////////////////
template <typename T>
void GeneticScheduler<T>::initPopulation(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::initPopulation(void)
{
population_.clear();
for (unsigned int i = 0; i < par_.popSize; ++i)
@ -170,8 +169,8 @@ void GeneticScheduler<T>::initPopulation(void)
}
}
template <typename T>
void GeneticScheduler<T>::doCrossover(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::doCrossover(void)
{
auto p = selectPair();
Gene &p1 = *(p.first), &p2 = *(p.second);
@ -185,8 +184,8 @@ void GeneticScheduler<T>::doCrossover(void)
}
}
template <typename T>
void GeneticScheduler<T>::doMutation(void)
template <typename V, typename T>
void GeneticScheduler<V, T>::doMutation(void)
{
std::uniform_real_distribution<double> mdis(0., 1.);
std::uniform_int_distribution<unsigned int> pdis(0, population_.size() - 1);
@ -206,40 +205,35 @@ void GeneticScheduler<T>::doMutation(void)
}
// genetic operators ///////////////////////////////////////////////////////////
template <typename T>
typename GeneticScheduler<T>::GenePair GeneticScheduler<T>::selectPair(void)
template <typename V, typename T>
typename GeneticScheduler<V, T>::GenePair GeneticScheduler<V, T>::selectPair(void)
{
std::vector<double> prob;
unsigned int ind;
Gene *p1, *p2;
const double max = population_.rbegin()->first;
for (auto &c: population_)
{
prob.push_back(1./c.first);
}
do
{
double probCpy;
std::discrete_distribution<unsigned int> dis1(prob.begin(), prob.end());
auto rIt = population_.begin();
ind = dis1(gen_);
std::advance(rIt, ind);
p1 = &(rIt->second);
probCpy = prob[ind];
prob[ind] = 0.;
std::discrete_distribution<unsigned int> dis2(prob.begin(), prob.end());
rIt = population_.begin();
std::advance(rIt, dis2(gen_));
p2 = &(rIt->second);
prob[ind] = probCpy;
} while (p1 == p2);
prob.push_back(std::exp((c.first-1.)/max));
}
std::discrete_distribution<unsigned int> dis1(prob.begin(), prob.end());
auto rIt = population_.begin();
ind = dis1(gen_);
std::advance(rIt, ind);
p1 = &(rIt->second);
prob[ind] = 0.;
std::discrete_distribution<unsigned int> dis2(prob.begin(), prob.end());
rIt = population_.begin();
std::advance(rIt, dis2(gen_));
p2 = &(rIt->second);
return std::make_pair(p1, p2);
}
template <typename T>
void GeneticScheduler<T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
template <typename V, typename T>
void GeneticScheduler<V, T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
const Gene &p2)
{
Gene buf;
@ -273,8 +267,8 @@ void GeneticScheduler<T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
}
}
template <typename T>
void GeneticScheduler<T>::mutation(Gene &m, const Gene &c)
template <typename V, typename T>
void GeneticScheduler<V, T>::mutation(Gene &m, const Gene &c)
{
Gene buf;
std::uniform_int_distribution<unsigned int> dis(0, c.size() - 1);
@ -303,8 +297,8 @@ void GeneticScheduler<T>::mutation(Gene &m, const Gene &c)
}
}
template <typename T>
void GeneticScheduler<T>::benchmarkCrossover(const unsigned int nIt)
template <typename V, typename T>
void GeneticScheduler<V, T>::benchmarkCrossover(const unsigned int nIt)
{
Gene p1, p2, c1, c2;
double neg = 0., eq = 0., pos = 0., total;

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Global.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -39,31 +38,19 @@ HadronsLogger Hadrons::HadronsLogMessage(1,"Message");
HadronsLogger Hadrons::HadronsLogIterative(1,"Iterative");
HadronsLogger Hadrons::HadronsLogDebug(1,"Debug");
// pretty size formatting //////////////////////////////////////////////////////
std::string Hadrons::sizeString(long unsigned int bytes)
void Hadrons::initLogger(void)
{
constexpr unsigned int bufSize = 256;
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
char buf[256];
long unsigned int s = 0;
double count = bytes;
while (count >= 1024 && s < 7)
{
s++;
count /= 1024;
}
if (count - floor(count) == 0.0)
{
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
}
else
{
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
}
return std::string(buf);
auto w = std::string("Hadrons").length();
GridLogError.setTopWidth(w);
GridLogWarning.setTopWidth(w);
GridLogMessage.setTopWidth(w);
GridLogIterative.setTopWidth(w);
GridLogDebug.setTopWidth(w);
HadronsLogError.Active(GridLogError.isActive());
HadronsLogWarning.Active(GridLogWarning.isActive());
HadronsLogMessage.Active(GridLogMessage.isActive());
HadronsLogIterative.Active(GridLogIterative.isActive());
HadronsLogDebug.Active(GridLogDebug.isActive());
}
// type utilities //////////////////////////////////////////////////////////////
@ -80,3 +67,10 @@ std::string Hadrons::typeName(const std::type_info *info)
return name;
}
// default writers/readers /////////////////////////////////////////////////////
#ifdef HAVE_HDF5
const std::string Hadrons::resultFileExt = "h5";
#else
const std::string Hadrons::resultFileExt = "xml";
#endif

View File

@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Global.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -35,6 +35,10 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Grid/Grid.h>
#include <cxxabi.h>
#ifndef SITE_SIZE_TYPE
#define SITE_SIZE_TYPE size_t
#endif
#define BEGIN_HADRONS_NAMESPACE \
namespace Grid {\
using namespace QCD;\
@ -57,17 +61,19 @@ using Grid::operator<<;
#ifndef SIMPL
#define SIMPL ScalarImplCR
#endif
#ifndef GIMPL
#define GIMPL GimplTypesR
#endif
BEGIN_HADRONS_NAMESPACE
// type aliases
#define FERM_TYPE_ALIASES(FImpl, suffix)\
typedef FermionOperator<FImpl> FMat##suffix; \
typedef typename FImpl::FermionField FermionField##suffix; \
typedef typename FImpl::PropagatorField PropagatorField##suffix; \
typedef typename FImpl::SitePropagator SitePropagator##suffix; \
typedef std::vector<typename FImpl::SitePropagator::scalar_object> \
SlicedPropagator##suffix;
typedef FermionOperator<FImpl> FMat##suffix; \
typedef typename FImpl::FermionField FermionField##suffix; \
typedef typename FImpl::PropagatorField PropagatorField##suffix; \
typedef typename FImpl::SitePropagator::scalar_object SitePropagator##suffix; \
typedef std::vector<SitePropagator##suffix> SlicedPropagator##suffix;
#define GAUGE_TYPE_ALIASES(FImpl, suffix)\
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;
@ -81,7 +87,8 @@ typedef std::function<void(FermionField##suffix &,\
const FermionField##suffix &)> SolverFn##suffix;
#define SINK_TYPE_ALIASES(suffix)\
typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix;
typedef std::function<SlicedPropagator##suffix\
(const PropagatorField##suffix &)> SinkFn##suffix;
#define FGS_TYPE_ALIASES(FImpl, suffix)\
FERM_TYPE_ALIASES(FImpl, suffix)\
@ -97,11 +104,6 @@ public:
};
#define LOG(channel) std::cout << HadronsLog##channel
#define HADRON_ERROR(msg)\
LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
<< __LINE__ << ")" << std::endl;\
abort();
#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
extern HadronsLogger HadronsLogError;
@ -110,6 +112,8 @@ extern HadronsLogger HadronsLogMessage;
extern HadronsLogger HadronsLogIterative;
extern HadronsLogger HadronsLogDebug;
void initLogger(void);
// singleton pattern
#define SINGLETON(name)\
public:\
@ -135,9 +139,6 @@ public:\
private:\
name(void) = default;
// pretty size formating
std::string sizeString(long unsigned int bytes);
// type utilities
template <typename T>
const std::type_info * typeIdPt(const T &x)
@ -166,14 +167,21 @@ std::string typeName(void)
}
// default writers/readers
extern const std::string resultFileExt;
#ifdef HAVE_HDF5
typedef Hdf5Reader CorrReader;
typedef Hdf5Writer CorrWriter;
typedef Hdf5Reader ResultReader;
typedef Hdf5Writer ResultWriter;
#else
typedef XmlReader CorrReader;
typedef XmlWriter CorrWriter;
typedef XmlReader ResultReader;
typedef XmlWriter ResultWriter;
#endif
#define RESULT_FILE_NAME(name) \
name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt
END_HADRONS_NAMESPACE
#include <Grid/Hadrons/Exceptions.hpp>
#endif // Hadrons_Global_hpp_

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Graph.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -185,7 +184,7 @@ void Graph<T>::removeVertex(const T &value)
}
else
{
HADRON_ERROR("vertex " << value << " does not exists");
HADRON_ERROR(Range, "vertex does not exists");
}
// remove all edges containing the vertex
@ -214,7 +213,7 @@ void Graph<T>::removeEdge(const Edge &e)
}
else
{
HADRON_ERROR("edge " << e << " does not exists");
HADRON_ERROR(Range, "edge does not exists");
}
}
@ -260,7 +259,7 @@ void Graph<T>::mark(const T &value, const bool doMark)
}
else
{
HADRON_ERROR("vertex " << value << " does not exists");
HADRON_ERROR(Range, "vertex does not exists");
}
}
@ -298,7 +297,7 @@ bool Graph<T>::isMarked(const T &value) const
}
else
{
HADRON_ERROR("vertex " << value << " does not exists");
HADRON_ERROR(Range, "vertex does not exists");
return false;
}
@ -430,7 +429,7 @@ std::vector<T> Graph<T>::getAdjacentVertices(const T &value) const
{
return ((e.first == value) or (e.second == value));
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
auto eIt = std::find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
@ -442,7 +441,7 @@ std::vector<T> Graph<T>::getAdjacentVertices(const T &value) const
{
adjacentVertex.push_back((*eIt).first);
}
eIt = find_if(++eIt, edgeSet_.end(), pred);
eIt = std::find_if(++eIt, edgeSet_.end(), pred);
}
return adjacentVertex;
@ -458,12 +457,12 @@ std::vector<T> Graph<T>::getChildren(const T &value) const
{
return (e.first == value);
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
auto eIt = std::find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
child.push_back((*eIt).second);
eIt = find_if(++eIt, edgeSet_.end(), pred);
eIt = std::find_if(++eIt, edgeSet_.end(), pred);
}
return child;
@ -479,12 +478,12 @@ std::vector<T> Graph<T>::getParents(const T &value) const
{
return (e.second == value);
};
auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
auto eIt = std::find_if(edgeSet_.begin(), edgeSet_.end(), pred);
while (eIt != edgeSet_.end())
{
parent.push_back((*eIt).first);
eIt = find_if(++eIt, edgeSet_.end(), pred);
eIt = std::find_if(++eIt, edgeSet_.end(), pred);
}
return parent;
@ -544,7 +543,7 @@ std::vector<T> Graph<T>::topoSort(void)
{
if (tmpMarked.at(v))
{
HADRON_ERROR("cannot topologically sort a cyclic graph");
HADRON_ERROR(Range, "cannot topologically sort a cyclic graph");
}
if (!isMarked(v))
{
@ -603,7 +602,7 @@ std::vector<T> Graph<T>::topoSort(Gen &gen)
{
if (tmpMarked.at(v))
{
HADRON_ERROR("cannot topologically sort a cyclic graph");
HADRON_ERROR(Range, "cannot topologically sort a cyclic graph");
}
if (!isMarked(v))
{

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/HadronsXmlRun.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -55,12 +54,6 @@ int main(int argc, char *argv[])
// initialization
Grid_init(&argc, &argv);
HadronsLogError.Active(GridLogError.isActive());
HadronsLogWarning.Active(GridLogWarning.isActive());
HadronsLogMessage.Active(GridLogMessage.isActive());
HadronsLogIterative.Active(GridLogIterative.isActive());
HadronsLogDebug.Active(GridLogDebug.isActive());
LOG(Message) << "Grid initialized" << std::endl;
// execution
Application application(parameterFileName);

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/HadronsXmlSchedule.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -49,12 +48,6 @@ int main(int argc, char *argv[])
// initialization
Grid_init(&argc, &argv);
HadronsLogError.Active(GridLogError.isActive());
HadronsLogWarning.Active(GridLogWarning.isActive());
HadronsLogMessage.Active(GridLogMessage.isActive());
HadronsLogIterative.Active(GridLogIterative.isActive());
HadronsLogDebug.Active(GridLogDebug.isActive());
LOG(Message) << "Grid initialized" << std::endl;
// execution
Application application;

View File

@ -7,20 +7,24 @@ libHadrons_a_SOURCES = \
$(modules_cc) \
Application.cc \
Environment.cc \
Exceptions.cc \
Global.cc \
Module.cc
Module.cc \
VirtualMachine.cc
libHadrons_adir = $(pkgincludedir)/Hadrons
nobase_libHadrons_a_HEADERS = \
$(modules_hpp) \
Application.hpp \
Environment.hpp \
Exceptions.hpp \
Factory.hpp \
GeneticScheduler.hpp \
Global.hpp \
Graph.hpp \
Module.hpp \
Modules.hpp \
ModuleFactory.hpp
ModuleFactory.hpp \
VirtualMachine.hpp
HadronsXmlRun_SOURCES = HadronsXmlRun.cc
HadronsXmlRun_LDADD = libHadrons.a -lGrid

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Module.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -39,7 +38,6 @@ using namespace Hadrons;
// constructor /////////////////////////////////////////////////////////////////
ModuleBase::ModuleBase(const std::string name)
: name_(name)
, env_(Environment::getInstance())
{}
// access //////////////////////////////////////////////////////////////////////
@ -48,15 +46,10 @@ std::string ModuleBase::getName(void) const
return name_;
}
Environment & ModuleBase::env(void) const
{
return env_;
}
// get factory registration name if available
std::string ModuleBase::getRegisteredName(void)
{
HADRON_ERROR("module '" + getName() + "' has a type not registered"
HADRON_ERROR(Definition, "module '" + getName() + "' has no registered type"
+ " in the factory");
}
@ -64,8 +57,5 @@ std::string ModuleBase::getRegisteredName(void)
void ModuleBase::operator()(void)
{
setup();
if (!env().isDryRun())
{
execute();
}
execute();
}

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Module.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -31,7 +30,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define Hadrons_Module_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Environment.hpp>
#include <Grid/Hadrons/VirtualMachine.hpp>
BEGIN_HADRONS_NAMESPACE
@ -87,6 +86,56 @@ public:\
static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance;
#define ARG(...) __VA_ARGS__
#define MACRO_REDIRECT(arg1, arg2, arg3, macro, ...) macro
#define envGet(type, name)\
*env().template getObject<type>(name)
#define envGetTmp(type, var)\
type &var = *env().template getObject<type>(getName() + "_tmp_" + #var)
#define envHasType(type, name)\
env().template isObjectOfType<type>(name)
#define envCreate(type, name, Ls, ...)\
env().template createObject<type>(name, Environment::Storage::object, Ls, __VA_ARGS__)
#define envCreateDerived(base, type, name, Ls, ...)\
env().template createDerivedObject<base, type>(name, Environment::Storage::object, Ls, __VA_ARGS__)
#define envCreateLat4(type, name)\
envCreate(type, name, 1, env().getGrid())
#define envCreateLat5(type, name, Ls)\
envCreate(type, name, Ls, env().getGrid(Ls))
#define envCreateLat(...)\
MACRO_REDIRECT(__VA_ARGS__, envCreateLat5, envCreateLat4)(__VA_ARGS__)
#define envCache(type, name, Ls, ...)\
env().template createObject<type>(name, Environment::Storage::cache, Ls, __VA_ARGS__)
#define envCacheLat4(type, name)\
envCache(type, name, 1, env().getGrid())
#define envCacheLat5(type, name, Ls)\
envCache(type, name, Ls, env().getGrid(Ls))
#define envCacheLat(...)\
MACRO_REDIRECT(__VA_ARGS__, envCacheLat5, envCacheLat4)(__VA_ARGS__)
#define envTmp(type, name, Ls, ...)\
env().template createObject<type>(getName() + "_tmp_" + name, \
Environment::Storage::temporary, Ls, __VA_ARGS__)
#define envTmpLat4(type, name)\
envTmp(type, name, 1, env().getGrid())
#define envTmpLat5(type, name, Ls)\
envTmp(type, name, Ls, env().getGrid(Ls))
#define envTmpLat(...)\
MACRO_REDIRECT(__VA_ARGS__, envTmpLat5, envTmpLat4)(__VA_ARGS__)
/******************************************************************************
* Module class *
@ -101,23 +150,30 @@ public:
virtual ~ModuleBase(void) = default;
// access
std::string getName(void) const;
Environment &env(void) const;
// get factory registration name if available
virtual std::string getRegisteredName(void);
// dependencies/products
virtual std::vector<std::string> getInput(void) = 0;
virtual std::vector<std::string> getReference(void)
{
return std::vector<std::string>(0);
};
virtual std::vector<std::string> getOutput(void) = 0;
// parse parameters
virtual void parseParameters(XmlReader &reader, const std::string name) = 0;
virtual void saveParameters(XmlWriter &writer, const std::string name) = 0;
// setup
virtual void setup(void) {};
virtual void execute(void) = 0;
// execution
void operator()(void);
virtual void execute(void) = 0;
protected:
// environment shortcut
DEFINE_ENV_ALIAS;
// virtual machine shortcut
DEFINE_VM_ALIAS;
private:
std::string name_;
Environment &env_;
};
// derived class, templating the parameter class

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/ModuleFactory.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>

View File

@ -1,27 +1,64 @@
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/WardIdentity.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/MFermion/GaugeProp.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqConserved.hpp>
#include <Grid/Hadrons/Modules/MSink/Smear.hpp>
#include <Grid/Hadrons/Modules/MSink/Point.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
#include <Grid/Hadrons/Modules/MGauge/FundtoHirep.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MAction/WilsonClover.hpp>
#include <Grid/Hadrons/Modules/MUtilities/TestSeqGamma.hpp>
#include <Grid/Hadrons/Modules/MUtilities/TestSeqConserved.hpp>
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MAction/WilsonClover.hpp>
#include <Grid/Hadrons/Modules/MScalarSUN/TrMag.hpp>
#include <Grid/Hadrons/Modules/MScalarSUN/TwoPoint.hpp>
#include <Grid/Hadrons/Modules/MScalarSUN/TrPhi.hpp>
#include <Grid/Hadrons/Modules/MIO/LoadNersc.hpp>
#include <Grid/Hadrons/Modules/MIO/LoadBinary.hpp>

View File

@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MAction/DWF.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -65,6 +65,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -103,35 +104,29 @@ std::vector<std::string> TDWF<FImpl>::getOutput(void)
template <typename FImpl>
void TDWF<FImpl>::setup(void)
{
unsigned int size;
size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
env().registerObject(getName(), size, par().Ls);
LOG(Message) << "Setting up domain wall fermion matrix with m= "
<< par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
env().createGrid(par().Ls);
auto &U = envGet(LatticeGaugeField, par().gauge);
auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
envCreateDerived(FMat, DomainWallFermion<FImpl>, getName(), par().Ls, U, g5,
grb5, g4, grb4, par().mass, par().M5, implParams);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDWF<FImpl>::execute(void)
{
LOG(Message) << "Setting up domain wall fermion matrix with m= "
<< par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
env().createGrid(par().Ls);
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
par().mass, par().M5,
implParams);
env().setObject(getName(), fMatPt);
}
{}
END_MODULE_NAMESPACE

View File

@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MAction/Wilson.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -63,6 +63,7 @@ public:
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -101,29 +102,24 @@ std::vector<std::string> TWilson<FImpl>::getOutput(void)
template <typename FImpl>
void TWilson<FImpl>::setup(void)
{
unsigned int size;
size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
env().registerObject(getName(), size);
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
auto &U = envGet(LatticeGaugeField, par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
envCreateDerived(FMat, WilsonFermion<FImpl>, getName(), 1, U, grid, gridRb,
par().mass, implParams);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWilson<FImpl>::execute()
{
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
implParams);
env().setObject(getName(), fMatPt);
}
{}
END_MODULE_NAMESPACE

View File

@ -105,34 +105,45 @@ std::vector<std::string> TWilsonClover<FImpl>::getOutput(void)
template <typename FImpl>
void TWilsonClover<FImpl>::setup(void)
{
unsigned int size;
//unsigned int size;
// size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
// env().registerObject(getName(), size);
LOG(Message) << "Setting up TWilsonClover fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
LOG(Message) << "Clover term csw_r: " << par().csw_r
<< " csw_t: " << par().csw_t
<< std::endl;
auto &U = envGet(LatticeGaugeField, par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonCloverFermion<FImpl>::ImplParams implParams(boundary);
envCreateDerived(FMat, WilsonCloverFermion<FImpl>, getName(), 1, U, grid, gridRb, par().mass,
par().csw_r,
par().csw_t,
par().clover_anisotropy,
implParams);
//FMat *fMatPt = new WilsonCloverFermion<FImpl>(U, grid, gridRb, par().mass,
// par().csw_r,
// par().csw_t,
// par().clover_anisotropy,
// implParams);
//env().setObject(getName(), fMatPt);
size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
env().registerObject(getName(), size);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWilsonClover<FImpl>::execute()
{
LOG(Message) << "Setting up TWilsonClover fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
LOG(Message) << "clover term csw_r= " << par().csw_r
<< " csw_t= " << par().csw_t
<< std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonCloverFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new WilsonCloverFermion<FImpl>(U, grid, gridRb, par().mass,
par().csw_r,
par().csw_t,
par().clover_anisotropy,
implParams);
env().setObject(getName(), fMatPt);
}
END_MODULE_NAMESPACE

View File

@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Baryon.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -72,6 +72,9 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
@ -99,11 +102,18 @@ std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getInput(void)
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
std::vector<std::string> out = {};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TBaryon<FImpl1, FImpl2, FImpl3>::setup(void)
{
envTmpLat(LatticeComplex, "c");
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
@ -112,14 +122,12 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl;
std::string output_name = par().output + "." + std::to_string(env().getTrajectory());
CorrWriter writer(output_name);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
LatticeComplex c(env().getGrid());
Result result;
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &q1 = envGet(PropagatorField1, par().q1);
auto &q2 = envGet(PropagatorField2, par().q2);
auto &q3 = envGet(PropagatorField3, par().q2);
envGetTmp(LatticeComplex, c);
Result result;
// FIXME: do contractions

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -68,6 +69,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -97,7 +99,7 @@ std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
std::vector<std::string> out = {};
return out;
}
@ -106,7 +108,7 @@ std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{
envTmpLat(LatticeComplex, "c");
}
// execution ///////////////////////////////////////////////////////////////////
@ -117,13 +119,13 @@ void TDiscLoop<FImpl>::execute(void)
<< "' using '" << par().q_loop << "' with " << par().gamma
<< " insertion." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
LatticeComplex c(env().getGrid());
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &q_loop = envGet(PropagatorField, par().q_loop);
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
envGetTmp(LatticeComplex, c);
c = trace(gamma*q_loop);
sliceSum(c, buf, Tp);

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -51,6 +52,14 @@ BEGIN_HADRONS_NAMESPACE
* q1
*
* trace(g5*q1*adj(q2)*g5*gamma*q3)
*
* options:
* - q1: sink smeared propagator, source at i
* - q2: propagator, source at i
* - q3: propagator, source at f
* - gamma: gamma matrix to insert
* - tSnk: sink position for propagator q1.
*
*/
/******************************************************************************
@ -66,6 +75,7 @@ public:
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
unsigned int, tSnk,
std::string, output);
};
@ -90,6 +100,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -119,7 +130,7 @@ std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
std::vector<std::string> out = {};
return out;
}
@ -128,7 +139,7 @@ std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
envTmpLat(LatticeComplex, "c");
}
// execution ///////////////////////////////////////////////////////////////////
@ -140,17 +151,22 @@ void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
<< par().q3 << "', with " << par().gamma << " insertion."
<< std::endl;
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q3);
LatticeComplex c(env().getGrid());
// Initialise variables. q2 and q3 are normal propagators, q1 may be
// sink smeared.
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &q1 = envGet(SlicedPropagator1, par().q1);
auto &q2 = envGet(PropagatorField2, par().q2);
auto &q3 = envGet(PropagatorField2, par().q3);
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
// Extract relevant timeslice of sinked propagator q1, then contract &
// sum over all spacial positions of gamma insertion.
SitePropagator1 q1Snk = q1[par().tSnk];
envGetTmp(LatticeComplex, c);
c = trace(g5*q1Snk*adj(q2)*(g5*gamma)*q3);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;

View File

@ -4,12 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -51,8 +49,7 @@ BEGIN_HADRONS_NAMESPACE
in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
Special values: "all" - perform all possible contractions.
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
given as multiples of (2*pi) / L.
- sink: module to compute the sink to use in contraction (string).
*/
/******************************************************************************
@ -98,6 +95,9 @@ public:
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
virtual void parseGammaString(std::vector<GammaPair> &gammaList);
protected:
// execution
virtual void setup(void);
// execution
virtual void execute(void);
};
@ -125,7 +125,7 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
template <typename FImpl1, typename FImpl2>
std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
{
std::vector<std::string> output = {getName()};
std::vector<std::string> output = {};
return output;
}
@ -151,9 +151,15 @@ void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
{
// Parse individual contractions from input string.
gammaList = strToVec<GammaPair>(par().gammas);
}
}
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::setup(void)
{
envTmpLat(LatticeComplex, "c");
}
// execution ///////////////////////////////////////////////////////////////////
#define mesonConnected(q1, q2, gSnk, gSrc) \
@ -166,9 +172,7 @@ void TMeson<FImpl1, FImpl2>::execute(void)
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'"
<< std::endl;
std::string output_name = par().output + "." + std::to_string(env().getTrajectory());
CorrWriter writer(output_name);
ResultWriter writer(RESULT_FILE_NAME(par().output));
std::vector<TComplex> buf;
std::vector<Result> result;
Gamma g5(Gamma::Algebra::Gamma5);
@ -183,11 +187,11 @@ void TMeson<FImpl1, FImpl2>::execute(void)
result[i].gamma_src = gammaList[i].second;
result[i].corr.resize(nt);
}
if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and
env().template isObjectOfType<SlicedPropagator2>(par().q2))
if (envHasType(SlicedPropagator1, par().q1) and
envHasType(SlicedPropagator2, par().q2))
{
SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1);
SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2);
auto &q1 = envGet(SlicedPropagator1, par().q1);
auto &q2 = envGet(SlicedPropagator2, par().q2);
LOG(Message) << "(propagator already sinked)" << std::endl;
for (unsigned int i = 0; i < result.size(); ++i)
@ -203,10 +207,10 @@ void TMeson<FImpl1, FImpl2>::execute(void)
}
else
{
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
auto &q1 = envGet(PropagatorField1, par().q1);
auto &q2 = envGet(PropagatorField2, par().q2);
envGetTmp(LatticeComplex, c);
LOG(Message) << "(using sink '" << par().sink << "')" << std::endl;
for (unsigned int i = 0; i < result.size(); ++i)
{
@ -214,18 +218,17 @@ void TMeson<FImpl1, FImpl2>::execute(void)
Gamma gSrc(gammaList[i].second);
std::string ns;
ns = env().getModuleNamespace(env().getObjectModule(par().sink));
ns = vm().getModuleNamespace(env().getObjectModule(par().sink));
if (ns == "MSource")
{
PropagatorField1 &sink =
*env().template getObject<PropagatorField1>(par().sink);
PropagatorField1 &sink = envGet(PropagatorField1, par().sink);
c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink);
sliceSum(c, buf, Tp);
}
else if (ns == "MSink")
{
SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink);
SinkFnScalar &sink = envGet(SinkFnScalar, par().sink);
c = trace(mesonConnected(q1, q2, gSnk, gSrc));
buf = sink(c);

View File

@ -0,0 +1,224 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WardIdentity.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_WardIdentity_hpp_
#define Hadrons_MContraction_WardIdentity_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Ward Identity contractions
-----------------------------
* options:
- q: propagator, 5D if available (string)
- action: action module used for propagator solution (string)
- mass: mass of quark (double)
- test_axial: whether or not to test PCAC relation.
*/
/******************************************************************************
* WardIdentity *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class WardIdentityPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WardIdentityPar,
std::string, q,
std::string, action,
double, mass,
bool, test_axial);
};
template <typename FImpl>
class TWardIdentity: public Module<WardIdentityPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TWardIdentity(const std::string name);
// destructor
virtual ~TWardIdentity(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
unsigned int Ls_;
};
MODULE_REGISTER_NS(WardIdentity, TWardIdentity<FIMPL>, MContraction);
/******************************************************************************
* TWardIdentity implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWardIdentity<FImpl>::TWardIdentity(const std::string name)
: Module<WardIdentityPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWardIdentity<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().action};
return in;
}
template <typename FImpl>
std::vector<std::string> TWardIdentity<FImpl>::getOutput(void)
{
std::vector<std::string> out = {};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWardIdentity<FImpl>::setup(void)
{
Ls_ = env().getObjectLs(par().q);
if (Ls_ != env().getObjectLs(par().action))
{
HADRON_ERROR(Size, "Ls mismatch between quark action and propagator");
}
envTmpLat(PropagatorField, "tmp");
envTmpLat(PropagatorField, "vector_WI");
if (par().test_axial)
{
envTmpLat(PropagatorField, "psi");
envTmpLat(LatticeComplex, "PP");
envTmpLat(LatticeComplex, "axial_defect");
envTmpLat(LatticeComplex, "PJ5q");
}
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWardIdentity<FImpl>::execute(void)
{
LOG(Message) << "Performing Ward Identity checks for quark '" << par().q
<< "'." << std::endl;
auto &q = envGet(PropagatorField, par().q);
auto &act = envGet(FMat, par().action);
Gamma g5(Gamma::Algebra::Gamma5);
// Compute D_mu V_mu, D here is backward derivative.
envGetTmp(PropagatorField, tmp);
envGetTmp(PropagatorField, vector_WI);
vector_WI = zero;
for (unsigned int mu = 0; mu < Nd; ++mu)
{
act.ContractConservedCurrent(q, q, tmp, Current::Vector, mu);
tmp -= Cshift(tmp, mu, -1);
vector_WI += tmp;
}
// Test ward identity D_mu V_mu = 0;
LOG(Message) << "Vector Ward Identity check Delta_mu V_mu = "
<< norm2(vector_WI) << std::endl;
if (par().test_axial)
{
envGetTmp(PropagatorField, psi);
envGetTmp(LatticeComplex, PP);
envGetTmp(LatticeComplex, axial_defect);
envGetTmp(LatticeComplex, PJ5q);
std::vector<TComplex> axial_buf;
// Compute <P|D_mu A_mu>, D is backwards derivative.
axial_defect = zero;
for (unsigned int mu = 0; mu < Nd; ++mu)
{
act.ContractConservedCurrent(q, q, tmp, Current::Axial, mu);
tmp -= Cshift(tmp, mu, -1);
axial_defect += trace(g5*tmp);
}
// Get <P|J5q> for 5D (zero for 4D) and <P|P>.
PJ5q = zero;
if (Ls_ > 1)
{
// <P|P>
ExtractSlice(tmp, q, 0, 0);
psi = 0.5 * (tmp - g5*tmp);
ExtractSlice(tmp, q, Ls_ - 1, 0);
psi += 0.5 * (tmp + g5*tmp);
PP = trace(adj(psi)*psi);
// <P|5Jq>
ExtractSlice(tmp, q, Ls_/2 - 1, 0);
psi = 0.5 * (tmp + g5*tmp);
ExtractSlice(tmp, q, Ls_/2, 0);
psi += 0.5 * (tmp - g5*tmp);
PJ5q = trace(adj(psi)*psi);
}
else
{
PP = trace(adj(q)*q);
}
// Test ward identity <P|D_mu A_mu> = 2m<P|P> + 2<P|J5q>
LOG(Message) << "|D_mu A_mu|^2 = " << norm2(axial_defect) << std::endl;
LOG(Message) << "|PP|^2 = " << norm2(PP) << std::endl;
LOG(Message) << "|PJ5q|^2 = " << norm2(PJ5q) << std::endl;
LOG(Message) << "Axial Ward Identity defect Delta_mu A_mu = "
<< norm2(axial_defect) << std::endl;
// Axial defect by timeslice.
axial_defect -= 2.*(par().mass*PP + PJ5q);
LOG(Message) << "Check Axial defect by timeslice" << std::endl;
sliceSum(axial_defect, axial_buf, Tp);
for (int t = 0; t < axial_buf.size(); ++t)
{
LOG(Message) << "t = " << t << ": "
<< TensorRemove(axial_buf[t]) << std::endl;
}
}
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WardIdentity_hpp_

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -76,6 +77,7 @@ public:
std::string, q2,
std::string, q3,
std::string, q4,
unsigned int, tSnk,
std::string, output);
};
@ -99,11 +101,13 @@ public:\
/* dependency relation */ \
virtual std::vector<std::string> getInput(void);\
virtual std::vector<std::string> getOutput(void);\
public:\
std::vector<std::string> VA_label = {"V", "A"};\
protected:\
/* setup */ \
virtual void setup(void);\
/* execution */ \
virtual void execute(void);\
std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -54,6 +55,8 @@ using namespace MContraction;
*
* S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
* E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
*
* Note q1 must be sink smeared.
*/
/******************************************************************************
@ -74,7 +77,7 @@ std::vector<std::string> TWeakHamiltonianEye::getInput(void)
std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
std::vector<std::string> out = {};
return out;
}
@ -82,7 +85,15 @@ std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::setup(void)
{
unsigned int ndim = env().getNd();
envTmpLat(LatticeComplex, "expbuf");
envTmpLat(PropagatorField, "tmp1");
envTmpLat(LatticeComplex, "tmp2");
envTmp(std::vector<PropagatorField>, "S_body", 1, ndim, PropagatorField(env().getGrid()));
envTmp(std::vector<PropagatorField>, "S_loop", 1, ndim, PropagatorField(env().getGrid()));
envTmp(std::vector<LatticeComplex>, "E_body", 1, ndim, LatticeComplex(env().getGrid()));
envTmp(std::vector<LatticeComplex>, "E_loop", 1, ndim, LatticeComplex(env().getGrid()));
}
// execution ///////////////////////////////////////////////////////////////////
@ -93,28 +104,31 @@ void TWeakHamiltonianEye::execute(void)
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_eye_diag);
unsigned int ndim = env().getNd();
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &q1 = envGet(SlicedPropagator, par().q1);
auto &q2 = envGet(PropagatorField, par().q2);
auto &q3 = envGet(PropagatorField, par().q3);
auto &q4 = envGet(PropagatorField, par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_eye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> S_body(ndim, tmp1);
std::vector<PropagatorField> S_loop(ndim, tmp1);
std::vector<LatticeComplex> E_body(ndim, tmp2);
std::vector<LatticeComplex> E_loop(ndim, tmp2);
envGetTmp(LatticeComplex, expbuf);
envGetTmp(PropagatorField, tmp1);
envGetTmp(LatticeComplex, tmp2);
envGetTmp(std::vector<PropagatorField>, S_body);
envGetTmp(std::vector<PropagatorField>, S_loop);
envGetTmp(std::vector<LatticeComplex>, E_body);
envGetTmp(std::vector<LatticeComplex>, E_loop);
// Get sink timeslice of q1.
SitePropagator q1Snk = q1[par().tSnk];
// Setup for S-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
S_body[mu] = MAKE_SE_BODY(q1Snk, q2, q3, GammaL(Gamma::gmu[mu]));
S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
}

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -76,7 +77,7 @@ std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
std::vector<std::string> out = {};
return out;
}
@ -84,7 +85,15 @@ std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::setup(void)
{
unsigned int ndim = env().getNd();
envTmpLat(LatticeComplex, "expbuf");
envTmpLat(PropagatorField, "tmp1");
envTmpLat(LatticeComplex, "tmp2");
envTmp(std::vector<PropagatorField>, "C_i_side_loop", 1, ndim, PropagatorField(env().getGrid()));
envTmp(std::vector<PropagatorField>, "C_f_side_loop", 1, ndim, PropagatorField(env().getGrid()));
envTmp(std::vector<LatticeComplex>, "W_i_side_loop", 1, ndim, LatticeComplex(env().getGrid()));
envTmp(std::vector<LatticeComplex>, "W_f_side_loop", 1, ndim, LatticeComplex(env().getGrid()));
}
// execution ///////////////////////////////////////////////////////////////////
@ -95,23 +104,23 @@ void TWeakHamiltonianNonEye::execute(void)
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &q1 = envGet(PropagatorField, par().q1);
auto &q2 = envGet(PropagatorField, par().q2);
auto &q3 = envGet(PropagatorField, par().q3);
auto &q4 = envGet(PropagatorField, par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_noneye_diag);
unsigned int ndim = env().getNd();
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
std::vector<LatticeComplex> W_i_side_loop(ndim, tmp2);
std::vector<LatticeComplex> W_f_side_loop(ndim, tmp2);
envGetTmp(LatticeComplex, expbuf);
envGetTmp(PropagatorField, tmp1);
envGetTmp(LatticeComplex, tmp2);
envGetTmp(std::vector<PropagatorField>, C_i_side_loop);
envGetTmp(std::vector<PropagatorField>, C_f_side_loop);
envGetTmp(std::vector<LatticeComplex>, W_i_side_loop);
envGetTmp(std::vector<LatticeComplex>, W_f_side_loop);
// Setup for C-type contractions.
for (int mu = 0; mu < ndim; ++mu)

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -78,7 +79,7 @@ std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
std::vector<std::string> out = {getName()};
std::vector<std::string> out = {};
return out;
}
@ -86,7 +87,13 @@ std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::setup(void)
{
unsigned int ndim = env().getNd();
envTmpLat(LatticeComplex, "expbuf");
envTmpLat(PropagatorField, "tmp");
envTmpLat(LatticeComplex, "curr");
envTmp(std::vector<PropagatorField>, "meson", 1, ndim, PropagatorField(env().getGrid()));
envTmp(std::vector<PropagatorField>, "loop", 1, ndim, PropagatorField(env().getGrid()));
}
// execution ///////////////////////////////////////////////////////////////////
@ -97,21 +104,21 @@ void TWeakNeutral4ptDisc::execute(void)
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &q1 = envGet(PropagatorField, par().q1);
auto &q2 = envGet(PropagatorField, par().q2);
auto &q3 = envGet(PropagatorField, par().q3);
auto &q4 = envGet(PropagatorField, par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_neut_disc_diag);
unsigned int ndim = env().getNd();
unsigned int ndim = env().getNd();
PropagatorField tmp(env().getGrid());
std::vector<PropagatorField> meson(ndim, tmp);
std::vector<PropagatorField> loop(ndim, tmp);
LatticeComplex curr(env().getGrid());
envGetTmp(LatticeComplex, expbuf);
envGetTmp(PropagatorField, tmp);
envGetTmp(LatticeComplex, curr);
envGetTmp(std::vector<PropagatorField>, meson);
envGetTmp(std::vector<PropagatorField>, loop);
// Setup for type 1 contractions.
for (int mu = 0; mu < ndim; ++mu)

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@ -1,3 +1,32 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MFermion/GaugeProp.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MFermion_GaugeProp_hpp_
#define Hadrons_MFermion_GaugeProp_hpp_
@ -7,6 +36,27 @@
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* 5D -> 4D and 4D -> 5D conversions. *
******************************************************************************/
template<class vobj> // Note that 5D object is modified.
inline void make_4D(Lattice<vobj> &in_5d, Lattice<vobj> &out_4d, int Ls)
{
axpby_ssp_pminus(in_5d, 0., in_5d, 1., in_5d, 0, 0);
axpby_ssp_pplus(in_5d, 1., in_5d, 1., in_5d, 0, Ls-1);
ExtractSlice(out_4d, in_5d, 0, 0);
}
template<class vobj>
inline void make_5D(Lattice<vobj> &in_4d, Lattice<vobj> &out_5d, int Ls)
{
out_5d = zero;
InsertSlice(in_4d, out_5d, 0, 0);
InsertSlice(in_4d, out_5d, Ls-1, 0);
axpby_ssp_pplus(out_5d, 0., out_5d, 1., out_5d, 0, 0);
axpby_ssp_pminus(out_5d, 0., out_5d, 1., out_5d, Ls-1, Ls-1);
}
/******************************************************************************
* GaugeProp *
******************************************************************************/
@ -33,6 +83,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -74,10 +125,13 @@ template <typename FImpl>
void TGaugeProp<FImpl>::setup(void)
{
Ls_ = env().getObjectLs(par().solver);
env().template registerLattice<PropagatorField>(getName());
envCreateLat(PropagatorField, getName());
envTmpLat(FermionField, "source", Ls_);
envTmpLat(FermionField, "sol", Ls_);
envTmpLat(FermionField, "tmp");
if (Ls_ > 1)
{
env().template registerLattice<PropagatorField>(getName() + "_5d", Ls_);
envCreateLat(PropagatorField, getName() + "_5d", Ls_);
}
}
@ -86,26 +140,23 @@ template <typename FImpl>
void TGaugeProp<FImpl>::execute(void)
{
LOG(Message) << "Computing quark propagator '" << getName() << "'"
<< std::endl;
<< std::endl;
FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)),
tmp(env().getGrid());
std::string propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
PropagatorField &prop = *env().template createLattice<PropagatorField>(propName);
PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source);
SolverFn &solver = *env().template getObject<SolverFn>(par().solver);
if (Ls_ > 1)
{
env().template createLattice<PropagatorField>(getName());
}
std::string propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
auto &prop = envGet(PropagatorField, propName);
auto &fullSrc = envGet(PropagatorField, par().source);
auto &solver = envGet(SolverFn, par().solver);
envGetTmp(FermionField, source);
envGetTmp(FermionField, sol);
envGetTmp(FermionField, tmp);
LOG(Message) << "Inverting using solver '" << par().solver
<< "' on source '" << par().source << "'" << std::endl;
<< "' on source '" << par().source << "'" << std::endl;
for (unsigned int s = 0; s < Ns; ++s)
for (unsigned int c = 0; c < FImpl::Dimension; ++c)
{
LOG(Message) << "Inversion for spin= " << s << ", color= " << c
<< std::endl;
<< std::endl;
// source conversion for 4D sources
if (!env().isObject5d(par().source))
{
@ -115,12 +166,8 @@ void TGaugeProp<FImpl>::execute(void)
}
else
{
source = zero;
PropToFerm<FImpl>(tmp, fullSrc, s, c);
InsertSlice(tmp, source, 0, 0);
InsertSlice(tmp, source, Ls_-1, 0);
axpby_ssp_pplus(source, 0., source, 1., source, 0, 0);
axpby_ssp_pminus(source, 0., source, 1., source, Ls_-1, Ls_-1);
make_5D(tmp, source, Ls_);
}
}
// source conversion for 5D sources
@ -128,7 +175,7 @@ void TGaugeProp<FImpl>::execute(void)
{
if (Ls_ != env().getObjectLs(par().source))
{
HADRON_ERROR("Ls mismatch between quark action and source");
HADRON_ERROR(Size, "Ls mismatch between quark action and source");
}
else
{
@ -141,12 +188,8 @@ void TGaugeProp<FImpl>::execute(void)
// create 4D propagators from 5D one if necessary
if (Ls_ > 1)
{
PropagatorField &p4d =
*env().template getObject<PropagatorField>(getName());
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
ExtractSlice(tmp, sol, 0, 0);
PropagatorField &p4d = envGet(PropagatorField, getName());
make_4D(sol, tmp, Ls_);
FermToProp<FImpl>(p4d, tmp, s, c);
}
}

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Random.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -44,7 +43,9 @@ TRandom::TRandom(const std::string name)
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TRandom::getInput(void)
{
return std::vector<std::string>();
std::vector<std::string> in;
return in;
}
std::vector<std::string> TRandom::getOutput(void)
@ -57,13 +58,14 @@ std::vector<std::string> TRandom::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
void TRandom::setup(void)
{
env().registerLattice<LatticeGaugeField>(getName());
envCreateLat(LatticeGaugeField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TRandom::execute(void)
{
LOG(Message) << "Generating random gauge configuration" << std::endl;
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
auto &U = envGet(LatticeGaugeField, getName());
SU3::HotConfiguration(*env().get4dRng(), U);
}

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Random.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -51,6 +50,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution

View File

@ -4,9 +4,9 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/StochEm.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -57,32 +57,28 @@ std::vector<std::string> TStochEm::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
void TStochEm::setup(void)
{
if (!env().hasRegisteredObject("_" + getName() + "_weight"))
if (!env().hasCreatedObject("_" + getName() + "_weight"))
{
env().registerLattice<EmComp>("_" + getName() + "_weight");
envCacheLat(EmComp, "_" + getName() + "_weight");
}
env().registerLattice<EmField>(getName());
envCreateLat(EmField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TStochEm::execute(void)
{
LOG(Message) << "Generating stochatic EM potential..." << std::endl;
PhotonR photon(par().gauge, par().zmScheme);
EmField &a = *env().createLattice<EmField>(getName());
EmComp *w;
auto &a = envGet(EmField, getName());
auto &w = envGet(EmComp, "_" + getName() + "_weight");
if (!env().hasCreatedObject("_" + getName() + "_weight"))
{
LOG(Message) << "Caching stochatic EM potential weight (gauge: "
<< par().gauge << ", zero-mode scheme: "
<< par().zmScheme << ")..." << std::endl;
w = env().createLattice<EmComp>("_" + getName() + "_weight");
photon.StochasticWeight(*w);
photon.StochasticWeight(w);
}
else
{
w = env().getObject<EmComp>("_" + getName() + "_weight");
}
LOG(Message) << "Generating stochatic EM potential..." << std::endl;
photon.StochasticField(a, *env().get4dRng(), *w);
photon.StochasticField(a, *env().get4dRng(), w);
}

View File

@ -4,9 +4,9 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -60,6 +60,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Unit.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -57,13 +56,14 @@ std::vector<std::string> TUnit::getOutput(void)
// setup ///////////////////////////////////////////////////////////////////////
void TUnit::setup(void)
{
env().registerLattice<LatticeGaugeField>(getName());
envCreateLat(LatticeGaugeField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TUnit::execute(void)
{
LOG(Message) << "Creating unit gauge configuration" << std::endl;
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
auto &U = envGet(LatticeGaugeField, getName());
SU3::ColdConfiguration(*env().get4dRng(), U);
}

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Unit.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -51,6 +50,7 @@ public:
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution

View File

@ -0,0 +1,140 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MIO/LoadBinary.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MIO_LoadBinary_hpp_
#define Hadrons_MIO_LoadBinary_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Load a binary configurations *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MIO)
class LoadBinaryPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LoadBinaryPar,
std::string, file,
std::string, format);
};
template <typename Impl>
class TLoadBinary: public Module<LoadBinaryPar>
{
public:
typedef typename Impl::Field Field;
typedef typename Impl::Simd Simd;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef typename sobj::DoublePrecision sobj_double;
typedef BinarySimpleMunger<sobj_double, sobj> Munger;
public:
// constructor
TLoadBinary(const std::string name);
// destructor
virtual ~TLoadBinary(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(LoadBinary, TLoadBinary<GIMPL>, MIO);
MODULE_REGISTER_NS(LoadBinaryScalarSU2, TLoadBinary<ScalarNxNAdjImplR<2>>, MIO);
MODULE_REGISTER_NS(LoadBinaryScalarSU3, TLoadBinary<ScalarNxNAdjImplR<3>>, MIO);
MODULE_REGISTER_NS(LoadBinaryScalarSU4, TLoadBinary<ScalarNxNAdjImplR<4>>, MIO);
MODULE_REGISTER_NS(LoadBinaryScalarSU5, TLoadBinary<ScalarNxNAdjImplR<5>>, MIO);
MODULE_REGISTER_NS(LoadBinaryScalarSU6, TLoadBinary<ScalarNxNAdjImplR<6>>, MIO);
/******************************************************************************
* TLoadBinary implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename Impl>
TLoadBinary<Impl>::TLoadBinary(const std::string name)
: Module<LoadBinaryPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename Impl>
std::vector<std::string> TLoadBinary<Impl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename Impl>
std::vector<std::string> TLoadBinary<Impl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename Impl>
void TLoadBinary<Impl>::setup(void)
{
envCreateLat(Field, getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename Impl>
void TLoadBinary<Impl>::execute(void)
{
Munger munge;
uint32_t nersc_csum, scidac_csuma, scidac_csumb;
auto &U = envGet(Field, getName());
std::string filename = par().file + "."
+ std::to_string(vm().getTrajectory());
LOG(Message) << "Loading " << par().format
<< " binary configuration from file '" << filename
<< "'" << std::endl;
BinaryIO::readLatticeObject<vobj, sobj_double>(U, filename, munge, 0,
par().format, nersc_csum,
scidac_csuma, scidac_csumb);
LOG(Message) << "Checksums:" << std::endl;
LOG(Message) << " NERSC " << nersc_csum << std::endl;
LOG(Message) << " SciDAC A " << scidac_csuma << std::endl;
LOG(Message) << " SciDAC B " << scidac_csumb << std::endl;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MIO_LoadBinary_hpp_

View File

@ -2,10 +2,9 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Load.cc
Source file: extras/Hadrons/Modules/MIO/LoadNersc.cc
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -26,30 +25,29 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MIO/LoadNersc.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MGauge;
using namespace MIO;
/******************************************************************************
* TLoad implementation *
* TLoadNersc implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TLoad::TLoad(const std::string name)
: Module<LoadPar>(name)
TLoadNersc::TLoadNersc(const std::string name)
: Module<LoadNerscPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TLoad::getInput(void)
std::vector<std::string> TLoadNersc::getInput(void)
{
std::vector<std::string> in;
return in;
}
std::vector<std::string> TLoad::getOutput(void)
std::vector<std::string> TLoadNersc::getOutput(void)
{
std::vector<std::string> out = {getName()};
@ -57,22 +55,21 @@ std::vector<std::string> TLoad::getOutput(void)
}
// setup ///////////////////////////////////////////////////////////////////////
void TLoad::setup(void)
void TLoadNersc::setup(void)
{
env().registerLattice<LatticeGaugeField>(getName());
envCreateLat(LatticeGaugeField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TLoad::execute(void)
void TLoadNersc::execute(void)
{
FieldMetaData header;
std::string fileName = par().file + "ckpoint_lat."
+ std::to_string(env().getTrajectory());
FieldMetaData header;
std::string fileName = par().file + "."
+ std::to_string(vm().getTrajectory());
LOG(Message) << "Loading NERSC configuration from file '" << fileName
<< "'" << std::endl;
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
auto &U = envGet(LatticeGaugeField, getName());
NerscIO::readConfiguration(U, header, fileName);
LOG(Message) << "NERSC header:" << std::endl;
dump_meta_data(header, LOG(Message));

View File

@ -2,10 +2,9 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MGauge/Load.hpp
Source file: extras/Hadrons/Modules/MIO/LoadNersc.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -26,9 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MGauge_Load_hpp_
#define Hadrons_MGauge_Load_hpp_
#ifndef Hadrons_MIO_LoadNersc_hpp_
#define Hadrons_MIO_LoadNersc_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
@ -37,24 +35,24 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Load a NERSC configuration *
* Load a NERSC configuration *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MGauge)
BEGIN_MODULE_NAMESPACE(MIO)
class LoadPar: Serializable
class LoadNerscPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LoadPar,
GRID_SERIALIZABLE_CLASS_MEMBERS(LoadNerscPar,
std::string, file);
};
class TLoad: public Module<LoadPar>
class TLoadNersc: public Module<LoadNerscPar>
{
public:
// constructor
TLoad(const std::string name);
TLoadNersc(const std::string name);
// destructor
virtual ~TLoad(void) = default;
virtual ~TLoadNersc(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
@ -64,10 +62,10 @@ public:
virtual void execute(void);
};
MODULE_REGISTER_NS(Load, TLoad, MGauge);
MODULE_REGISTER_NS(LoadNersc, TLoadNersc, MIO);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MGauge_Load_hpp_
#endif // Hadrons_MIO_LoadNersc_hpp_

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -74,6 +75,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -112,16 +114,16 @@ std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
template <typename FImpl>
void TNoiseLoop<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
envCreateLat(PropagatorField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::execute(void)
{
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
auto &loop = envGet(PropagatorField, getName());
auto &q = envGet(PropagatorField, par().q);
auto &eta = envGet(PropagatorField, par().eta);
loop = q*adj(eta);
}

View File

@ -1,3 +1,31 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalar/ChargedProp.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: James Harrison <jch1g10@soton.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MScalar/ChargedProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
@ -37,90 +65,44 @@ void TChargedProp::setup(void)
{
phaseName_.push_back("_shiftphase_" + std::to_string(mu));
}
GFSrcName_ = "_" + getName() + "_DinvSrc";
if (!env().hasRegisteredObject(freeMomPropName_))
GFSrcName_ = getName() + "_DinvSrc";
fftName_ = getName() + "_fft";
freeMomPropDone_ = env().hasCreatedObject(freeMomPropName_);
GFSrcDone_ = env().hasCreatedObject(GFSrcName_);
phasesDone_ = env().hasCreatedObject(phaseName_[0]);
envCacheLat(ScalarField, freeMomPropName_);
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
env().registerLattice<ScalarField>(freeMomPropName_);
envCacheLat(ScalarField, phaseName_[mu]);
}
if (!env().hasRegisteredObject(phaseName_[0]))
{
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
env().registerLattice<ScalarField>(phaseName_[mu]);
}
}
if (!env().hasRegisteredObject(GFSrcName_))
{
env().registerLattice<ScalarField>(GFSrcName_);
}
env().registerLattice<ScalarField>(getName());
envCacheLat(ScalarField, GFSrcName_);
envCreateLat(ScalarField, getName());
envTmpLat(ScalarField, "buf");
envTmpLat(ScalarField, "result");
envTmpLat(ScalarField, "Amu");
envCache(FFT, fftName_, 1, env().getGrid());
}
// execution ///////////////////////////////////////////////////////////////////
void TChargedProp::execute(void)
{
// CACHING ANALYTIC EXPRESSIONS
ScalarField &source = *env().getObject<ScalarField>(par().source);
Complex ci(0.0,1.0);
FFT fft(env().getGrid());
// cache free scalar propagator
if (!env().hasCreatedObject(freeMomPropName_))
{
LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl;
freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_);
SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass);
}
else
{
freeMomProp_ = env().getObject<ScalarField>(freeMomPropName_);
}
// cache G*F*src
if (!env().hasCreatedObject(GFSrcName_))
{
GFSrc_ = env().createLattice<ScalarField>(GFSrcName_);
fft.FFT_all_dim(*GFSrc_, source, FFT::forward);
*GFSrc_ = (*freeMomProp_)*(*GFSrc_);
}
else
{
GFSrc_ = env().getObject<ScalarField>(GFSrcName_);
}
// cache phases
if (!env().hasCreatedObject(phaseName_[0]))
{
std::vector<int> &l = env().getGrid()->_fdimensions;
LOG(Message) << "Caching shift phases..." << std::endl;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Real twoPiL = M_PI*2./l[mu];
phase_.push_back(env().createLattice<ScalarField>(phaseName_[mu]));
LatticeCoordinate(*(phase_[mu]), mu);
*(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu])));
}
}
else
{
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
phase_.push_back(env().getObject<ScalarField>(phaseName_[mu]));
}
}
makeCaches();
// PROPAGATOR CALCULATION
LOG(Message) << "Computing charged scalar propagator"
<< " (mass= " << par().mass
<< ", charge= " << par().charge << ")..." << std::endl;
ScalarField &prop = *env().createLattice<ScalarField>(getName());
ScalarField buf(env().getGrid());
ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_;
double q = par().charge;
auto &prop = envGet(ScalarField, getName());
auto &GFSrc = envGet(ScalarField, GFSrcName_);
auto &G = envGet(ScalarField, freeMomPropName_);
auto &fft = envGet(FFT, fftName_);
double q = par().charge;
envGetTmp(ScalarField, result);
envGetTmp(ScalarField, buf);
// G*F*Src
prop = GFSrc;
@ -146,12 +128,12 @@ void TChargedProp::execute(void)
if (!par().output.empty())
{
std::string filename = par().output + "." +
std::to_string(env().getTrajectory());
std::to_string(vm().getTrajectory());
LOG(Message) << "Saving zero-momentum projection to '"
<< filename << "'..." << std::endl;
CorrWriter writer(filename);
ResultWriter writer(RESULT_FILE_NAME(par().output));
std::vector<TComplex> vecBuf;
std::vector<Complex> result;
@ -166,15 +148,55 @@ void TChargedProp::execute(void)
}
}
void TChargedProp::makeCaches(void)
{
auto &freeMomProp = envGet(ScalarField, freeMomPropName_);
auto &GFSrc = envGet(ScalarField, GFSrcName_);
auto &fft = envGet(FFT, fftName_);
if (!freeMomPropDone_)
{
LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl;
SIMPL::MomentumSpacePropagator(freeMomProp, par().mass);
}
if (!GFSrcDone_)
{
FFT fft(env().getGrid());
auto &source = envGet(ScalarField, par().source);
LOG(Message) << "Caching G*F*src..." << std::endl;
fft.FFT_all_dim(GFSrc, source, FFT::forward);
GFSrc = freeMomProp*GFSrc;
}
if (!phasesDone_)
{
std::vector<int> &l = env().getGrid()->_fdimensions;
Complex ci(0.0,1.0);
LOG(Message) << "Caching shift phases..." << std::endl;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Real twoPiL = M_PI*2./l[mu];
auto &phmu = envGet(ScalarField, phaseName_[mu]);
LatticeCoordinate(phmu, mu);
phmu = exp(ci*twoPiL*phmu);
phase_.push_back(&phmu);
}
}
}
void TChargedProp::momD1(ScalarField &s, FFT &fft)
{
EmField &A = *env().getObject<EmField>(par().emField);
ScalarField buf(env().getGrid()), result(env().getGrid()),
Amu(env().getGrid());
auto &A = envGet(EmField, par().emField);
Complex ci(0.0,1.0);
result = zero;
envGetTmp(ScalarField, buf);
envGetTmp(ScalarField, result);
envGetTmp(ScalarField, Amu);
result = zero;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Amu = peekLorentz(A, mu);
@ -198,12 +220,13 @@ void TChargedProp::momD1(ScalarField &s, FFT &fft)
void TChargedProp::momD2(ScalarField &s, FFT &fft)
{
EmField &A = *env().getObject<EmField>(par().emField);
ScalarField buf(env().getGrid()), result(env().getGrid()),
Amu(env().getGrid());
auto &A = envGet(EmField, par().emField);
envGetTmp(ScalarField, buf);
envGetTmp(ScalarField, result);
envGetTmp(ScalarField, Amu);
result = zero;
for (unsigned int mu = 0; mu < env().getNd(); ++mu)
{
Amu = peekLorentz(A, mu);

View File

@ -1,3 +1,30 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalar/ChargedProp.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MScalar_ChargedProp_hpp_
#define Hadrons_MScalar_ChargedProp_hpp_
@ -37,19 +64,20 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
void makeCaches(void);
void momD1(ScalarField &s, FFT &fft);
void momD2(ScalarField &s, FFT &fft);
private:
std::string freeMomPropName_, GFSrcName_;
bool freeMomPropDone_, GFSrcDone_, phasesDone_;
std::string freeMomPropName_, GFSrcName_, fftName_;
std::vector<std::string> phaseName_;
ScalarField *freeMomProp_, *GFSrc_;
std::vector<ScalarField *> phase_;
EmField *A;
};
MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar);

View File

@ -1,3 +1,30 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalar/FreeProp.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
@ -33,38 +60,31 @@ void TFreeProp::setup(void)
{
freeMomPropName_ = FREEMOMPROP(par().mass);
if (!env().hasRegisteredObject(freeMomPropName_))
{
env().registerLattice<ScalarField>(freeMomPropName_);
}
env().registerLattice<ScalarField>(getName());
freePropDone_ = env().hasCreatedObject(freeMomPropName_);
envCacheLat(ScalarField, freeMomPropName_);
envCreateLat(ScalarField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
void TFreeProp::execute(void)
{
ScalarField &prop = *env().createLattice<ScalarField>(getName());
ScalarField &source = *env().getObject<ScalarField>(par().source);
ScalarField *freeMomProp;
auto &freeMomProp = envGet(ScalarField, freeMomPropName_);
auto &prop = envGet(ScalarField, getName());
auto &source = envGet(ScalarField, par().source);
if (!env().hasCreatedObject(freeMomPropName_))
if (!freePropDone_)
{
LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl;
freeMomProp = env().createLattice<ScalarField>(freeMomPropName_);
SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass);
}
else
{
freeMomProp = env().getObject<ScalarField>(freeMomPropName_);
SIMPL::MomentumSpacePropagator(freeMomProp, par().mass);
}
LOG(Message) << "Computing free scalar propagator..." << std::endl;
SIMPL::FreePropagator(source, prop, *freeMomProp);
SIMPL::FreePropagator(source, prop, freeMomProp);
if (!par().output.empty())
{
TextWriter writer(par().output + "." +
std::to_string(env().getTrajectory()));
std::to_string(vm().getTrajectory()));
std::vector<TComplex> buf;
std::vector<Complex> result;

View File

@ -1,3 +1,30 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalar/FreeProp.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MScalar_FreeProp_hpp_
#define Hadrons_MScalar_FreeProp_hpp_
@ -33,12 +60,14 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
std::string freeMomPropName_;
bool freePropDone_;
};
MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar);

View File

@ -1,3 +1,30 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalar/Scalar.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Scalar_hpp_
#define Hadrons_Scalar_hpp_

View File

@ -0,0 +1,146 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalarSUN/TrMag.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MScalarSUN_TrMag_hpp_
#define Hadrons_MScalarSUN_TrMag_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Module to compute tr(mag^n) *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalarSUN)
class TrMagPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrMagPar,
std::string, field,
unsigned int, maxPow,
std::string, output);
};
template <typename SImpl>
class TTrMag: public Module<TrMagPar>
{
public:
typedef typename SImpl::Field Field;
typedef typename SImpl::ComplexField ComplexField;
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::string, op,
Real, value);
};
public:
// constructor
TTrMag(const std::string name);
// destructor
virtual ~TTrMag(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(TrMagSU2, TTrMag<ScalarNxNAdjImplR<2>>, MScalarSUN);
MODULE_REGISTER_NS(TrMagSU3, TTrMag<ScalarNxNAdjImplR<3>>, MScalarSUN);
MODULE_REGISTER_NS(TrMagSU4, TTrMag<ScalarNxNAdjImplR<4>>, MScalarSUN);
MODULE_REGISTER_NS(TrMagSU5, TTrMag<ScalarNxNAdjImplR<5>>, MScalarSUN);
MODULE_REGISTER_NS(TrMagSU6, TTrMag<ScalarNxNAdjImplR<6>>, MScalarSUN);
/******************************************************************************
* TTrMag implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename SImpl>
TTrMag<SImpl>::TTrMag(const std::string name)
: Module<TrMagPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename SImpl>
std::vector<std::string> TTrMag<SImpl>::getInput(void)
{
std::vector<std::string> in = {par().field};
return in;
}
template <typename SImpl>
std::vector<std::string> TTrMag<SImpl>::getOutput(void)
{
std::vector<std::string> out = {};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTrMag<SImpl>::setup(void)
{}
// execution ///////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTrMag<SImpl>::execute(void)
{
LOG(Message) << "Computing tr(mag^n) for n even up to " << par().maxPow
<< "..." << std::endl;
std::vector<Result> result;
ResultWriter writer(RESULT_FILE_NAME(par().output));
auto &phi = envGet(Field, par().field);
auto m2 = sum(phi), mn = m2;
m2 = -m2*m2;
mn = 1.;
for (unsigned int n = 2; n <= par().maxPow; n += 2)
{
Result r;
mn = mn*m2;
r.op = "tr(mag^" + std::to_string(n) + ")";
r.value = TensorRemove(trace(mn)).real();
result.push_back(r);
}
write(writer, "trmag", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MScalarSUN_TrMag_hpp_

View File

@ -0,0 +1,182 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MScalarSUN_TrPhi_hpp_
#define Hadrons_MScalarSUN_TrPhi_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Module to compute tr(phi^n) *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalarSUN)
class TrPhiPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrPhiPar,
std::string, field,
unsigned int, maxPow,
std::string, output);
};
template <typename SImpl>
class TTrPhi: public Module<TrPhiPar>
{
public:
typedef typename SImpl::Field Field;
typedef typename SImpl::ComplexField ComplexField;
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::string, op,
Real, value);
};
public:
// constructor
TTrPhi(const std::string name);
// destructor
virtual ~TTrPhi(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
// output name generator
std::string outName(const unsigned int n);
};
MODULE_REGISTER_NS(TrPhiSU2, TTrPhi<ScalarNxNAdjImplR<2>>, MScalarSUN);
MODULE_REGISTER_NS(TrPhiSU3, TTrPhi<ScalarNxNAdjImplR<3>>, MScalarSUN);
MODULE_REGISTER_NS(TrPhiSU4, TTrPhi<ScalarNxNAdjImplR<4>>, MScalarSUN);
MODULE_REGISTER_NS(TrPhiSU5, TTrPhi<ScalarNxNAdjImplR<5>>, MScalarSUN);
MODULE_REGISTER_NS(TrPhiSU6, TTrPhi<ScalarNxNAdjImplR<6>>, MScalarSUN);
/******************************************************************************
* TTrPhi implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename SImpl>
TTrPhi<SImpl>::TTrPhi(const std::string name)
: Module<TrPhiPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename SImpl>
std::vector<std::string> TTrPhi<SImpl>::getInput(void)
{
std::vector<std::string> in = {par().field};
return in;
}
template <typename SImpl>
std::vector<std::string> TTrPhi<SImpl>::getOutput(void)
{
std::vector<std::string> out;
for (unsigned int n = 2; n <= par().maxPow; n += 2)
{
out.push_back(outName(n));
}
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTrPhi<SImpl>::setup(void)
{
if (par().maxPow < 2)
{
HADRON_ERROR(Size, "'maxPow' should be at least equal to 2");
}
envTmpLat(Field, "phi2");
envTmpLat(Field, "buf");
for (unsigned int n = 2; n <= par().maxPow; n += 2)
{
envCreateLat(ComplexField, outName(n));
}
}
// execution ///////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTrPhi<SImpl>::execute(void)
{
LOG(Message) << "Computing tr(phi^n) for n even up to " << par().maxPow
<< "..." << std::endl;
std::vector<Result> result;
auto &phi = envGet(Field, par().field);
envGetTmp(Field, phi2);
envGetTmp(Field, buf);
buf = 1.;
phi2 = -phi*phi;
for (unsigned int n = 2; n <= par().maxPow; n += 2)
{
auto &phin = envGet(ComplexField, outName(n));
buf = buf*phi2;
phin = trace(buf);
if (!par().output.empty())
{
Result r;
r.op = "tr(phi^" + std::to_string(n) + ")";
r.value = TensorRemove(sum(phin)).real();
result.push_back(r);
}
}
if (result.size() > 0)
{
ResultWriter writer(RESULT_FILE_NAME(par().output));
write(writer, "trphi", result);
}
}
// output name generator ///////////////////////////////////////////////////////
template <typename SImpl>
std::string TTrPhi<SImpl>::outName(const unsigned int n)
{
return getName() + "_" + std::to_string(n);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MScalarSUN_TrPhi_hpp_

View File

@ -0,0 +1,184 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MScalarSUN_TwoPoint_hpp_
#define Hadrons_MScalarSUN_TwoPoint_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* 2-pt functions for a given set of operators *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MScalarSUN)
class TwoPointPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TwoPointPar,
std::vector<std::string>, op,
std::string, output);
};
template <typename SImpl>
class TTwoPoint: public Module<TwoPointPar>
{
public:
typedef typename SImpl::Field Field;
typedef typename SImpl::ComplexField ComplexField;
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::string, sink,
std::string, source,
std::vector<Complex>, data);
};
public:
// constructor
TTwoPoint(const std::string name);
// destructor
virtual ~TTwoPoint(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
// make 2-pt function
template <class SinkSite, class SourceSite>
std::vector<Complex> makeTwoPoint(const std::vector<SinkSite> &sink,
const std::vector<SourceSite> &source);
};
MODULE_REGISTER_NS(TwoPointSU2, TTwoPoint<ScalarNxNAdjImplR<2>>, MScalarSUN);
MODULE_REGISTER_NS(TwoPointSU3, TTwoPoint<ScalarNxNAdjImplR<3>>, MScalarSUN);
MODULE_REGISTER_NS(TwoPointSU4, TTwoPoint<ScalarNxNAdjImplR<4>>, MScalarSUN);
MODULE_REGISTER_NS(TwoPointSU5, TTwoPoint<ScalarNxNAdjImplR<5>>, MScalarSUN);
MODULE_REGISTER_NS(TwoPointSU6, TTwoPoint<ScalarNxNAdjImplR<6>>, MScalarSUN);
/******************************************************************************
* TTwoPoint implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename SImpl>
TTwoPoint<SImpl>::TTwoPoint(const std::string name)
: Module<TwoPointPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename SImpl>
std::vector<std::string> TTwoPoint<SImpl>::getInput(void)
{
return par().op;
}
template <typename SImpl>
std::vector<std::string> TTwoPoint<SImpl>::getOutput(void)
{
std::vector<std::string> out = {};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTwoPoint<SImpl>::setup(void)
{
const unsigned int nt = env().getDim().back();
envTmp(std::vector<std::vector<TComplex>>, "slicedOp", 1, par().op.size(),
std::vector<TComplex>(nt));
}
// execution ///////////////////////////////////////////////////////////////////
template <typename SImpl>
void TTwoPoint<SImpl>::execute(void)
{
LOG(Message) << "Computing 2-point functions for operators:" << std::endl;
for (auto &o: par().op)
{
LOG(Message) << " '" << o << "'" << std::endl;
}
ResultWriter writer(RESULT_FILE_NAME(par().output));
const unsigned int nd = env().getDim().size();
std::vector<Result> result;
envGetTmp(std::vector<std::vector<TComplex>>, slicedOp);
for (unsigned int i = 0; i < par().op.size(); ++i)
{
auto &op = envGet(ComplexField, par().op[i]);
sliceSum(op, slicedOp[i], nd - 1);
}
for (unsigned int i = 0; i < par().op.size(); ++i)
for (unsigned int j = 0; j < par().op.size(); ++j)
{
Result r;
r.sink = par().op[i];
r.source = par().op[j];
r.data = makeTwoPoint(slicedOp[i], slicedOp[j]);
result.push_back(r);
}
write(writer, "twopt", result);
}
// make 2-pt function //////////////////////////////////////////////////////////
template <class SImpl>
template <class SinkSite, class SourceSite>
std::vector<Complex> TTwoPoint<SImpl>::makeTwoPoint(
const std::vector<SinkSite> &sink,
const std::vector<SourceSite> &source)
{
assert(sink.size() == source.size());
unsigned int nt = sink.size();
std::vector<Complex> res(nt, 0.);
for (unsigned int dt = 0; dt < nt; ++dt)
{
for (unsigned int t = 0; t < nt; ++t)
{
res[dt] += TensorRemove(trace(sink[(t+dt)%nt]*source[t]));
}
res[dt] *= 1./static_cast<double>(nt);
}
return res;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MScalarSUN_TwoPoint_hpp_

View File

@ -1,3 +1,32 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSink/Point.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MSink_Point_hpp_
#define Hadrons_MSink_Point_hpp_
@ -33,10 +62,14 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
bool hasPhase_{false};
std::string momphName_;
};
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSink);
@ -49,6 +82,7 @@ MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink);
template <typename FImpl>
TPoint<FImpl>::TPoint(const std::string name)
: Module<PointPar>(name)
, momphName_ (name + "_momph")
{}
// dependencies/products ///////////////////////////////////////////////////////
@ -72,30 +106,37 @@ std::vector<std::string> TPoint<FImpl>::getOutput(void)
template <typename FImpl>
void TPoint<FImpl>::setup(void)
{
unsigned int size;
size = env().template lattice4dSize<LatticeComplex>();
env().registerObject(getName(), size);
envTmpLat(LatticeComplex, "coor");
envCacheLat(LatticeComplex, momphName_);
envCreate(SinkFn, getName(), 1, nullptr);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::execute(void)
{
std::vector<Real> p = strToVec<Real>(par().mom);
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Complex i(0.0,1.0);
{
LOG(Message) << "Setting up point sink function for momentum ["
<< par().mom << "]" << std::endl;
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
auto &ph = envGet(LatticeComplex, momphName_);
if (!hasPhase_)
{
LatticeCoordinate(coor, mu);
ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
Complex i(0.0,1.0);
std::vector<Real> p;
envGetTmp(LatticeComplex, coor);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
}
ph = exp((Real)(2*M_PI)*i*ph);
hasPhase_ = true;
}
ph = exp((Real)(2*M_PI)*i*ph);
auto sink = [ph](const PropagatorField &field)
auto sink = [&ph](const PropagatorField &field)
{
SlicedPropagator res;
PropagatorField tmp = ph*field;
@ -104,7 +145,7 @@ void TPoint<FImpl>::execute(void)
return res;
};
env().setObject(getName(), new SinkFn(sink));
envGet(SinkFn, getName()) = sink;
}
END_MODULE_NAMESPACE

View File

@ -0,0 +1,127 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSink/Smear.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MSink_Smear_hpp_
#define Hadrons_MSink_Smear_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Smear *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSink)
class SmearPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(SmearPar,
std::string, q,
std::string, sink);
};
template <typename FImpl>
class TSmear: public Module<SmearPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
SINK_TYPE_ALIASES();
public:
// constructor
TSmear(const std::string name);
// destructor
virtual ~TSmear(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Smear, TSmear<FIMPL>, MSink);
/******************************************************************************
* TSmear implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TSmear<FImpl>::TSmear(const std::string name)
: Module<SmearPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TSmear<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().sink};
return in;
}
template <typename FImpl>
std::vector<std::string> TSmear<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TSmear<FImpl>::setup(void)
{
envCreate(SlicedPropagator, getName(), 1, env().getDim(Tp));
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TSmear<FImpl>::execute(void)
{
LOG(Message) << "Sink smearing propagator '" << par().q
<< "' using sink function '" << par().sink << "'."
<< std::endl;
auto &sink = envGet(SinkFn, par().sink);
auto &q = envGet(PropagatorField, par().q);
auto &out = envGet(SlicedPropagator, getName());
out = sink(q);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MSink_Smear_hpp_

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSolver/RBPrecCG.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -61,7 +60,9 @@ public:
virtual ~TRBPrecCG(void) = default;
// dependencies/products
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getReference(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -83,11 +84,19 @@ TRBPrecCG<FImpl>::TRBPrecCG(const std::string name)
template <typename FImpl>
std::vector<std::string> TRBPrecCG<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().action};
std::vector<std::string> in = {};
return in;
}
template <typename FImpl>
std::vector<std::string> TRBPrecCG<FImpl>::getReference(void)
{
std::vector<std::string> ref = {par().action};
return ref;
}
template <typename FImpl>
std::vector<std::string> TRBPrecCG<FImpl>::getOutput(void)
{
@ -100,17 +109,12 @@ std::vector<std::string> TRBPrecCG<FImpl>::getOutput(void)
template <typename FImpl>
void TRBPrecCG<FImpl>::setup(void)
{
auto Ls = env().getObjectLs(par().action);
env().registerObject(getName(), 0, Ls);
env().addOwnership(getName(), par().action);
}
LOG(Message) << "setting up Schur red-black preconditioned CG for"
<< " action '" << par().action << "' with residual "
<< par().residual << std::endl;
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TRBPrecCG<FImpl>::execute(void)
{
auto &mat = *(env().template getObject<FMat>(par().action));
auto Ls = env().getObjectLs(par().action);
auto &mat = envGet(FMat, par().action);
auto solver = [&mat, this](FermionField &sol, const FermionField &source)
{
ConjugateGradient<FermionField> cg(par().residual, 10000);
@ -118,13 +122,14 @@ void TRBPrecCG<FImpl>::execute(void)
schurSolver(mat, source, sol);
};
LOG(Message) << "setting up Schur red-black preconditioned CG for"
<< " action '" << par().action << "' with residual "
<< par().residual << std::endl;
env().setObject(getName(), new SolverFn(solver));
envCreate(SolverFn, getName(), Ls, solver);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TRBPrecCG<FImpl>::execute(void)
{}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE

View File

@ -4,10 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Point.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -72,6 +72,7 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
@ -111,19 +112,20 @@ std::vector<std::string> TPoint<FImpl>::getOutput(void)
template <typename FImpl>
void TPoint<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
envCreateLat(PropagatorField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TPoint<FImpl>::execute(void)
{
std::vector<int> position = strToVec<int>(par().position);
typename SitePropagator::scalar_object id;
LOG(Message) << "Creating point source at position [" << par().position
<< "]" << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
<< "]" << std::endl;
std::vector<int> position = strToVec<int>(par().position);
auto &src = envGet(PropagatorField, getName());
SitePropagator id;
id = 1.;
src = zero;
pokeSite(id, src, position);

View File

@ -0,0 +1,160 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/SeqConserved.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MSource_SeqConserved_hpp_
#define Hadrons_MSource_SeqConserved_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Sequential source
-----------------------------
* src_x = q_x * theta(x_3 - tA) * theta(tB - x_3) * J_mu * exp(i x.mom)
* options:
- q: input propagator (string)
- action: fermion action used for propagator q (string)
- tA: begin timeslice (integer)
- tB: end timesilce (integer)
- curr_type: type of conserved current to insert (Current)
- mu: Lorentz index of current to insert (integer)
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
*/
/******************************************************************************
* SeqConserved *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class SeqConservedPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(SeqConservedPar,
std::string, q,
std::string, action,
unsigned int, tA,
unsigned int, tB,
Current, curr_type,
unsigned int, mu,
std::string, mom);
};
template <typename FImpl>
class TSeqConserved: public Module<SeqConservedPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TSeqConserved(const std::string name);
// destructor
virtual ~TSeqConserved(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(SeqConserved, TSeqConserved<FIMPL>, MSource);
/******************************************************************************
* TSeqConserved implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TSeqConserved<FImpl>::TSeqConserved(const std::string name)
: Module<SeqConservedPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TSeqConserved<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().action};
return in;
}
template <typename FImpl>
std::vector<std::string> TSeqConserved<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TSeqConserved<FImpl>::setup(void)
{
auto Ls_ = env().getObjectLs(par().action);
envCreateLat(PropagatorField, getName(), Ls_);
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TSeqConserved<FImpl>::execute(void)
{
if (par().tA == par().tB)
{
LOG(Message) << "Generating sequential source with conserved "
<< par().curr_type << " current insertion (mu = "
<< par().mu << ") at " << "t = " << par().tA << std::endl;
}
else
{
LOG(Message) << "Generating sequential source with conserved "
<< par().curr_type << " current insertion (mu = "
<< par().mu << ") for " << par().tA << " <= t <= "
<< par().tB << std::endl;
}
auto &src = envGet(PropagatorField, getName());
auto &q = envGet(PropagatorField, par().q);
auto &mat = envGet(FMat, par().action);
std::vector<Real> mom = strToVec<Real>(par().mom);
mat.SeqConservedCurrent(q, src, par().curr_type, par().mu,
mom, par().tA, par().tB);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_SeqConserved_hpp_

View File

@ -4,11 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -81,10 +80,14 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
bool hasPhase_{false};
std::string momphName_, tName_;
};
MODULE_REGISTER_NS(SeqGamma, TSeqGamma<FIMPL>, MSource);
@ -96,6 +99,8 @@ MODULE_REGISTER_NS(SeqGamma, TSeqGamma<FIMPL>, MSource);
template <typename FImpl>
TSeqGamma<FImpl>::TSeqGamma(const std::string name)
: Module<SeqGammaPar>(name)
, momphName_ (name + "_momph")
, tName_ (name + "_t")
{}
// dependencies/products ///////////////////////////////////////////////////////
@ -119,7 +124,10 @@ std::vector<std::string> TSeqGamma<FImpl>::getOutput(void)
template <typename FImpl>
void TSeqGamma<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
envCreateLat(PropagatorField, getName());
envCacheLat(Lattice<iScalar<vInteger>>, tName_);
envCacheLat(LatticeComplex, momphName_);
envTmpLat(LatticeComplex, "coor");
}
// execution ///////////////////////////////////////////////////////////////////
@ -137,23 +145,29 @@ void TSeqGamma<FImpl>::execute(void)
<< " sequential source for "
<< par().tA << " <= t <= " << par().tB << std::endl;
}
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Gamma g(par().gamma);
std::vector<Real> p;
Complex i(0.0,1.0);
auto &src = envGet(PropagatorField, getName());
auto &q = envGet(PropagatorField, par().q);
auto &ph = envGet(LatticeComplex, momphName_);
auto &t = envGet(Lattice<iScalar<vInteger>>, tName_);
Gamma g(par().gamma);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
if (!hasPhase_)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
Complex i(0.0,1.0);
std::vector<Real> p;
envGetTmp(LatticeComplex, coor);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
hasPhase_ = true;
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
}

View File

@ -4,9 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Wall.hpp
Copyright (C) 2017
Copyright (C) 2015-2018
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -73,10 +74,14 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
bool hasPhase_{false};
std::string momphName_, tName_;
};
MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
@ -88,13 +93,15 @@ MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
template <typename FImpl>
TWall<FImpl>::TWall(const std::string name)
: Module<WallPar>(name)
, momphName_ (name + "_momph")
, tName_ (name + "_t")
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getInput(void)
{
std::vector<std::string> in;
std::vector<std::string> in = {};
return in;
}
@ -111,7 +118,7 @@ std::vector<std::string> TWall<FImpl>::getOutput(void)
template <typename FImpl>
void TWall<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
envCreateLat(PropagatorField, getName());
}
// execution ///////////////////////////////////////////////////////////////////
@ -121,21 +128,28 @@ void TWall<FImpl>::execute(void)
LOG(Message) << "Generating wall source at t = " << par().tW
<< " with momentum " << par().mom << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
std::vector<Real> p;
Complex i(0.0,1.0);
auto &src = envGet(PropagatorField, getName());
auto &ph = envGet(LatticeComplex, momphName_);
auto &t = envGet(Lattice<iScalar<vInteger>>, tName_);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < Nd; mu++)
if (!hasPhase_)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
Complex i(0.0,1.0);
std::vector<Real> p;
envGetTmp(LatticeComplex, coor);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
hasPhase_ = true;
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = 1.;
src = where((t == par().tW), src*ph, 0.*src);
}

View File

@ -4,8 +4,7 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Z2.hpp
Copyright (C) 2015
Copyright (C) 2016
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
@ -76,10 +75,14 @@ public:
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
private:
bool hasT_{false};
std::string tName_;
};
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
@ -92,6 +95,7 @@ MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource);
template <typename FImpl>
TZ2<FImpl>::TZ2(const std::string name)
: Module<Z2Par>(name)
, tName_ (name + "_t")
{}
// dependencies/products ///////////////////////////////////////////////////////
@ -115,29 +119,36 @@ std::vector<std::string> TZ2<FImpl>::getOutput(void)
template <typename FImpl>
void TZ2<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
envCreateLat(PropagatorField, getName());
envCacheLat(Lattice<iScalar<vInteger>>, tName_);
envTmpLat(LatticeComplex, "eta");
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TZ2<FImpl>::execute(void)
{
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex eta(env().getGrid());
Complex shift(1., 1.);
if (par().tA == par().tB)
{
LOG(Message) << "Generating Z_2 wall source at t= " << par().tA
<< std::endl;
<< std::endl;
}
else
{
LOG(Message) << "Generating Z_2 band for " << par().tA << " <= t <= "
<< par().tB << std::endl;
<< par().tB << std::endl;
}
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
LatticeCoordinate(t, Tp);
auto &src = envGet(PropagatorField, getName());
auto &t = envGet(Lattice<iScalar<vInteger>>, tName_);
Complex shift(1., 1.);
if (!hasT_)
{
LatticeCoordinate(t, Tp);
hasT_ = true;
}
envGetTmp(LatticeComplex, eta);
bernoulli(*env().get4dRng(), eta);
eta = (2.*eta - shift)*(1./::sqrt(2.));
eta = where((t >= par().tA) and (t <= par().tB), eta, 0.*eta);

View File

@ -0,0 +1,186 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MUtilities_TestSeqConserved_hpp_
#define Hadrons_MUtilities_TestSeqConserved_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Ward Identity contractions using sequential propagators.
-----------------------------
* options:
- q: point source propagator, 5D if available (string)
- qSeq: result of sequential insertion of conserved current using q (string)
- action: action used for computation of q (string)
- origin: string giving point source origin of q (string)
- t_J: time at which sequential current is inserted (int)
- mu: Lorentz index of current inserted (int)
- curr: current type, e.g. vector/axial (Current)
*/
/******************************************************************************
* TestSeqConserved *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MUtilities)
class TestSeqConservedPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqConservedPar,
std::string, q,
std::string, qSeq,
std::string, action,
std::string, origin,
unsigned int, t_J,
unsigned int, mu,
Current, curr);
};
template <typename FImpl>
class TTestSeqConserved: public Module<TestSeqConservedPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TTestSeqConserved(const std::string name);
// destructor
virtual ~TTestSeqConserved(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(TestSeqConserved, TTestSeqConserved<FIMPL>, MUtilities);
/******************************************************************************
* TTestSeqConserved implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TTestSeqConserved<FImpl>::TTestSeqConserved(const std::string name)
: Module<TestSeqConservedPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TTestSeqConserved<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().qSeq, par().action};
return in;
}
template <typename FImpl>
std::vector<std::string> TTestSeqConserved<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TTestSeqConserved<FImpl>::setup(void)
{
auto Ls = env().getObjectLs(par().q);
if (Ls != env().getObjectLs(par().action))
{
HADRON_ERROR(Size, "Ls mismatch between quark action and propagator");
}
envTmpLat(PropagatorField, "tmp");
envTmpLat(LatticeComplex, "c");
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TTestSeqConserved<FImpl>::execute(void)
{
// Check sequential insertion of current gives same result as conserved
// current sink upon contraction. Assume q uses a point source.
auto &q = envGet(PropagatorField, par().q);
auto &qSeq = envGet(PropagatorField, par().qSeq);
auto &act = envGet(FMat, par().action);
Gamma g5(Gamma::Algebra::Gamma5);
Gamma::Algebra gA = (par().curr == Current::Axial) ?
Gamma::Algebra::Gamma5 :
Gamma::Algebra::Identity;
Gamma g(gA);
SitePropagator qSite;
Complex test_S, test_V, check_S, check_V;
std::vector<TComplex> check_buf;
std::vector<int> siteCoord;
envGetTmp(PropagatorField, tmp);
envGetTmp(LatticeComplex, c);
siteCoord = strToVec<int>(par().origin);
peekSite(qSite, qSeq, siteCoord);
test_S = trace(qSite*g);
test_V = trace(qSite*g*Gamma::gmu[par().mu]);
act.ContractConservedCurrent(q, q, tmp, par().curr, par().mu);
c = trace(tmp*g);
sliceSum(c, check_buf, Tp);
check_S = TensorRemove(check_buf[par().t_J]);
c = trace(tmp*g*Gamma::gmu[par().mu]);
sliceSum(c, check_buf, Tp);
check_V = TensorRemove(check_buf[par().t_J]);
LOG(Message) << "Test S = " << abs(test_S) << std::endl;
LOG(Message) << "Test V = " << abs(test_V) << std::endl;
LOG(Message) << "Check S = " << abs(check_S) << std::endl;
LOG(Message) << "Check V = " << abs(check_V) << std::endl;
// Check difference = 0
check_S -= test_S;
check_V -= test_V;
LOG(Message) << "Consistency check for sequential conserved "
<< par().curr << " current insertion: " << std::endl;
LOG(Message) << "Diff S = " << abs(check_S) << std::endl;
LOG(Message) << "Diff V = " << abs(check_V) << std::endl;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_TestSeqConserved_hpp_

View File

@ -0,0 +1,150 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MUtilities/TestSeqGamma.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Lanny91 <andrew.lawson@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MUtilities_TestSeqGamma_hpp_
#define Hadrons_MUtilities_TestSeqGamma_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* TestSeqGamma *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MUtilities)
class TestSeqGammaPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TestSeqGammaPar,
std::string, q,
std::string, qSeq,
std::string, origin,
Gamma::Algebra, gamma,
unsigned int, t_g);
};
template <typename FImpl>
class TTestSeqGamma: public Module<TestSeqGammaPar>
{
public:
FERM_TYPE_ALIASES(FImpl,);
public:
// constructor
TTestSeqGamma(const std::string name);
// destructor
virtual ~TTestSeqGamma(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
protected:
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(TestSeqGamma, TTestSeqGamma<FIMPL>, MUtilities);
/******************************************************************************
* TTestSeqGamma implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TTestSeqGamma<FImpl>::TTestSeqGamma(const std::string name)
: Module<TestSeqGammaPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TTestSeqGamma<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().qSeq};
return in;
}
template <typename FImpl>
std::vector<std::string> TTestSeqGamma<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TTestSeqGamma<FImpl>::setup(void)
{
envTmpLat(LatticeComplex, "c");
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TTestSeqGamma<FImpl>::execute(void)
{
auto &q = envGet(PropagatorField, par().q);
auto &qSeq = envGet(PropagatorField, par().qSeq);
Gamma g5(Gamma::Algebra::Gamma5);
Gamma g(par().gamma);
SitePropagator qSite;
Complex test, check;
std::vector<TComplex> check_buf;
std::vector<int> siteCoord;
// Check sequential insertion of gamma matrix gives same result as
// insertion of gamma at sink upon contraction. Assume q uses a point
// source.
envGetTmp(LatticeComplex, c);
siteCoord = strToVec<int>(par().origin);
peekSite(qSite, qSeq, siteCoord);
test = trace(g*qSite);
c = trace(adj(g)*g5*adj(q)*g5*g*q);
sliceSum(c, check_buf, Tp);
check = TensorRemove(check_buf[par().t_g]);
LOG(Message) << "Seq Result = " << abs(test) << std::endl;
LOG(Message) << "Reference = " << abs(check) << std::endl;
// Check difference = 0
check -= test;
LOG(Message) << "Consistency check for sequential " << par().gamma
<< " insertion = " << abs(check) << std::endl;
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_TestSeqGamma_hpp_

View File

@ -0,0 +1,622 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/VirtualMachine.cc
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/VirtualMachine.hpp>
#include <Grid/Hadrons/GeneticScheduler.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
/******************************************************************************
* VirtualMachine implementation *
******************************************************************************/
// trajectory counter //////////////////////////////////////////////////////////
void VirtualMachine::setTrajectory(const unsigned int traj)
{
traj_ = traj;
}
unsigned int VirtualMachine::getTrajectory(void) const
{
return traj_;
}
// module management ///////////////////////////////////////////////////////////
void VirtualMachine::pushModule(VirtualMachine::ModPt &pt)
{
std::string name = pt->getName();
if (!hasModule(name))
{
std::vector<unsigned int> inputAddress;
unsigned int address;
ModuleInfo m;
// module registration -------------------------------------------------
m.data = std::move(pt);
m.type = typeIdPt(*m.data.get());
m.name = name;
// input dependencies
for (auto &in: m.data->getInput())
{
if (!env().hasObject(in))
{
// if object does not exist, add it with no creator module
env().addObject(in , -1);
}
m.input.push_back(env().getObjectAddress(in));
}
// reference dependencies
for (auto &ref: m.data->getReference())
{
if (!env().hasObject(ref))
{
// if object does not exist, add it with no creator module
env().addObject(ref , -1);
}
m.input.push_back(env().getObjectAddress(ref));
}
auto inCopy = m.input;
// if module has inputs with references, they need to be added as
// an input
for (auto &in: inCopy)
{
int inm = env().getObjectModule(in);
if (inm > 0)
{
if (getModule(inm)->getReference().size() > 0)
{
for (auto &rin: getModule(inm)->getReference())
{
m.input.push_back(env().getObjectAddress(rin));
}
}
}
}
module_.push_back(std::move(m));
address = static_cast<unsigned int>(module_.size() - 1);
moduleAddress_[name] = address;
// connecting outputs to potential inputs ------------------------------
for (auto &out: getModule(address)->getOutput())
{
if (!env().hasObject(out))
{
// output does not exists, add it
env().addObject(out, address);
}
else
{
if (env().getObjectModule(env().getObjectAddress(out)) < 0)
{
// output exists but without creator, correct it
env().setObjectModule(env().getObjectAddress(out), address);
}
else
{
// output already fully registered, error
HADRON_ERROR(Definition, "object '" + out
+ "' is already produced by module '"
+ module_[env().getObjectModule(out)].name
+ "' (while pushing module '" + name + "')");
}
if (getModule(address)->getReference().size() > 0)
{
// module has references, dependency should be propagated
// to children modules; find module with `out` as an input
// and add references to their input
auto pred = [this, out](const ModuleInfo &n)
{
auto &in = n.input;
auto it = std::find(in.begin(), in.end(),
env().getObjectAddress(out));
return (it != in.end());
};
auto it = std::find_if(module_.begin(), module_.end(), pred);
while (it != module_.end())
{
for (auto &ref: getModule(address)->getReference())
{
it->input.push_back(env().getObjectAddress(ref));
}
it = std::find_if(++it, module_.end(), pred);
}
}
}
}
graphOutdated_ = true;
memoryProfileOutdated_ = true;
}
else
{
HADRON_ERROR(Definition, "module '" + name + "' already exists");
}
}
unsigned int VirtualMachine::getNModule(void) const
{
return module_.size();
}
void VirtualMachine::createModule(const std::string name, const std::string type,
XmlReader &reader)
{
auto &factory = ModuleFactory::getInstance();
auto pt = factory.create(type, name);
pt->parseParameters(reader, "options");
pushModule(pt);
}
ModuleBase * VirtualMachine::getModule(const unsigned int address) const
{
if (hasModule(address))
{
return module_[address].data.get();
}
else
{
HADRON_ERROR(Definition, "no module with address " + std::to_string(address));
}
}
ModuleBase * VirtualMachine::getModule(const std::string name) const
{
return getModule(getModuleAddress(name));
}
unsigned int VirtualMachine::getModuleAddress(const std::string name) const
{
if (hasModule(name))
{
return moduleAddress_.at(name);
}
else
{
HADRON_ERROR(Definition, "no module with name '" + name + "'");
}
}
std::string VirtualMachine::getModuleName(const unsigned int address) const
{
if (hasModule(address))
{
return module_[address].name;
}
else
{
HADRON_ERROR(Definition, "no module with address " + std::to_string(address));
}
}
std::string VirtualMachine::getModuleType(const unsigned int address) const
{
if (hasModule(address))
{
return typeName(module_[address].type);
}
else
{
HADRON_ERROR(Definition, "no module with address " + std::to_string(address));
}
}
std::string VirtualMachine::getModuleType(const std::string name) const
{
return getModuleType(getModuleAddress(name));
}
std::string VirtualMachine::getModuleNamespace(const unsigned int address) const
{
std::string type = getModuleType(address), ns;
auto pos2 = type.rfind("::");
auto pos1 = type.rfind("::", pos2 - 2);
return type.substr(pos1 + 2, pos2 - pos1 - 2);
}
std::string VirtualMachine::getModuleNamespace(const std::string name) const
{
return getModuleNamespace(getModuleAddress(name));
}
bool VirtualMachine::hasModule(const unsigned int address) const
{
return (address < module_.size());
}
bool VirtualMachine::hasModule(const std::string name) const
{
return (moduleAddress_.find(name) != moduleAddress_.end());
}
// print VM content ////////////////////////////////////////////////////////////
void VirtualMachine::printContent(void) const
{
LOG(Debug) << "Modules: " << std::endl;
for (unsigned int i = 0; i < module_.size(); ++i)
{
LOG(Debug) << std::setw(4) << i << ": "
<< getModuleName(i) << std::endl;
}
}
// module graph ////////////////////////////////////////////////////////////////
Graph<unsigned int> VirtualMachine::getModuleGraph(void)
{
if (graphOutdated_)
{
makeModuleGraph();
graphOutdated_ = false;
}
return graph_;
}
void VirtualMachine::makeModuleGraph(void)
{
Graph<unsigned int> graph;
// create vertices
for (unsigned int m = 0; m < module_.size(); ++m)
{
graph.addVertex(m);
}
// create edges
for (unsigned int m = 0; m < module_.size(); ++m)
{
for (auto &in: module_[m].input)
{
graph.addEdge(env().getObjectModule(in), m);
}
}
graph_ = graph;
}
// memory profile //////////////////////////////////////////////////////////////
const VirtualMachine::MemoryProfile & VirtualMachine::getMemoryProfile(void)
{
if (memoryProfileOutdated_)
{
makeMemoryProfile();
memoryProfileOutdated_ = false;
}
return profile_;
}
void VirtualMachine::makeMemoryProfile(void)
{
bool protect = env().objectsProtected();
bool hmsg = HadronsLogMessage.isActive();
bool gmsg = GridLogMessage.isActive();
bool err = HadronsLogError.isActive();
auto program = getModuleGraph().topoSort();
resetProfile();
profile_.module.resize(getNModule());
env().protectObjects(false);
GridLogMessage.Active(false);
HadronsLogMessage.Active(false);
HadronsLogError.Active(false);
for (auto it = program.rbegin(); it != program.rend(); ++it)
{
auto a = *it;
if (profile_.module[a].empty())
{
LOG(Debug) << "Profiling memory for module '" << module_[a].name
<< "' (" << a << ")..." << std::endl;
memoryProfile(a);
env().freeAll();
}
}
env().protectObjects(protect);
GridLogMessage.Active(gmsg);
HadronsLogMessage.Active(hmsg);
HadronsLogError.Active(err);
LOG(Debug) << "Memory profile:" << std::endl;
LOG(Debug) << "----------------" << std::endl;
for (unsigned int a = 0; a < profile_.module.size(); ++a)
{
LOG(Debug) << getModuleName(a) << " (" << a << ")" << std::endl;
for (auto &o: profile_.module[a])
{
LOG(Debug) << "|__ " << env().getObjectName(o.first) << " ("
<< sizeString(o.second) << ")" << std::endl;
}
LOG(Debug) << std::endl;
}
LOG(Debug) << "----------------" << std::endl;
}
void VirtualMachine::resetProfile(void)
{
profile_.module.clear();
profile_.object.clear();
}
void VirtualMachine::resizeProfile(void)
{
if (env().getMaxAddress() > profile_.object.size())
{
MemoryPrint empty;
empty.size = 0;
empty.module = -1;
profile_.object.resize(env().getMaxAddress(), empty);
}
}
void VirtualMachine::updateProfile(const unsigned int address)
{
resizeProfile();
for (unsigned int a = 0; a < env().getMaxAddress(); ++a)
{
if (env().hasCreatedObject(a) and (profile_.object[a].module == -1))
{
profile_.object[a].size = env().getObjectSize(a);
profile_.object[a].storage = env().getObjectStorage(a);
profile_.object[a].module = address;
profile_.module[address][a] = profile_.object[a].size;
if (env().getObjectModule(a) < 0)
{
env().setObjectModule(a, address);
}
}
}
}
void VirtualMachine::cleanEnvironment(void)
{
resizeProfile();
for (unsigned int a = 0; a < env().getMaxAddress(); ++a)
{
if (env().hasCreatedObject(a) and (profile_.object[a].module == -1))
{
env().freeObject(a);
}
}
}
void VirtualMachine::memoryProfile(const unsigned int address)
{
auto m = getModule(address);
LOG(Debug) << "Setting up module '" << m->getName()
<< "' (" << address << ")..." << std::endl;
try
{
m->setup();
updateProfile(address);
}
catch (Exceptions::Definition &)
{
cleanEnvironment();
for (auto &in: m->getInput())
{
memoryProfile(env().getObjectModule(in));
}
for (auto &ref: m->getReference())
{
memoryProfile(env().getObjectModule(ref));
}
m->setup();
updateProfile(address);
}
}
void VirtualMachine::memoryProfile(const std::string name)
{
memoryProfile(getModuleAddress(name));
}
// garbage collector ///////////////////////////////////////////////////////////
VirtualMachine::GarbageSchedule
VirtualMachine::makeGarbageSchedule(const Program &p) const
{
GarbageSchedule freeProg;
freeProg.resize(p.size());
for (unsigned int a = 0; a < env().getMaxAddress(); ++a)
{
if (env().getObjectStorage(a) == Environment::Storage::temporary)
{
auto it = std::find(p.begin(), p.end(), env().getObjectModule(a));
if (it != p.end())
{
freeProg[std::distance(p.begin(), it)].insert(a);
}
}
else if (env().getObjectStorage(a) == Environment::Storage::object)
{
auto pred = [a, this](const unsigned int b)
{
auto &in = module_[b].input;
auto it = std::find(in.begin(), in.end(), a);
return (it != in.end()) or (b == env().getObjectModule(a));
};
auto it = std::find_if(p.rbegin(), p.rend(), pred);
if (it != p.rend())
{
freeProg[std::distance(it, p.rend()) - 1].insert(a);
}
}
}
return freeProg;
}
// high-water memory function //////////////////////////////////////////////////
VirtualMachine::Size VirtualMachine::memoryNeeded(const Program &p)
{
const MemoryProfile &profile = getMemoryProfile();
GarbageSchedule freep = makeGarbageSchedule(p);
Size current = 0, max = 0;
for (unsigned int i = 0; i < p.size(); ++i)
{
for (auto &o: profile.module[p[i]])
{
current += o.second;
}
max = std::max(current, max);
for (auto &o: freep[i])
{
current -= profile.object[o].size;
}
}
return max;
}
// genetic scheduler ///////////////////////////////////////////////////////////
VirtualMachine::Program VirtualMachine::schedule(const GeneticPar &par)
{
typedef GeneticScheduler<Size, unsigned int> Scheduler;
auto graph = getModuleGraph();
//constrained topological sort using a genetic algorithm
LOG(Message) << "Scheduling computation..." << std::endl;
LOG(Message) << " #module= " << graph.size() << std::endl;
LOG(Message) << " population size= " << par.popSize << std::endl;
LOG(Message) << " max. generation= " << par.maxGen << std::endl;
LOG(Message) << " max. cst. generation= " << par.maxCstGen << std::endl;
LOG(Message) << " mutation rate= " << par.mutationRate << std::endl;
unsigned int k = 0, gen, prevPeak, nCstPeak = 0;
std::random_device rd;
Scheduler::Parameters gpar;
gpar.popSize = par.popSize;
gpar.mutationRate = par.mutationRate;
gpar.seed = rd();
CartesianCommunicator::BroadcastWorld(0, &(gpar.seed), sizeof(gpar.seed));
Scheduler::ObjFunc memPeak = [this](const Program &p)->Size
{
return memoryNeeded(p);
};
Scheduler scheduler(graph, memPeak, gpar);
gen = 0;
do
{
LOG(Debug) << "Generation " << gen << ":" << std::endl;
scheduler.nextGeneration();
if (gen != 0)
{
if (prevPeak == scheduler.getMinValue())
{
nCstPeak++;
}
else
{
nCstPeak = 0;
}
}
prevPeak = scheduler.getMinValue();
if (gen % 10 == 0)
{
LOG(Iterative) << "Generation " << gen << ": "
<< sizeString(scheduler.getMinValue()) << std::endl;
}
gen++;
} while ((gen < par.maxGen) and (nCstPeak < par.maxCstGen));
return scheduler.getMinSchedule();
}
// general execution ///////////////////////////////////////////////////////////
#define BIG_SEP "==============="
#define SEP "---------------"
#define MEM_MSG(size) sizeString(size)
void VirtualMachine::executeProgram(const Program &p) const
{
Size memPeak = 0, sizeBefore, sizeAfter;
GarbageSchedule freeProg;
// build garbage collection schedule
LOG(Debug) << "Building garbage collection schedule..." << std::endl;
freeProg = makeGarbageSchedule(p);
// program execution
LOG(Debug) << "Executing program..." << std::endl;
for (unsigned int i = 0; i < p.size(); ++i)
{
// execute module
LOG(Message) << SEP << " Measurement step " << i + 1 << "/"
<< p.size() << " (module '" << module_[p[i]].name
<< "') " << SEP << std::endl;
(*module_[p[i]].data)();
sizeBefore = env().getTotalSize();
// print used memory after execution
LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore)
<< std::endl;
if (sizeBefore > memPeak)
{
memPeak = sizeBefore;
}
// garbage collection for step i
LOG(Message) << "Garbage collection..." << std::endl;
for (auto &j: freeProg[i])
{
env().freeObject(j);
}
// print used memory after garbage collection if necessary
sizeAfter = env().getTotalSize();
if (sizeBefore != sizeAfter)
{
LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter)
<< std::endl;
}
else
{
LOG(Message) << "Nothing to free" << std::endl;
}
}
}
void VirtualMachine::executeProgram(const std::vector<std::string> &p) const
{
Program pAddress;
for (auto &n: p)
{
pAddress.push_back(getModuleAddress(n));
}
executeProgram(pAddress);
}

View File

@ -0,0 +1,207 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/VirtualMachine.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_VirtualMachine_hpp_
#define Hadrons_VirtualMachine_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Graph.hpp>
#include <Grid/Hadrons/Environment.hpp>
BEGIN_HADRONS_NAMESPACE
#define DEFINE_VM_ALIAS \
inline VirtualMachine & vm(void) const\
{\
return VirtualMachine::getInstance();\
}
/******************************************************************************
* Virtual machine for module execution *
******************************************************************************/
// forward declaration of Module
class ModuleBase;
class VirtualMachine
{
SINGLETON_DEFCTOR(VirtualMachine);
public:
typedef SITE_SIZE_TYPE Size;
typedef std::unique_ptr<ModuleBase> ModPt;
typedef std::vector<std::set<unsigned int>> GarbageSchedule;
typedef std::vector<unsigned int> Program;
struct MemoryPrint
{
Size size;
Environment::Storage storage;
int module;
};
struct MemoryProfile
{
std::vector<std::map<unsigned int, Size>> module;
std::vector<MemoryPrint> object;
};
class GeneticPar: Serializable
{
public:
GeneticPar(void):
popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
unsigned int, popSize,
unsigned int, maxGen,
unsigned int, maxCstGen,
double , mutationRate);
};
private:
struct ModuleInfo
{
const std::type_info *type{nullptr};
std::string name;
ModPt data{nullptr};
std::vector<unsigned int> input;
size_t maxAllocated;
};
public:
// trajectory counter
void setTrajectory(const unsigned int traj);
unsigned int getTrajectory(void) const;
// module management
void pushModule(ModPt &pt);
template <typename M>
void createModule(const std::string name);
template <typename M>
void createModule(const std::string name,
const typename M::Par &par);
void createModule(const std::string name,
const std::string type,
XmlReader &reader);
unsigned int getNModule(void) const;
ModuleBase * getModule(const unsigned int address) const;
ModuleBase * getModule(const std::string name) const;
template <typename M>
M * getModule(const unsigned int address) const;
template <typename M>
M * getModule(const std::string name) const;
unsigned int getModuleAddress(const std::string name) const;
std::string getModuleName(const unsigned int address) const;
std::string getModuleType(const unsigned int address) const;
std::string getModuleType(const std::string name) const;
std::string getModuleNamespace(const unsigned int address) const;
std::string getModuleNamespace(const std::string name) const;
bool hasModule(const unsigned int address) const;
bool hasModule(const std::string name) const;
// print VM content
void printContent(void) const;
// module graph (could be a const reference if topoSort was const)
Graph<unsigned int> getModuleGraph(void);
// memory profile
const MemoryProfile &getMemoryProfile(void);
// garbage collector
GarbageSchedule makeGarbageSchedule(const Program &p) const;
// high-water memory function
Size memoryNeeded(const Program &p);
// genetic scheduler
Program schedule(const GeneticPar &par);
// general execution
void executeProgram(const Program &p) const;
void executeProgram(const std::vector<std::string> &p) const;
private:
// environment shortcut
DEFINE_ENV_ALIAS;
// module graph
void makeModuleGraph(void);
// memory profile
void makeMemoryProfile(void);
void resetProfile(void);
void resizeProfile(void);
void updateProfile(const unsigned int address);
void cleanEnvironment(void);
void memoryProfile(const std::string name);
void memoryProfile(const unsigned int address);
private:
// general
unsigned int traj_;
// module and related maps
std::vector<ModuleInfo> module_;
std::map<std::string, unsigned int> moduleAddress_;
std::string currentModule_{""};
// module graph
bool graphOutdated_{true};
Graph<unsigned int> graph_;
// memory profile
bool memoryProfileOutdated_{true};
MemoryProfile profile_;
};
/******************************************************************************
* VirtualMachine template implementation *
******************************************************************************/
// module management ///////////////////////////////////////////////////////////
template <typename M>
void VirtualMachine::createModule(const std::string name)
{
ModPt pt(new M(name));
pushModule(pt);
}
template <typename M>
void VirtualMachine::createModule(const std::string name,
const typename M::Par &par)
{
ModPt pt(new M(name));
static_cast<M *>(pt.get())->setPar(par);
pushModule(pt);
}
template <typename M>
M * VirtualMachine::getModule(const unsigned int address) const
{
if (auto *pt = dynamic_cast<M *>(getModule(address)))
{
return pt;
}
else
{
HADRON_ERROR(Definition, "module '" + module_[address].name
+ "' does not have type " + typeid(M).name()
+ "(has type: " + getModuleType(address) + ")");
}
}
template <typename M>
M * VirtualMachine::getModule(const std::string name) const
{
return getModule<M>(getModuleAddress(name));
}
END_HADRONS_NAMESPACE
#endif // Hadrons_VirtualMachine_hpp_

View File

@ -2,40 +2,51 @@ modules_cc =\
Modules/MScalar/ChargedProp.cc \
Modules/MScalar/FreeProp.cc \
Modules/MContraction/WeakHamiltonianEye.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MContraction/WeakNeutral4ptDisc.cc \
Modules/MGauge/StochEm.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MGauge/Unit.cc \
Modules/MGauge/Load.cc \
Modules/MGauge/StochEm.cc \
Modules/MGauge/Random.cc \
Modules/MGauge/FundtoHirep.cc \
Modules/MGauge/Random.cc
Modules/MScalar/FreeProp.cc \
Modules/MScalar/ChargedProp.cc \
Modules/MIO/LoadNersc.cc
modules_hpp =\
Modules/MLoop/NoiseLoop.hpp \
Modules/MScalar/ChargedProp.hpp \
Modules/MScalar/Scalar.hpp \
Modules/MScalar/FreeProp.hpp \
Modules/MSource/Wall.hpp \
Modules/MContraction/Baryon.hpp \
Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonian.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \
Modules/MContraction/DiscLoop.hpp \
Modules/MContraction/WeakNeutral4ptDisc.hpp \
Modules/MContraction/Gamma3pt.hpp \
Modules/MContraction/WardIdentity.hpp \
Modules/MContraction/WeakHamiltonianEye.hpp \
Modules/MFermion/GaugeProp.hpp \
Modules/MSource/SeqGamma.hpp \
Modules/MSource/Point.hpp \
Modules/MSource/Wall.hpp \
Modules/MSource/Z2.hpp \
Modules/MFermion/GaugeProp.hpp \
Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \
Modules/MContraction/WeakHamiltonianEye.hpp \
Modules/MContraction/DiscLoop.hpp \
Modules/MContraction/Baryon.hpp \
Modules/MContraction/Gamma3pt.hpp \
Modules/MContraction/WeakNeutral4ptDisc.hpp \
Modules/MContraction/WeakHamiltonian.hpp \
Modules/MSource/SeqConserved.hpp \
Modules/MSink/Smear.hpp \
Modules/MSink/Point.hpp \
Modules/MSolver/RBPrecCG.hpp \
Modules/MGauge/Unit.hpp \
Modules/MGauge/Random.hpp \
Modules/MGauge/StochEm.hpp \
Modules/MGauge/FundtoHirep.hpp \
Modules/MGauge/Unit.hpp \
Modules/MGauge/Load.hpp \
Modules/MGauge/Random.hpp \
Modules/MAction/WilsonClover.hpp \
Modules/MUtilities/TestSeqGamma.hpp \
Modules/MUtilities/TestSeqConserved.hpp \
Modules/MLoop/NoiseLoop.hpp \
Modules/MScalar/FreeProp.hpp \
Modules/MScalar/Scalar.hpp \
Modules/MScalar/ChargedProp.hpp \
Modules/MAction/DWF.hpp \
Modules/MAction/Wilson.hpp
Modules/MAction/Wilson.hpp \
Modules/MAction/WilsonClover.hpp \
Modules/MScalarSUN/TrMag.hpp \
Modules/MScalarSUN/TwoPoint.hpp \
Modules/MScalarSUN/TrPhi.hpp \
Modules/MIO/LoadNersc.hpp \
Modules/MIO/LoadBinary.hpp

View File

@ -1,28 +1,18 @@
extra_sources=
extra_headers=
if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc
extra_sources+=communicator/Communicator_base.cc
endif
if BUILD_COMMS_MPI3
extra_sources+=communicator/Communicator_mpi3.cc
extra_sources+=communicator/Communicator_base.cc
endif
if BUILD_COMMS_MPIT
extra_sources+=communicator/Communicator_mpit.cc
extra_sources+=communicator/Communicator_base.cc
endif
if BUILD_COMMS_SHMEM
extra_sources+=communicator/Communicator_shmem.cc
extra_sources+=communicator/Communicator_base.cc
extra_sources+=communicator/SharedMemoryMPI.cc
extra_sources+=communicator/SharedMemory.cc
endif
if BUILD_COMMS_NONE
extra_sources+=communicator/Communicator_none.cc
extra_sources+=communicator/Communicator_base.cc
extra_sources+=communicator/SharedMemoryNone.cc
extra_sources+=communicator/SharedMemory.cc
endif
if BUILD_HDF5

View File

@ -103,29 +103,32 @@ namespace Grid {
GridBase *CoarseGrid;
GridBase *FineGrid;
std::vector<Lattice<Fobj> > subspace;
int checkerboard;
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :
CoarseGrid(_CoarseGrid),
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
CoarseGrid(_CoarseGrid),
FineGrid(_FineGrid),
subspace(nbasis,_FineGrid)
subspace(nbasis,_FineGrid),
checkerboard(_checkerboard)
{
};
void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
// CheckOrthogonal();
}
void CheckOrthogonal(void){
CoarseVector iProj(CoarseGrid);
CoarseVector eProj(CoarseGrid);
Lattice<CComplex> pokey(CoarseGrid);
for(int i=0;i<nbasis;i++){
blockProject(iProj,subspace[i],subspace);
eProj=zero;
for(int ss=0;ss<CoarseGrid->oSites();ss++){
parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
eProj._odata[ss](i)=CComplex(1.0);
}
eProj=eProj - iProj;
@ -137,6 +140,7 @@ namespace Grid {
blockProject(CoarseVec,FineVec,subspace);
}
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
FineVec.checkerboard = subspace[0].checkerboard;
blockPromote(CoarseVec,FineVec,subspace);
}
void CreateSubspaceRandom(GridParallelRNG &RNG){
@ -147,6 +151,7 @@ namespace Grid {
Orthogonalise();
}
/*
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
// Run a Lanczos with sloppy convergence
@ -195,7 +200,7 @@ namespace Grid {
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
}
}
*/
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale;

View File

@ -312,20 +312,34 @@ namespace Grid {
public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
GridLogIterative.TimingMode(1);
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
n2 = Mpc(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
ComplexD dot= innerProduct(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
std::cout << GridLogIterative << " HermOp "<<std::endl;
Mpc(in,out);
}
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.MeooeDag(out,tmp);
Field tmp2(in._grid);
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
_Mat.Mooee(out,tmp);
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp2);
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
return nn;
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);
@ -350,6 +364,14 @@ namespace Grid {
virtual void operator() (const Field &in, Field &out) = 0;
};
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
public:
void operator() (const Field &in, Field &out){
out = in;
};
};
/////////////////////////////////////////////////////////////
// Base classes for Multishift solvers for operators
/////////////////////////////////////////////////////////////
@ -372,6 +394,64 @@ namespace Grid {
};
*/
////////////////////////////////////////////////////////////////////////////////////////////
// Hermitian operator Linear function and operator function
////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};
};
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
{}
void operator()(const Field& in, Field& out) {
_Linop.HermOp(in,out);
}
};
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
: _poly(poly), _Linop(linop) {};
void operator()(const Field& in, Field& out) {
_poly(_Linop,in,out);
}
};
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
std::vector<RealD> Coeffs;
public:
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Field AtoN(in._grid);
Field Mtmp(in._grid);
AtoN = in;
out = AtoN*Coeffs[0];
for(int n=1;n<Coeffs.size();n++){
Mtmp = AtoN;
Linop.HermOp(Mtmp,AtoN);
out=out+AtoN*Coeffs[n];
}
};
};
}

View File

@ -34,41 +34,12 @@ Author: Christoph Lehner <clehner@bnl.gov>
namespace Grid {
////////////////////////////////////////////////////////////////////////////////////////////
// Simple general polynomial with user supplied coefficients
////////////////////////////////////////////////////////////////////////////////////////////
template<class Field>
class HermOpOperatorFunction : public OperatorFunction<Field> {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Linop.HermOp(in,out);
};
};
template<class Field>
class Polynomial : public OperatorFunction<Field> {
private:
std::vector<RealD> Coeffs;
public:
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
Field AtoN(in._grid);
Field Mtmp(in._grid);
AtoN = in;
out = AtoN*Coeffs[0];
// std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl;
// std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl;
for(int n=1;n<Coeffs.size();n++){
Mtmp = AtoN;
Linop.HermOp(Mtmp,AtoN);
out=out+AtoN*Coeffs[n];
// std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl;
// std::cout << n<<" " <<norm2(out)<<std::endl;
}
};
};
struct ChebyParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
RealD, alpha,
RealD, beta,
int, Npoly);
};
////////////////////////////////////////////////////////////////////////////////////////////
// Generic Chebyshev approximations
@ -83,8 +54,10 @@ namespace Grid {
public:
void csv(std::ostream &out){
RealD diff = hi-lo;
for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) {
RealD diff = hi-lo;
RealD delta = (hi-lo)*1.0e-9;
for (RealD x=lo; x<hi; x+=delta) {
delta*=1.1;
RealD f = approx(x);
out<< x<<" "<<f<<std::endl;
}
@ -100,6 +73,7 @@ namespace Grid {
};
Chebyshev(){};
Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};

View File

@ -1,753 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BIRL_H
#define GRID_BIRL_H
#include <string.h> //memset
#include <zlib.h>
#include <sys/stat.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h>
#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h>
namespace Grid {
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
template<class Field>
class BlockImplicitlyRestartedLanczos {
const RealD small = 1.0e-16;
public:
int lock;
int get;
int Niter;
int converged;
int Nminres; // Minimum number of restarts; only check for convergence after
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
int Np; // Np -- Number of spare vecs in kryloc space
int Nm; // Nm -- total number of vectors
int orth_period;
RealD OrthoTime;
RealD eresid, betastp;
SortEigen<Field> _sort;
LinearFunction<Field> &_HermOp;
LinearFunction<Field> &_HermOpTest;
/////////////////////////
// Constructor
/////////////////////////
BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp,
LinearFunction<Field> & HermOpTest,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max iterations
int _Nminres, int _orth_period = 1) :
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nstop),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
Np = Nm-Nk; assert(Np>0);
};
BlockImplicitlyRestartedLanczos(
LinearFunction<Field> & HermOp,
LinearFunction<Field> & HermOpTest,
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
RealD _betastp, // if beta(k) < betastp: converged
int _Niter, // Max iterations
int _Nminres,
int _orth_period = 1) :
_HermOp(HermOp),
_HermOpTest(HermOpTest),
Nstop(_Nk),
Nk(_Nk),
Nm(_Nm),
eresid(_eresid),
betastp(_betastp),
Niter(_Niter),
Nminres(_Nminres),
orth_period(_orth_period)
{
Np = Nm-Nk; assert(Np>0);
};
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 0, v0 0
2. For k = 1,2,...,m Do:
3. wk:=Avkβkv_{k1}
4. αk:=(wk,vk) //
5. wk:=wkαkvk // wk orthog vk
6. βk+1 := wk2. If βk+1 = 0 then Stop
7. vk+1 := wk/βk+1
8. EndDo
*/
void step(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
BasisFieldVector<Field>& evec,
Field& w,int Nm,int k)
{
assert( k< Nm );
GridStopWatch gsw_op,gsw_o;
Field& evec_k = evec[k];
gsw_op.Start();
_HermOp(evec_k,w);
gsw_op.Stop();
if(k>0){
w -= lme[k-1] * evec[k-1];
}
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
RealD alph = real(zalph);
w = w - alph * evec_k;// 5. wk:=wkαkvk
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
std::cout<<GridLogMessage << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
const RealD tiny = 1.0e-20;
if ( beta < tiny ) {
std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
}
lmd[k] = alph;
lme[k] = beta;
gsw_o.Start();
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
}
gsw_o.Stop();
if(k < Nm-1) {
evec[k+1] = w;
}
std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() <<
" orth=" << gsw_o.Elapsed() << std::endl;
}
void qr_decomp(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int Nk,
int Nm,
std::vector<RealD>& Qt,
RealD Dsh,
int kmin,
int kmax)
{
int k = kmin-1;
RealD x;
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
RealD c = ( lmd[k] -Dsh) *Fden;
RealD s = -lme[k] *Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
x =-s*lme[k+1];
lme[k+1] = c*lme[k+1];
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2;
}
// Givens transformations
for(int k = kmin; k < kmax-1; ++k){
RealD Fden = 1.0/hypot(x,lme[k-1]);
RealD c = lme[k-1]*Fden;
RealD s = - x*Fden;
RealD tmpa1 = lmd[k];
RealD tmpa2 = lmd[k+1];
RealD tmpb = lme[k];
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
lme[k-1] = c*lme[k-1] -s*x;
if(k != kmax-2){
x = -s*lme[k+1];
lme[k+1] = c*lme[k+1];
}
for(int i=0; i<Nk; ++i){
RealD Qtmp1 = Qt[i+Nm*k ];
RealD Qtmp2 = Qt[i+Nm*(k+1)];
Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2;
Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2;
}
}
}
#ifdef USE_LAPACK_IRL
#define LAPACK_INT int
//long long
void diagonalize_lapack(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N1,
int N2,
std::vector<RealD>& Qt,
GridBase *grid){
std::cout << GridLogMessage << "diagonalize_lapack start\n";
GridStopWatch gsw;
const int size = Nm;
// tevals.resize(size);
// tevecs.resize(size);
LAPACK_INT NN = N1;
std::vector<double> evals_tmp(NN);
std::vector<double> evec_tmp(NN*NN);
memset(&evec_tmp[0],0,sizeof(double)*NN*NN);
// double AA[NN][NN];
std::vector<double> DD(NN);
std::vector<double> EE(NN);
for (int i = 0; i< NN; i++)
for (int j = i - 1; j <= i + 1; j++)
if ( j < NN && j >= 0 ) {
if (i==j) DD[i] = lmd[i];
if (i==j) evals_tmp[i] = lmd[i];
if (j==(i-1)) EE[j] = lme[j];
}
LAPACK_INT evals_found;
LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
LAPACK_INT liwork = 3+NN*10 ;
std::vector<LAPACK_INT> iwork(liwork);
std::vector<double> work(lwork);
std::vector<LAPACK_INT> isuppz(2*NN);
char jobz = 'V'; // calculate evals & evecs
char range = 'I'; // calculate all evals
// char range = 'A'; // calculate all evals
char uplo = 'U'; // refer to upper half of original matrix
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
std::vector<int> ifail(NN);
LAPACK_INT info;
// int total = QMP_get_number_of_nodes();
// int node = QMP_get_node_number();
// GridBase *grid = evec[0]._grid;
int total = grid->_Nprocessors;
int node = grid->_processor;
int interval = (NN/total)+1;
double vl = 0.0, vu = 0.0;
LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
if (iu > NN) iu=NN;
double tol = 0.0;
if (1) {
memset(&evals_tmp[0],0,sizeof(double)*NN);
if ( il <= NN){
std::cout << GridLogMessage << "dstegr started" << std::endl;
gsw.Start();
dstegr(&jobz, &range, &NN,
(double*)&DD[0], (double*)&EE[0],
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
&tol, // tolerance
&evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN,
&isuppz[0],
&work[0], &lwork, &iwork[0], &liwork,
&info);
gsw.Stop();
std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl;
for (int i = iu-1; i>= il-1; i--){
evals_tmp[i] = evals_tmp[i - (il-1)];
if (il>1) evals_tmp[i-(il-1)]=0.;
for (int j = 0; j< NN; j++){
evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j];
if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.;
}
}
}
{
// QMP_sum_double_array(evals_tmp,NN);
// QMP_sum_double_array((double *)evec_tmp,NN*NN);
grid->GlobalSumVector(&evals_tmp[0],NN);
grid->GlobalSumVector(&evec_tmp[0],NN*NN);
}
}
// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
for(int i=0;i<NN;i++){
for(int j=0;j<NN;j++)
Qt[(NN-1-i)*N2+j]=evec_tmp[i*NN + j];
lmd [NN-1-i]=evals_tmp[i];
}
std::cout << GridLogMessage << "diagonalize_lapack complete\n";
}
#undef LAPACK_INT
#endif
void diagonalize(std::vector<RealD>& lmd,
std::vector<RealD>& lme,
int N2,
int N1,
std::vector<RealD>& Qt,
GridBase *grid)
{
#ifdef USE_LAPACK_IRL
const int check_lapack=0; // just use lapack if 0, check against lapack if 1
if(!check_lapack)
return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
std::vector <RealD> lmd2(N1);
std::vector <RealD> lme2(N1);
std::vector<RealD> Qt2(N1*N1);
for(int k=0; k<N1; ++k){
lmd2[k] = lmd[k];
lme2[k] = lme[k];
}
for(int k=0; k<N1*N1; ++k)
Qt2[k] = Qt[k];
// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
#endif
int Niter = 10000*N1;
int kmin = 1;
int kmax = N2;
// (this should be more sophisticated)
for(int iter=0; ; ++iter){
if ( (iter+1)%(100*N1)==0)
std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
// Convergence criterion (redef of kmin and kamx)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
Niter = iter;
#ifdef USE_LAPACK_IRL
if(check_lapack){
const double SMALL=1e-8;
diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
std::vector <RealD> lmd3(N2);
for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
_sort.push(lmd3,N2);
_sort.push(lmd2,N2);
for(int k=0; k<N2; ++k){
if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
}
for(int k=0; k<N1*N1; ++k){
// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
}
}
#endif
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
}
std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort();
}
#if 1
template<typename T>
static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = sqrt(nn);
v = v * (1.0/nn);
return nn;
}
void orthogonalize(Field& w,
BasisFieldVector<Field>& evec,
int k)
{
double t0=-usecond()/1e6;
evec.orthogonalize(w,k);
normalise(w);
t0+=usecond()/1e6;
OrthoTime +=t0;
}
void setUnit_Qt(int Nm, std::vector<RealD> &Qt) {
for(int i=0; i<Qt.size(); ++i) Qt[i] = 0.0;
for(int k=0; k<Nm; ++k) Qt[k + k*Nm] = 1.0;
}
/* Rudy Arthur's thesis pp.137
------------------------
Require: M > K P = M K
Compute the factorization AVM = VM HM + fM eM
repeat
Q=I
for i = 1,...,P do
QiRi =HM θiI Q = QQi
H M = Q i H M Q i
end for
βK =HM(K+1,K) σK =Q(M,K)
r=vK+1βK +rσK
VK =VM(1:M)Q(1:M,1:K)
HK =HM(1:K,1:K)
AVK =VKHK +fKeK Extend to an M = K + P step factorization AVM = VMHM + fMeM
until convergence
*/
void calc(std::vector<RealD>& eval,
BasisFieldVector<Field>& evec,
const Field& src,
int& Nconv,
bool reverse,
int SkipTest)
{
GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid;
assert(grid == src._grid);
std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl;
std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl;
assert(Nm <= evec.size() && Nm <= eval.size());
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
RealD evalMaxApprox = 0.0;
{
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
_HermOpTest(src_n,tmp);
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
}
std::vector<RealD> lme(Nm);
std::vector<RealD> lme2(Nm);
std::vector<RealD> eval2(Nm);
std::vector<RealD> eval2_copy(Nm);
std::vector<RealD> Qt(Nm*Nm);
Field f(grid);
Field v(grid);
int k1 = 1;
int k2 = Nk;
Nconv = 0;
RealD beta_k;
// Set initial vector
evec[0] = src;
normalise(evec[0]);
std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0])<<std::endl;
// Initial Nk steps
OrthoTime=0.;
double t0=usecond()/1e6;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
double t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
t1=usecond()/1e6;
// Restarting loop begins
for(int iter = 0; iter<Niter; ++iter){
std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
//
// Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
// We loop over
//
OrthoTime=0.;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
f *= lme[Nm-1];
t1=usecond()/1e6;
// getting eigenvalues
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k+k1-1];
lme2[k] = lme[k+k1-1];
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// sorting
eval2_copy = eval2;
_sort.push(eval2,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
// Implicitly shifted QR transformations
setUnit_Qt(Nm,Qt);
for(int ip=0; ip<k2; ++ip){
std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
}
for(int ip=k2; ip<Nm; ++ip){
std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
assert(k2<Nm);
assert(k2<Nm);
assert(k1>0);
evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
fflush(stdout);
// Compressed vector f and beta(k2)
f *= Qt[Nm-1+Nm*(k2-1)];
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
lme[k2-1] = beta_k;
// Convergence test
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k];
lme2[k] = lme[k];
std::cout<<GridLogMessage << "eval2[" << k << "] = " << eval2[k] << std::endl;
}
setUnit_Qt(Nm,Qt);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
Nconv = 0;
if (iter >= Nminres) {
std::cout << GridLogMessage << "Rotation to test convergence " << std::endl;
Field ev0_orig(grid);
ev0_orig = evec[0];
evec.rotate(Qt,0,Nk,0,Nk,Nm);
{
std::cout << GridLogMessage << "Test convergence" << std::endl;
Field B(grid);
for(int j = 0; j<Nk; j+=SkipTest){
B=evec[j];
//std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl;
B.checkerboard = evec[0].checkerboard;
_HermOpTest(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval2[j] = vnum/vden;
v -= eval2[j]*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] "
<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv
<<" "<< vnum/(sqrt(vden)*sqrt(vv0))
<< " norm(B["<<j<<"])="<< vden <<std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (j == Nconv) ){
Nconv+=SkipTest;
}
}
// test if we converged, if so, terminate
t1=usecond()/1e6;
std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;
if( Nconv>=Nstop || beta_k < betastp){
goto converged;
}
std::cout << GridLogMessage << "Rotate back" << std::endl;
//B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss];
{
Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk);
for (int k=0;k<Nk;k++)
for (int j=0;j<Nk;j++)
qm(j,k) = Qt[k+Nm*j];
GridStopWatch timeInv;
timeInv.Start();
Eigen::MatrixXd qmI = qm.inverse();
timeInv.Stop();
std::vector<RealD> QtI(Nm*Nm);
for (int k=0;k<Nk;k++)
for (int j=0;j<Nk;j++)
QtI[k+Nm*j] = qmI(j,k);
RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 )
assert(res_check_rotate_inverse < 1e-7);
evec.rotate(QtI,0,Nk,0,Nk,Nm);
axpy(ev0_orig,-1.0,evec[0],ev0_orig);
std::cout << GridLogMessage << "Rotation done (in " << timeInv.Elapsed() << " = " << timeInv.useconds() << " us" <<
", error = " << res_check_rotate_inverse <<
"); | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl;
}
}
} else {
std::cout << GridLogMessage << "iter < Nminres: do not yet test for convergence\n";
} // end of iter loop
}
std::cout<<GridLogMessage<<"\n NOT converged.\n";
abort();
converged:
if (SkipTest == 1) {
eval = eval2;
} else {
// test quickly
for (int j=0;j<Nstop;j+=SkipTest) {
std::cout<<GridLogMessage << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl;
}
eval2_copy.resize(eval2.size());
eval = eval2_copy;
}
evec.sortInPlace(eval,reverse);
{
// test
for (int j=0;j<Nstop;j++) {
std::cout<<GridLogMessage << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl;
}
}
//_sort.push(eval,evec,Nconv);
//evec.sort(eval,Nconv);
std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n";
std::cout<<GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout<<GridLogMessage << " -- Nconv = "<< Nconv << "\n";
}
#endif
};
}
#endif

View File

@ -1,163 +0,0 @@
namespace Grid {
template<class Field>
class BasisFieldVector {
public:
int _Nm;
typedef typename Field::scalar_type Coeff_t;
typedef typename Field::vector_type vCoeff_t;
typedef typename Field::vector_object vobj;
typedef typename vobj::scalar_object sobj;
std::vector<Field> _v; // _Nfull vectors
void report(int n,GridBase* value) {
std::cout << GridLogMessage << "BasisFieldVector allocated:\n";
std::cout << GridLogMessage << " Delta N = " << n << "\n";
std::cout << GridLogMessage << " Size of full vectors (size) = " <<
((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n";
std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl;
value->Barrier();
if (value->IsBoss()) {
system("cat /proc/meminfo");
}
value->Barrier();
}
BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) {
report(Nm,value);
}
~BasisFieldVector() {
}
Field& operator[](int i) {
return _v[i];
}
void orthogonalize(Field& w, int k) {
for(int j=0; j<k; ++j){
Coeff_t ip = (Coeff_t)innerProduct(_v[j],w);
w = w - ip*_v[j];
}
}
void rotate(std::vector<RealD>& Qt,int j0, int j1, int k0,int k1,int Nm) {
GridBase* grid = _v[0]._grid;
#pragma omp parallel
{
std::vector < vobj > B(Nm);
#pragma omp for
for(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt[k+Nm*j] * _v[k]._odata[ss];
}
}
for(int j=j0; j<j1; ++j){
_v[j]._odata[ss] = B[j];
}
}
}
}
size_t size() const {
return _Nm;
}
void resize(int n) {
if (n > _Nm)
_v.reserve(n);
_v.resize(n,_v[0]._grid);
if (n < _Nm)
_v.shrink_to_fit();
report(n - _Nm,_v[0]._grid);
_Nm = n;
}
std::vector<int> getIndex(std::vector<RealD>& sort_vals) {
std::vector<int> idx(sort_vals.size());
iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
sort(idx.begin(), idx.end(),
[&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);});
return idx;
}
void reorderInPlace(std::vector<RealD>& sort_vals, std::vector<int>& idx) {
GridStopWatch gsw;
gsw.Start();
int nswaps = 0;
for (size_t i=0;i<idx.size();i++) {
if (idx[i] != i) {
// find proper place (this could be done in logarithmic time, don't bother for now)
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(j!=idx.size());
Field _t(_v[0]._grid);
_t = _v[idx[j]];
_v[idx[j]] = _v[idx[i]];
_v[idx[i]] = _t;
RealD _td = sort_vals[idx[j]];
sort_vals[idx[j]] = sort_vals[idx[i]];
sort_vals[idx[i]] = _td;
int _tt = idx[i];
idx[i] = idx[j];
idx[j] = _tt;
nswaps++;
}
}
// sort values
gsw.Stop();
std::cout << GridLogMessage << "Sorted eigenspace in place in " << gsw.Elapsed() << " using " << nswaps << " swaps" << std::endl;
}
void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) {
std::vector<int> idx = getIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
reorderInPlace(sort_vals,idx);
}
void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = zero;
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
};
}

View File

@ -78,12 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
cp = a;
ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq;
@ -92,7 +92,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
return;
}
std::cout << GridLogIterative << std::setprecision(4)
std::cout << GridLogIterative << std::setprecision(8)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
GridStopWatch LinalgTimer;

View File

@ -7,8 +7,9 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung
Author: Guido Cossu
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -27,125 +28,288 @@ Author: Guido Cossu
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_IRL_H
#define GRID_IRL_H
#ifndef GRID_BIRL_H
#define GRID_BIRL_H
#include <string.h> //memset
//#include <zlib.h>
#include <sys/stat.h>
namespace Grid {
namespace Grid {
enum IRLdiagonalisation {
IRLdiagonaliseWithDSTEGR,
IRLdiagonaliseWithQR,
IRLdiagonaliseWithEigen
};
////////////////////////////////////////////////////////////////////////////////
// Helper class for sorting the evalues AND evectors by Field
// Use pointer swizzle on vectors
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
// Move following 100 LOC to lattice/Lattice_basis.h
////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
static bool less_lmd(RealD left,RealD right){
return left > right;
}
static bool less_pair(std::pair<RealD,Field const*>& left,
std::pair<RealD,Field const*>& right){
return left.first > (right.first);
}
public:
void push(std::vector<RealD>& lmd,std::vector<Field>& evec,int N) {
////////////////////////////////////////////////////////////////////////
// PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set.
// : The vector reorder should be done by pointer swizzle somehow
////////////////////////////////////////////////////////////////////////
std::vector<Field> cpy(lmd.size(),evec[0]._grid);
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
std::vector<std::pair<RealD, Field const*> > emod(lmd.size());
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{
for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j];
}
}
for(int i=0;i<lmd.size();++i) emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
typename std::vector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=*(it->second);
++it;
template<class Field>
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
{
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0]._grid;
parallel_region
{
std::vector < vobj > B(Nm); // Thread private
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt(j,k) * basis[k]._odata[ss];
}
}
for(int j=j0; j<j1; ++j){
basis[j]._odata[ss] = B[j];
}
}
}
void push(std::vector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
// Extract a single rotated vector
template<class Field>
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
{
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0]._grid;
result.checkerboard = basis[0].checkerboard;
parallel_for(int ss=0;ss < grid->oSites();ss++){
vobj B = zero;
for(int k=k0; k<k1; ++k){
B +=Qt(j,k) * basis[k]._odata[ss];
}
result._odata[ss] = B;
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
template<class Field>
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
{
int vlen = idx.size();
assert(vlen>=1);
assert(vlen<=sort_vals.size());
assert(vlen<=_v.size());
for (size_t i=0;i<vlen;i++) {
if (idx[i] != i) {
//////////////////////////////////////
// idx[i] is a table of desired sources giving a permutation.
// Swap v[i] with v[idx[i]].
// Find j>i for which _vnew[j] = _vold[i],
// track the move idx[j] => idx[i]
// track the move idx[i] => i
//////////////////////////////////////
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy
std::swap(sort_vals[i],sort_vals[idx[i]]);
idx[j] = idx[i];
idx[i] = i;
}
}
};
}
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
{
std::vector<int> idx(sort_vals.size());
std::iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
});
return idx;
}
template<class Field>
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
{
std::vector<int> idx = basisSortGetIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
basisReorderInPlace(_v,sort_vals,idx);
}
// PAB: faster to compute the inner products first then fuse loops.
// If performance critical can improve.
template<class Field>
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = zero;
assert(_v.size()==eval.size());
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
/////////////////////////////////////////////////////////////
// Implicitly restarted lanczos
/////////////////////////////////////////////////////////////
template<class Field> class ImplicitlyRestartedLanczosTester
{
public:
virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
};
enum IRLdiagonalisation {
IRLdiagonaliseWithDSTEGR,
IRLdiagonaliseWithQR,
IRLdiagonaliseWithEigen
};
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
{
public:
LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
{
return TestConvergence(j,resid,B,eval,evalMaxApprox);
}
int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
{
Field v(B);
RealD eval_poly = eval;
// Apply operator
_HermOp(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval = vnum/vden;
v -= eval*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
int conv=0;
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
};
template<class Field>
class ImplicitlyRestartedLanczos {
private:
int MaxIter; // Max iterations
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
int Nm; // Nm -- total number of vectors
RealD eresid;
private:
const RealD small = 1.0e-8;
int MaxIter;
int MinRestart; // Minimum number of restarts; only check for convergence after
int Nstop; // Number of evecs checked for convergence
int Nk; // Number of converged sought
// int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk
int Nm; // Nm -- total number of vectors
IRLdiagonalisation diagonalisation;
////////////////////////////////////
int orth_period;
RealD OrthoTime;
RealD eresid, betastp;
////////////////////////////////
// Embedded objects
////////////////////////////////////
SortEigen<Field> _sort;
LinearOperatorBase<Field> &_Linop;
OperatorFunction<Field> &_poly;
////////////////////////////////
LinearFunction<Field> &_PolyOp;
LinearFunction<Field> &_HermOp;
ImplicitlyRestartedLanczosTester<Field> &_Tester;
// Default tester provided (we need a ref to something in default case)
ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
/////////////////////////
// Constructor
/////////////////////////
public:
ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op
OperatorFunction<Field> & poly, // polynomial
int _Nstop, // really sought vecs
int _Nk, // sought vecs
int _Nm, // total vecs
RealD _eresid, // resid in lmd deficit
int _MaxIter, // Max iterations
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) :
_Linop(Linop), _poly(poly),
Nstop(_Nstop), Nk(_Nk), Nm(_Nm),
eresid(_eresid), MaxIter(_MaxIter),
diagonalisation(_diagonalisation)
{ };
//////////////////////////////////////////////////////////////////
// PAB:
//////////////////////////////////////////////////////////////////
// Too many options & knobs.
// Eliminate:
// orth_period
// betastp
// MinRestart
//
// Do we really need orth_period
// What is the theoretical basis & guarantees of betastp ?
// Nstop=Nk viable?
// MinRestart avoidable with new convergence test?
// Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
// HermOp could be eliminated if we dropped the Power method for max eval.
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
//////////////////////////////////////////////////////////////////
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
LinearFunction<Field> & HermOp,
ImplicitlyRestartedLanczosTester<Field> & Tester,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
eresid(_eresid), betastp(_betastp),
MaxIter(_MaxIter) , MinRestart(_MinRestart),
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
LinearFunction<Field> & HermOp,
int _Nstop, // sought vecs
int _Nk, // sought vecs
int _Nm, // spare vecs
RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
eresid(_eresid), betastp(_betastp),
MaxIter(_MaxIter) , MinRestart(_MinRestart),
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
////////////////////////////////
// Helpers
////////////////////////////////
static RealD normalise(Field& v)
template<typename T> static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = sqrt(nn);
v = v * (1.0/nn);
return nn;
}
void orthogonalize(Field& w, std::vector<Field>& evec, int k)
void orthogonalize(Field& w, std::vector<Field>& evec,int k)
{
typedef typename Field::scalar_type MyComplex;
MyComplex ip;
for(int j=0; j<k; ++j){
ip = innerProduct(evec[j],w);
w = w - ip * evec[j];
}
OrthoTime-=usecond()/1e6;
basisOrthogonalize(evec,w,k);
normalise(w);
OrthoTime+=usecond()/1e6;
}
/* Rudy Arthur's thesis pp.137
@ -165,184 +329,238 @@ repeat
AVK =VKHK +fKeK Extend to an M = K + P step factorization AVM = VMHM + fMeM
until convergence
*/
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv)
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false)
{
GridBase *grid = src._grid;
assert(grid == evec[0]._grid);
GridBase *grid = evec[0]._grid;
assert(grid == src._grid);
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
std::cout << GridLogMessage <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl;
std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl;
GridLogIRL.TimingMode(1);
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
std::cout << GridLogMessage << "Diagonalisation is DSTEGR "<<std::endl;
std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
std::cout << GridLogMessage << "Diagonalisation is QR "<<std::endl;
std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
std::cout << GridLogMessage << "Diagonalisation is Eigen "<<std::endl;
std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
}
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
assert(Nm <= evec.size() && Nm <= eval.size());
assert(Nm == evec.size() && Nm == eval.size());
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
RealD evalMaxApprox = 0.0;
{
auto src_n = src;
auto tmp = src;
const int _MAX_ITER_IRL_MEVAPP_ = 50;
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
normalise(src_n);
_HermOp(src_n,tmp);
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
RealD vden = norm2(src_n);
RealD na = vnum/vden;
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
i=_MAX_ITER_IRL_MEVAPP_;
evalMaxApprox = na;
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp;
}
}
std::vector<RealD> lme(Nm);
std::vector<RealD> lme2(Nm);
std::vector<RealD> eval2(Nm);
std::vector<RealD> eval2_copy(Nm);
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
std::vector<int> Iconv(Nm);
std::vector<Field> B(Nm,grid); // waste of space replicating
Field f(grid);
Field v(grid);
int k1 = 1;
int k2 = Nk;
Nconv = 0;
RealD beta_k;
Nconv = 0;
// Set initial vector
evec[0] = src;
std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl;
normalise(evec[0]);
std::cout << GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
// Initial Nk steps
OrthoTime=0.;
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
//////////////////////////////////
// Restarting loop begins
//////////////////////////////////
int iter;
for(iter = 0; iter<MaxIter; ++iter){
OrthoTime=0.;
std::cout<< GridLogMessage <<" **********************"<< std::endl;
std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
std::cout<< GridLogMessage <<" **********************"<< std::endl;
std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
f *= lme[Nm-1];
std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
//////////////////////////////////
// getting eigenvalues
//////////////////////////////////
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k+k1-1];
lme2[k] = lme[k+k1-1];
}
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
//////////////////////////////////
// sorting
_sort.push(eval2,Nm);
//////////////////////////////////
eval2_copy = eval2;
std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
const int chunk=8;
for(int io=0; io<k2;io+=chunk){
std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
for(int ii=0;ii<chunk;ii++){
if ( (io+ii)<k2 )
std::cout<< " "<< std::setw(12)<< eval2[io+ii];
}
std::cout << std::endl;
}
//////////////////////////////////
// Implicitly shifted QR transformations
//////////////////////////////////
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
for(int ip=k2; ip<Nm; ++ip){
// Eigen replacement for qr_decomp ???
qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
}
for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
for(int j=k1-1; j<k2+1; ++j){
for(int k=0; k<Nm; ++k){
B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt(j,k) * evec[k];
}
}
for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
assert(k2<Nm); assert(k2<Nm); assert(k1>0);
basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
std::cout<<GridLogIRL <<"basisRotated by Qt"<<std::endl;
////////////////////////////////////////////////////
// Compressed vector f and beta(k2)
////////////////////////////////////////////////////
f *= Qt(k2-1,Nm-1);
f += lme[k2-1] * evec[k2];
beta_k = norm2(f);
beta_k = sqrt(beta_k);
std::cout<< GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
RealD betar = 1.0/beta_k;
evec[k2] = betar * f;
lme[k2-1] = beta_k;
////////////////////////////////////////////////////
// Convergence test
////////////////////////////////////////////////////
for(int k=0; k<Nm; ++k){
eval2[k] = eval[k];
lme2[k] = lme[k];
}
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
for(int k = 0; k<Nk; ++k) B[k]=0.0;
for(int j = 0; j<Nk; ++j){
for(int k = 0; k<Nk; ++k){
B[j].checkerboard = evec[k].checkerboard;
B[j] += Qt(j,k) * evec[k];
}
}
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
Nconv = 0;
for(int i=0; i<Nk; ++i){
_Linop.HermOp(B[i],v);
RealD vnum = real(innerProduct(B[i],v)); // HermOp.
RealD vden = norm2(B[i]);
eval2[i] = vnum/vden;
v -= eval2[i]*B[i];
RealD vv = norm2(v);
std::cout.precision(13);
std::cout << GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
std::cout << " |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
if((vv<eresid*eresid) && (i == Nconv) ){
Iconv[Nconv] = i;
++Nconv;
}
} // i-loop end
std::cout<< GridLogMessage <<" #modes converged: "<<Nconv<<std::endl;
if (iter >= MinRestart) {
if( Nconv>=Nstop ){
goto converged;
}
} // end of iter loop
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout<< GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged.";
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
Field B(grid); B.checkerboard = evec[0].checkerboard;
// power of two search pattern; not every evalue in eval2 is assessed.
for(int jj = 1; jj<=Nstop; jj*=2){
int j = Nstop-jj;
RealD e = eval2_copy[j]; // Discard the evalue
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
if ( j > Nconv ) {
Nconv=j+1;
jj=Nstop; // Terminate the scan
}
}
}
// Do evec[0] for good measure
{
int j=0;
RealD e = eval2_copy[0];
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
}
// test if we converged, if so, terminate
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
// if( Nconv>=Nstop || beta_k < betastp){
if( Nconv>=Nstop){
goto converged;
}
} else {
std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
} // end of iter loop
}
std::cout<<GridLogError<<"\n NOT converged.\n";
abort();
converged:
// Sorting
eval.resize(Nconv);
evec.resize(Nconv,grid);
for(int i=0; i<Nconv; ++i){
eval[i] = eval2[Iconv[i]];
evec[i] = B[Iconv[i]];
{
Field B(grid); B.checkerboard = evec[0].checkerboard;
basisRotate(evec,Qt,0,Nk,0,Nk,Nm);
std::cout << GridLogIRL << " Rotated basis"<<std::endl;
Nconv=0;
//////////////////////////////////////////////////////////////////////
// Full final convergence test; unconditionally applied
//////////////////////////////////////////////////////////////////////
for(int j = 0; j<=Nk; j++){
B=evec[j];
if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
Nconv++;
}
}
if ( Nconv < Nstop )
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
eval=eval2;
//Keep only converged
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
basisSortInPlace(evec,eval,reverse);
}
_sort.push(eval,evec,Nconv);
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogMessage << " -- Iterations = "<< iter << "\n";
std::cout << GridLogMessage << " -- beta(k) = "<< beta_k << "\n";
std::cout << GridLogMessage << " -- Nconv = "<< Nconv << "\n";
std::cout << GridLogMessage <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n";
std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n";
std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n";
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
}
private:
private:
/* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 0, v0 0
2. For k = 1,2,...,m Do:
@ -360,28 +578,38 @@ private:
{
const RealD tiny = 1.0e-20;
assert( k< Nm );
_poly(_Linop,evec[k],w); // 3. wk:=Avkβkv_{k1}
GridStopWatch gsw_op,gsw_o;
Field& evec_k = evec[k];
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
if(k>0) w -= lme[k-1] * evec[k-1];
ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk)
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
RealD alph = real(zalph);
w = w - alph * evec[k];// 5. wk:=wkαkvk
w = w - alph * evec_k;// 5. wk:=wkαkvk
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
// 7. vk+1 := wk/βk+1
lmd[k] = alph;
lme[k] = beta;
if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise
if ( k < Nm-1) evec[k+1] = w;
if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<<beta<<std::endl;
if (k>0 && k % orth_period == 0) {
orthogonalize(w,evec,k); // orthonormalise
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
}
if(k < Nm-1) evec[k+1] = w;
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
}
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt, // Nm x Nm
@ -404,11 +632,11 @@ private:
}
}
}
///////////////////////////////////////////////////////////////////////////
// File could end here if settle on Eigen ???
///////////////////////////////////////////////////////////////////////////
void qr_decomp(std::vector<RealD>& lmd, // Nm
///////////////////////////////////////////////////////////////////////////
// File could end here if settle on Eigen ??? !!!
///////////////////////////////////////////////////////////////////////////
void QR_decomp(std::vector<RealD>& lmd, // Nm
std::vector<RealD>& lme, // Nm
int Nk, int Nm, // Nk, Nm
Eigen::MatrixXd& Qt, // Nm x Nm matrix
@ -575,51 +803,50 @@ void diagonalize_lapack(std::vector<RealD>& lmd,
#endif
}
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt,
GridBase *grid)
{
int Niter = 100*Nm;
int kmin = 1;
int kmax = Nk;
// (this should be more sophisticated)
for(int iter=0; iter<Niter; ++iter){
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
// Convergence criterion (redef of kmin and kamx)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
Niter = iter;
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
int Nk, int Nm,
Eigen::MatrixXd & Qt,
GridBase *grid)
{
int QRiter = 100*Nm;
int kmin = 1;
int kmax = Nk;
// (this should be more sophisticated)
for(int iter=0; iter<QRiter; ++iter){
// determination of 2x2 leading submatrix
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
// (Dsh: shift)
// transformation
QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
// Convergence criterion (redef of kmin and kamx)
for(int j=kmax-1; j>= kmin; --j){
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
if(fabs(lme[j-1])+dds > dds){
kmax = j+1;
goto continued;
}
}
QRiter = iter;
return;
continued:
for(int j=0; j<kmax-1; ++j){
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
if(fabs(lme[j])+dds > dds){
kmin = j+1;
break;
}
}
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
abort();
}
};
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
abort();
}
};
}
#endif

View File

@ -0,0 +1,352 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
Copyright (C) 2015
Author: Christoph Lehner <clehner@bnl.gov>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LOCAL_COHERENCE_IRL_H
#define GRID_LOCAL_COHERENCE_IRL_H
namespace Grid {
struct LanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
ChebyParams, Cheby,/*Chebyshev*/
int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
int, Nk, /*Vecs in Lanczos seek converge*/
int, Nm, /*Total vecs in Lanczos include restart*/
RealD, resid, /*residual*/
int, MaxIt,
RealD, betastp, /* ? */
int, MinRes); // Must restart
};
struct LocalCoherenceLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
bool, doFine,
bool, doFineRead,
bool, doCoarse,
bool, doCoarseRead,
LanczosParams, FineParams,
LanczosParams, CoarseParams,
ChebyParams, Smoother,
RealD , coarse_relax_tol,
std::vector<int>, blockSize,
std::string, config,
std::vector < std::complex<double> >, omega,
RealD, mass,
RealD, M5);
};
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) :
_Linop(linop),
_Aggregate(aggregate) { };
void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = _Aggregate.FineGrid;
FineField fin(FineGrid);
FineField fout(FineGrid);
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
}
};
template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
OperatorFunction<FineField> & _poly;
LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
Aggregation<Fobj,CComplex,nbasis> &aggregate) :
_poly(poly),
_Linop(linop),
_Aggregate(aggregate) { };
void operator()(const CoarseField& in, CoarseField& out) {
GridBase *FineGrid = _Aggregate.FineGrid;
FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard;
FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard;
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
}
};
template<class Fobj,class CComplex,int nbasis>
class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LinearFunction<CoarseField> & _Poly;
OperatorFunction<FineField> & _smoother;
LinearOperatorBase<FineField> &_Linop;
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
RealD _coarse_relax_tol;
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
OperatorFunction<FineField> &smoother,
LinearOperatorBase<FineField> &Linop,
Aggregation<Fobj,CComplex,nbasis> &Aggregate,
RealD coarse_relax_tol=5.0e3)
: _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { };
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
CoarseField v(B);
RealD eval_poly = eval;
// Apply operator
_Poly(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
eval = vnum/vden;
v -= eval*B;
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
int conv=0;
if( (vv<eresid*eresid) ) conv = 1;
return conv;
}
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
{
GridBase *FineGrid = _Aggregate.FineGrid;
int checkerboard = _Aggregate.checkerboard;
FineField fB(FineGrid);fB.checkerboard =checkerboard;
FineField fv(FineGrid);fv.checkerboard =checkerboard;
_Aggregate.PromoteFromSubspace(B,fv);
_smoother(_Linop,fv,fB);
RealD eval_poly = eval;
_Linop.HermOp(fB,fv);
RealD vnum = real(innerProduct(fB,fv)); // HermOp.
RealD vden = norm2(fB);
RealD vv0 = norm2(fv);
eval = vnum/vden;
fv -= eval*fB;
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
std::cout.precision(13);
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
if( (vv<eresid*eresid) ) return 1;
return 0;
}
};
////////////////////////////////////////////
// Make serializable Lanczos params
////////////////////////////////////////////
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczos
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<Fobj> FineField;
protected:
GridBase *_CoarseGrid;
GridBase *_FineGrid;
int _checkerboard;
LinearOperatorBase<FineField> & _FineOp;
// FIXME replace Aggregation with vector of fine; the code reuse is too small for
// the hassle and complexity of cross coupling.
Aggregation<Fobj,CComplex,nbasis> _Aggregate;
std::vector<RealD> evals_fine;
std::vector<RealD> evals_coarse;
std::vector<CoarseField> evec_coarse;
public:
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_Aggregate(CoarseGrid,FineGrid,checkerboard),
_FineOp(FineOp),
_checkerboard(checkerboard)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};
void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }
template<typename T> static RealD normalise(T& v)
{
RealD nn = norm2(v);
nn = ::sqrt(nn);
v = v * (1.0/nn);
return nn;
}
void fakeFine(void)
{
int Nk = nbasis;
_Aggregate.subspace.resize(Nk,_FineGrid);
_Aggregate.subspace[0]=1.0;
_Aggregate.subspace[0].checkerboard=_checkerboard;
normalise(_Aggregate.subspace[0]);
PlainHermOp<FineField> Op(_FineOp);
for(int k=1;k<Nk;k++){
_Aggregate.subspace[k].checkerboard=_checkerboard;
Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
normalise(_Aggregate.subspace[k]);
}
}
void testFine(RealD resid)
{
assert(evals_fine.size() == nbasis);
assert(_Aggregate.subspace.size() == nbasis);
PlainHermOp<FineField> Op(_FineOp);
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
for(int k=0;k<nbasis;k++){
assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1);
}
}
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
{
assert(evals_fine.size() == nbasis);
assert(_Aggregate.subspace.size() == nbasis);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
for(int k=0;k<evec_coarse.size();k++){
if ( k < nbasis ) {
assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
} else {
assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
}
}
}
void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
assert(nbasis<=Nm);
Chebyshev<FineField> Cheby(cheby_parms);
FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
PlainHermOp<FineField> Op(_FineOp);
evals_fine.resize(Nm);
_Aggregate.subspace.resize(Nm,_FineGrid);
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
int Nconv;
IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);
// Shrink down to number saved
assert(Nstop>=nbasis);
assert(Nconv>=nbasis);
evals_fine.resize(nbasis);
_Aggregate.subspace.resize(nbasis,_FineGrid);
}
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
int Nstop, int Nk, int Nm,RealD resid,
RealD MaxIt, RealD betastp, int MinRes)
{
Chebyshev<FineField> Cheby(cheby_op);
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate);
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate);
//////////////////////////////////////////////////////////////////////////////////////////////////
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid);
CoarseField src(_CoarseGrid); src=1.0;
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
int Nconv=0;
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
assert(Nconv>=Nstop);
evals_coarse.resize(Nstop);
evec_coarse.resize (Nstop,_CoarseGrid);
for (int i=0;i<Nstop;i++){
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
}
}
};
}
#endif

View File

@ -55,7 +55,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*Odd
* i) D_oo psi_o = L^{-1} eta_o
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
*
* Wilson:
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
* Stag:
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
*
* L^-1 eta_o= (1 0 ) (e
* (-MoeMee^{-1} 1 )
*
*Even
* ii) Mee psi_e + Meo psi_o = src_e
*
@ -82,7 +90,7 @@ namespace Grid {
// Take a matrix and form a Red Black solver calling a Herm solver
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Now make the norm reflect extra factor of Mee
template<class Field> class SchurRedBlackStaggeredSolve {
private:
OperatorFunction<Field> & _HermitianRBSolver;
@ -115,26 +123,31 @@ namespace Grid {
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
/////////////////////////////////////////////////////
// src_o = Mdag * (source_o - Moe MeeInv source_e)
// src_o = (source_o - Moe MeeInv source_e)
/////////////////////////////////////////////////////
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); assert(src_o.checkerboard ==Odd);
//src_o = tmp; assert(src_o.checkerboard ==Odd);
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
//////////////////////////////////////////////////////////////
// Call the red-black solver
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@ -143,15 +156,16 @@ namespace Grid {
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};

View File

@ -3,9 +3,12 @@
namespace Grid {
MemoryStats *MemoryProfiler::stats = nullptr;
bool MemoryProfiler::debug = false;
int PointerCache::victim;
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) {
@ -94,4 +97,29 @@ void check_huge_pages(void *Buf,uint64_t BYTES)
#endif
}
std::string sizeString(const size_t bytes)
{
constexpr unsigned int bufSize = 256;
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
char buf[256];
size_t s = 0;
double count = bytes;
while (count >= 1024 && s < 7)
{
s++;
count /= 1024;
}
if (count - floor(count) == 0.0)
{
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
}
else
{
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
}
return std::string(buf);
}
}

View File

@ -63,6 +63,64 @@ namespace Grid {
static void *Lookup(size_t bytes) ;
};
std::string sizeString(size_t bytes);
struct MemoryStats
{
size_t totalAllocated{0}, maxAllocated{0},
currentlyAllocated{0}, totalFreed{0};
};
class MemoryProfiler
{
public:
static MemoryStats *stats;
static bool debug;
};
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
#define profilerDebugPrint \
if (MemoryProfiler::stats)\
{\
auto s = MemoryProfiler::stats;\
std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\
std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \
<< std::endl;\
std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \
<< std::endl;\
std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \
<< std::endl;\
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
<< std::endl;\
}
#define profilerAllocate(bytes)\
if (MemoryProfiler::stats)\
{\
auto s = MemoryProfiler::stats;\
s->totalAllocated += (bytes);\
s->currentlyAllocated += (bytes);\
s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated);\
}\
if (MemoryProfiler::debug)\
{\
std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl;\
profilerDebugPrint;\
}
#define profilerFree(bytes)\
if (MemoryProfiler::stats)\
{\
auto s = MemoryProfiler::stats;\
s->totalFreed += (bytes);\
s->currentlyAllocated -= (bytes);\
}\
if (MemoryProfiler::debug)\
{\
std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl;\
profilerDebugPrint;\
}
void check_huge_pages(void *Buf,uint64_t BYTES);
@ -92,6 +150,7 @@ public:
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
// if ( ptr != NULL )
@ -122,6 +181,8 @@ public:
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n * sizeof(_Tp);
profilerFree(bytes);
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
#ifdef HAVE_MM_MALLOC_H
@ -172,10 +233,13 @@ public:
#ifdef GRID_COMMS_SHMEM
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
#ifdef CRAY
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
_Tp *ptr = (_Tp *) shmem_align(bytes,64);
#else
_Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
_Tp *ptr = (_Tp *) shmem_align(64,bytes);
#endif
#ifdef PARANOID_SYMMETRIC_HEAP
static void * bcast;
@ -193,18 +257,23 @@ public:
#endif
return ptr;
}
void deallocate(pointer __p, size_type) {
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n*sizeof(_Tp);
profilerFree(bytes);
shmem_free((void *)__p);
}
#else
pointer allocate(size_type __n, const void* _p= 0)
{
#ifdef HAVE_MM_MALLOC_H
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),GRID_ALLOC_ALIGN);
#else
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,__n*sizeof(_Tp));
#endif
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
#ifdef HAVE_MM_MALLOC_H
_Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
#else
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
#endif
uint8_t *cp = (uint8_t *)ptr;
if ( ptr ) {
// One touch per 4k page, static OMP loop to catch same loop order
@ -215,7 +284,10 @@ public:
}
return ptr;
}
void deallocate(pointer __p, size_type) {
void deallocate(pointer __p, size_type __n) {
size_type bytes = __n*sizeof(_Tp);
profilerFree(bytes);
#ifdef HAVE_MM_MALLOC_H
_mm_free((void *)__p);
#else

View File

@ -44,13 +44,21 @@ namespace Grid{
class GridBase : public CartesianCommunicator , public GridThread {
public:
int dummy;
// Give Lattice access
template<class object> friend class Lattice;
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
GridBase(const std::vector<int> & processor_grid,
const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {};
const CartesianCommunicator &parent,
int &split_rank)
: CartesianCommunicator(processor_grid,parent,split_rank) {};
GridBase(const std::vector<int> & processor_grid,
const CartesianCommunicator &parent)
: CartesianCommunicator(processor_grid,parent,dummy) {};
virtual ~GridBase() = default;
// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.

View File

@ -38,7 +38,7 @@ namespace Grid{
class GridCartesian: public GridBase {
public:
int dummy;
virtual int CheckerBoardFromOindexTable (int Oindex) {
return 0;
}
@ -67,7 +67,14 @@ public:
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const GridCartesian &parent) : GridBase(processor_grid,parent)
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
{
Init(dimensions,simd_layout,processor_grid);
}
GridCartesian(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid,
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
{
Init(dimensions,simd_layout,processor_grid);
}
@ -81,6 +88,8 @@ public:
Init(dimensions,simd_layout,processor_grid);
}
virtual ~GridCartesian() = default;
void Init(const std::vector<int> &dimensions,
const std::vector<int> &simd_layout,
const std::vector<int> &processor_grid)
@ -114,6 +123,7 @@ public:
// Use a reduced simd grid
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
@ -158,6 +168,7 @@ public:
block = block * _rdimensions[d];
}
};
};
}
#endif

View File

@ -133,6 +133,8 @@ public:
{
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ;
}
virtual ~GridRedBlackCartesian() = default;
#if 0
////////////////////////////////////////////////////////////
// Create redblack grid ;; deprecate these. Should not
@ -204,6 +206,7 @@ public:
{
assert((_gdimensions[d] & 0x1) == 0);
_gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard
_gsites /= 2;
}
_ldimensions[d] = _gdimensions[d] / _processors[d];
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);

View File

@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_COMMUNICATOR_H
#define GRID_COMMUNICATOR_H
#include <Grid/communicator/SharedMemory.h>
#include <Grid/communicator/Communicator_base.h>
#endif

View File

@ -36,33 +36,9 @@ namespace Grid {
///////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////
void * CartesianCommunicator::ShmCommBuf;
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
CartesianCommunicator::CommunicatorPolicy_t
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
int CartesianCommunicator::nCommThreads = -1;
int CartesianCommunicator::Hugepages = 0;
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= MAX_MPI_SHM_BYTES) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (MAX_MPI_SHM_BYTES/(1024*1024)) <<std::endl;
assert(heap_bytes<MAX_MPI_SHM_BYTES);
}
return ptr;
}
void CartesianCommunicator::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
/////////////////////////////////
// Grid information queries
@ -95,191 +71,6 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
{
GlobalSumVector((double *)c,2*N);
}
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
{
_ndimension = processors.size();
assert(_ndimension = parent._ndimension);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
int Nparent;
MPI_Comm_size(parent.communicator,&Nparent);
int childsize=1;
for(int d=0;d<processors.size();d++) {
childsize *= processors[d];
}
int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent);
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
std::vector<int> scoor(_ndimension); // coor of split within parent
std::vector<int> ssize(_ndimension); // coor of split within parent
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent._processor_coor[d] % processors[d];
scoor[d] = parent._processor_coor[d] / processors[d];
ssize[d] = parent._processors[d]/ processors[d];
}
int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms
Lexicographic::IndexFromCoor(ccoor,crank,processors);
Lexicographic::IndexFromCoor(scoor,srank,ssize);
MPI_Comm comm_split;
if ( Nchild > 1 ) {
// std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
// std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
// for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
// std::cout<<std::endl;
// std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
// for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
// std::cout<<std::endl;
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
// std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
// << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
} else {
comm_split=parent.communicator;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Set up from the new split communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take an MPI_Comm and self assemble
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
// if ( communicator_base != communicator_world ) {
// std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
// }
_ndimension = processors.size();
_processor_coor.resize(_ndimension);
/////////////////////////////////
// Count the requested nodes
/////////////////////////////////
_Nprocessors=1;
_processors = processors;
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
std::vector<int> periodic(_ndimension,1);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
int Size;
MPI_Comm_size(communicator,&Size);
#ifdef GRID_COMMS_MPIT
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
}
#endif
assert(Size==_Nprocessors);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
InitFromMPICommunicator(processors,communicator_world);
}
#endif
#if !defined( GRID_COMMS_MPI3)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
int CartesianCommunicator::RankCount(void) { return ProcessorCount();};
#endif
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT)
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes, int dir)
{
std::vector<CommsRequest_t> list;
// Discard the "dir"
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
SendToRecvFromComplete(list);
return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes, int dir)
{
// Discard the "dir"
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
SendToRecvFromComplete(waitall);
}
#endif
#if !defined( GRID_COMMS_MPI3)
void CartesianCommunicator::StencilBarrier(void){};
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
void *CartesianCommunicator::ShmBuffer(int rank) {
return NULL;
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
return NULL;
}
void CartesianCommunicator::ShmInitGeneric(void){
#if 1
int mmap_flag =0;
#ifdef MAP_ANONYMOUS
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
#endif
#ifdef MAP_ANON
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
#endif
#ifdef MAP_HUGETLB
if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
#endif
ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
if (ShmCommBuf == (void *)MAP_FAILED) {
perror("mmap failed ");
exit(EXIT_FAILURE);
}
#ifdef MADV_HUGEPAGE
if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE);
#endif
#else
ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
ShmCommBuf=(void *)&ShmBufStorageVector[0];
#endif
bzero(ShmCommBuf,MAX_MPI_SHM_BYTES);
}
#endif
}

View File

@ -32,117 +32,33 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
///////////////////////////////////
// Processor layout information
///////////////////////////////////
#ifdef GRID_COMMS_MPI
#include <mpi.h>
#endif
#ifdef GRID_COMMS_MPI3
#include <mpi.h>
#endif
#ifdef GRID_COMMS_MPIT
#include <mpi.h>
#endif
#ifdef GRID_COMMS_SHMEM
#include <mpp/shmem.h>
#endif
#include <Grid/communicator/SharedMemory.h>
namespace Grid {
class CartesianCommunicator {
public:
class CartesianCommunicator : public SharedMemory {
public:
////////////////////////////////////////////
// Isend/Irecv/Wait, or Sendrecv blocking
// Policies
////////////////////////////////////////////
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
static CommunicatorPolicy_t CommunicatorPolicy;
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
///////////////////////////////////////////
// Up to 65536 ranks per node adequate for now
// 128MB shared memory for comms enought for 48^4 local vol comms
// Give external control (command line override?) of this
///////////////////////////////////////////
static const int MAXLOG2RANKSPERNODE = 16;
static uint64_t MAX_MPI_SHM_BYTES;
static int nCommThreads;
// use explicit huge pages
static int Hugepages;
////////////////////////////////////////////
// Communicator should know nothing of the physics grid, only processor grid.
////////////////////////////////////////////
int _Nprocessors; // How many in all
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
int _processor; // linear processor rank
std::vector<int> _processor_coor; // linear processor coordinate
unsigned long _ndimension;
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT)
static MPI_Comm communicator_world;
MPI_Comm communicator;
std::vector<MPI_Comm> communicator_halo;
typedef MPI_Request CommsRequest_t;
#else
typedef int CommsRequest_t;
#endif
////////////////////////////////////////////////////////////////////
// Helper functionality for SHM Windows common to all other impls
////////////////////////////////////////////////////////////////////
// Longer term; drop this in favour of a master / slave model with
// cartesian communicator on a subset of ranks, slave ranks controlled
// by group leader with data xfer via shared memory
////////////////////////////////////////////////////////////////////
#ifdef GRID_COMMS_MPI3
static int ShmRank;
static int ShmSize;
static int GroupRank;
static int GroupSize;
static int WorldRank;
static int WorldSize;
std::vector<int> WorldDims;
std::vector<int> GroupDims;
std::vector<int> ShmDims;
std::vector<int> GroupCoor;
std::vector<int> ShmCoor;
std::vector<int> WorldCoor;
static std::vector<int> GroupRanks;
static std::vector<int> MyGroup;
static int ShmSetup;
static MPI_Win ShmWindow;
static MPI_Comm ShmComm;
std::vector<int> LexicographicToWorldRank;
static std::vector<void *> ShmCommBufs;
#else
static void ShmInitGeneric(void);
static commVector<uint8_t> ShmBufStorageVector;
#endif
/////////////////////////////////
// Grid information and queries
// Implemented in Communicator_base.C
/////////////////////////////////
static void * ShmCommBuf;
size_t heap_top;
size_t heap_bytes;
void *ShmBufferSelf(void);
void *ShmBuffer(int rank);
void *ShmBufferTranslate(int rank,void * local_p);
void *ShmBufferMalloc(size_t bytes);
void ShmBufferFreeAll(void) ;
unsigned long _ndimension;
static Grid_MPI_Comm communicator_world;
Grid_MPI_Comm communicator;
std::vector<Grid_MPI_Comm> communicator_halo;
////////////////////////////////////////////////
// Must call in Grid startup
@ -153,18 +69,20 @@ class CartesianCommunicator {
// Constructors to sub-divide a parent communicator
// and default to comm world
////////////////////////////////////////////////
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent);
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank);
CartesianCommunicator(const std::vector<int> &pdimensions_in);
virtual ~CartesianCommunicator();
private:
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
////////////////////////////////////////////////
// Private initialise from an MPI communicator
// Can use after an MPI_Comm_split, but hidden from user so private
////////////////////////////////////////////////
void InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base);
#endif
void InitFromMPICommunicator(const std::vector<int> &processors, Grid_MPI_Comm communicator_base);
public:
////////////////////////////////////////////////////////////////////////////////////////
// Wraps MPI_Cart routines, or implements equivalent on other impls
@ -180,8 +98,6 @@ class CartesianCommunicator {
const std::vector<int> & ThisProcessorCoor(void) ;
const std::vector<int> & ProcessorGrid(void) ;
int ProcessorCount(void) ;
int NodeCount(void) ;
int RankCount(void) ;
////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
@ -269,16 +185,16 @@ class CartesianCommunicator {
template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){
assert(dim>=0);
assert(dim<_ndimension);
int numnode = _processors[dim];
// std::cerr << " AllToAll in.size() "<<in.size()<<std::endl;
// std::cerr << " AllToAll out.size() "<<out.size()<<std::endl;
assert(in.size()==out.size());
size_t bytes=(in.size()*sizeof(T))/numnode;
assert((bytes*numnode) == in.size()*sizeof(T));
AllToAll(dim,(void *)&in[0],(void *)&out[0],bytes);
int numnode = _processors[dim];
uint64_t bytes=sizeof(T);
uint64_t words=in.size()/numnode;
assert(numnode * words == in.size());
assert(words < (1ULL<<31));
AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
}
void AllToAll(int dim ,void *in,void *out,int bytes);
void AllToAll(void *in,void *out,int bytes);
void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);
template<class obj> void Broadcast(int root,obj &data)
{

View File

@ -1,228 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/Communicator_mpi.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/ActionCore.h>
#include <mpi.h>
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Comm CartesianCommunicator::communicator_world;
// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
if ( provided != MPI_THREAD_MULTIPLE ) {
QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
}
}
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
ShmInitGeneric();
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(double &d)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
assert(ierr==0);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
assert(ierr==0);
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
coor.resize(_ndimension);
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
MPI_Status stat;
assert(sender != receiver);
int tag = sender;
if ( _processor == sender ) {
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
}
if ( _processor == receiver ) {
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
int myrank = _processor;
int ierr;
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
}
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator);
assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
// Split the communicator
row[dim] = _processors[dim];
CartesianCommunicator Comm(row,*this);
Comm.AllToAll(in,out,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,int bytes)
{
MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator);
}
///////////////////////////////////////////////////////
// Should only be used prior to Grid Init finished.
// Check for this?
///////////////////////////////////////////////////////
int CartesianCommunicator::RankWorld(void){
int r;
MPI_Comm_rank(communicator_world,&r);
return r;
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator_world);
assert(ierr==0);
}
}

View File

@ -26,89 +26,20 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <mpi.h>
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <zlib.h>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif
#include <Grid/communicator/SharedMemory.h>
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
int CartesianCommunicator::ShmSetup = 0;
Grid_MPI_Comm CartesianCommunicator::communicator_world;
int CartesianCommunicator::ShmRank;
int CartesianCommunicator::ShmSize;
int CartesianCommunicator::GroupRank;
int CartesianCommunicator::GroupSize;
int CartesianCommunicator::WorldRank;
int CartesianCommunicator::WorldSize;
MPI_Comm CartesianCommunicator::communicator_world;
MPI_Comm CartesianCommunicator::ShmComm;
MPI_Win CartesianCommunicator::ShmWindow;
std::vector<int> CartesianCommunicator::GroupRanks;
std::vector<int> CartesianCommunicator::MyGroup;
std::vector<void *> CartesianCommunicator::ShmCommBufs;
int CartesianCommunicator::NodeCount(void) { return GroupSize;};
int CartesianCommunicator::RankCount(void) { return WorldSize;};
#undef FORCE_COMMS
void *CartesianCommunicator::ShmBufferSelf(void)
////////////////////////////////////////////
// First initialise of comms system
////////////////////////////////////////////
void CartesianCommunicator::Init(int *argc, char ***argv)
{
return ShmCommBufs[ShmRank];
}
void *CartesianCommunicator::ShmBuffer(int rank)
{
int gpeer = GroupRanks[rank];
#ifdef FORCE_COMMS
return NULL;
#endif
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
return ShmCommBufs[gpeer];
}
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
{
static int count =0;
int gpeer = GroupRanks[rank];
assert(gpeer!=ShmRank); // never send to self
assert(rank!=WorldRank);// never send to self
#ifdef FORCE_COMMS
return NULL;
#endif
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
return (void *) remote;
}
}
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
// mtrace();
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
@ -119,483 +50,213 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
Grid_quiesce_nodes();
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
MPI_Comm_rank(communicator_world,&WorldRank);
MPI_Comm_size(communicator_world,&WorldSize);
if ( WorldRank == 0 ) {
std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"<<WorldSize <<std::endl;
}
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
MPI_Comm_rank(ShmComm ,&ShmRank);
MPI_Comm_size(ShmComm ,&ShmSize);
GroupSize = WorldSize/ShmSize;
/////////////////////////////////////////////////////////////////////
// find world ranks in our SHM group (i.e. which ranks are on our node)
/////////////////////////////////////////////////////////////////////
MPI_Group WorldGroup, ShmGroup;
MPI_Comm_group (communicator_world, &WorldGroup);
MPI_Comm_group (ShmComm, &ShmGroup);
std::vector<int> world_ranks(WorldSize);
GroupRanks.resize(WorldSize);
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]);
///////////////////////////////////////////////////////////////////
// Identify who is in my group and noninate the leader
///////////////////////////////////////////////////////////////////
int g=0;
MyGroup.resize(ShmSize);
for(int rank=0;rank<WorldSize;rank++){
if(GroupRanks[rank]!=MPI_UNDEFINED){
assert(g<ShmSize);
MyGroup[g++] = rank;
}
}
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
int myleader = MyGroup[0];
std::vector<int> leaders_1hot(WorldSize,0);
std::vector<int> leaders_group(GroupSize,0);
leaders_1hot [ myleader ] = 1;
///////////////////////////////////////////////////////////////////
// global sum leaders over comm world
///////////////////////////////////////////////////////////////////
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
assert(ierr==0);
///////////////////////////////////////////////////////////////////
// find the group leaders world rank
///////////////////////////////////////////////////////////////////
int group=0;
for(int l=0;l<WorldSize;l++){
if(leaders_1hot[l]){
leaders_group[group++] = l;
}
}
///////////////////////////////////////////////////////////////////
// Identify the rank of the group in which I (and my leader) live
///////////////////////////////////////////////////////////////////
GroupRank=-1;
for(int g=0;g<GroupSize;g++){
if (myleader == leaders_group[g]){
GroupRank=g;
}
}
assert(GroupRank!=-1);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared window for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(ShmComm);
ShmCommBuf = 0;
ShmCommBufs.resize(ShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbf and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMMMAP
char shm_name [NAME_MAX];
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",GroupRank,r);
//sprintf(shm_name,"/var/lib/hugetlbfs/group/wheel/pagesize-2MB/" "Grid_mpi3_shm_%d_%d",GroupRank,r);
// printf("Opening file %s \n",shm_name);
int fd=open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd == -1) {
printf("open %s failed\n",shm_name);
perror("open hugetlbfs");
exit(0);
}
int mmap_flag = MAP_SHARED ;
#ifdef MAP_POPULATE
mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
#endif
void *ptr = (void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
if ( ptr == (void *)MAP_FAILED ) {
printf("mmap %s failed\n",shm_name);
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
ShmCommBufs[r] =ptr;
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
// the posix shm virtual file system
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMOPEN
char shm_name [NAME_MAX];
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
int mmap_flag = MAP_SHARED;
#ifdef MAP_POPULATE
mmap_flag |= MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if (Hugepages) mmap_flag |= MAP_HUGETLB;
#endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
// Experiments; Experiments; Try to force numa domain on the shm segment if we have numaif.h
#if 0
//#ifdef HAVE_NUMAIF_H
int status;
int flags=MPOL_MF_MOVE;
#ifdef KNL
int nodes=1; // numa domain == MCDRAM
// Find out if in SNC2,SNC4 mode ?
#else
int nodes=r; // numa domain == MPI ID
#endif
unsigned long count=1;
for(uint64_t page=0;page<size;page+=4096){
void *pages = (void *) ( page + (uint64_t)ptr );
uint64_t *cow_it = (uint64_t *)pages; *cow_it = 1;
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
if (ierr && (page==0)) perror("numa relocate command failed");
}
#endif
ShmCommBufs[r] =ptr;
}
}
MPI_Barrier(ShmComm);
if ( ShmRank != 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES ;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
ShmCommBufs[r] =ptr;
}
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET SHMAT and SHM_HUGETLB flag
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
std::vector<int> shmids(ShmSize);
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
key_t key = IPC_PRIVATE;
int flags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
if (Hugepages) flags|=SHM_HUGETLB;
#endif
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
int errsv = errno;
printf("Errno %d\n",errsv);
printf("key %d\n",key);
printf("size %lld\n",size);
printf("flags %d\n",flags);
perror("shmget");
exit(1);
} else {
printf("shmid: 0x%x\n", shmids[r]);
}
}
}
MPI_Barrier(ShmComm);
MPI_Bcast(&shmids[0],ShmSize*sizeof(int),MPI_BYTE,0,ShmComm);
MPI_Barrier(ShmComm);
for(int r=0;r<ShmSize;r++){
ShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (ShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
printf("shmaddr: %p\n", ShmCommBufs[r]);
}
MPI_Barrier(ShmComm);
// Mark for clean up
for(int r=0;r<ShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(ShmComm);
#endif
ShmCommBuf = ShmCommBufs[ShmRank];
MPI_Barrier(ShmComm);
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
check[0] = GroupRank;
check[1] = r;
check[2] = 0x5A5A5A;
}
}
MPI_Barrier(ShmComm);
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
assert(check[0]==GroupRank);
assert(check[1]==r);
assert(check[2]==0x5A5A5A);
}
MPI_Barrier(ShmComm);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Verbose for now
//////////////////////////////////////////////////////////////////////////////////////////////////////////
if (WorldRank == 0){
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
std::cout<< WorldSize << " Ranks " ;
std::cout<< GroupSize << " Nodes " ;
std::cout<< " with "<< ShmSize << " ranks-per-node "<<std::endl;
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
for(int g=0;g<GroupSize;g++){
std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<<leaders_group[g]<<std::endl;
}
std::cout<<GridLogMessage<<" Boss Node Shm Pointers are {";
for(int g=0;g<ShmSize;g++){
std::cout<<std::hex<<ShmCommBufs[g]<<std::dec;
if(g!=ShmSize-1) std::cout<<",";
else std::cout<<"}"<<std::endl;
}
}
for(int g=0;g<GroupSize;g++){
if ( (ShmRank == 0) && (GroupRank==g) ) std::cout<<GridLogMessage<<"["<<g<<"] Node Group "<<g<<" is ranks {";
for(int r=0;r<ShmSize;r++){
if ( (ShmRank == 0) && (GroupRank==g) ) {
std::cout<<MyGroup[r];
if(r<ShmSize-1) std::cout<<",";
else std::cout<<"}"<<std::endl<<std::flush;
}
MPI_Barrier(communicator_world);
}
}
assert(ShmSetup==0); ShmSetup=1;
GlobalSharedMemory::Init(communicator_world);
GlobalSharedMemory::SharedMemoryAllocate(
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
GlobalSharedMemory::Hugepages);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Want to implement some magic ... Group sub-cubes into those on same node
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &dest,int &source)
///////////////////////////////////////////////////////////////////////////
// Use cartesian communicators now even in MPI3
///////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
std::vector<int> coor = _processor_coor; // my coord
assert(std::abs(shift) <_processors[dim]);
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,source,_processors);
source = LexicographicToWorldRank[source];
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,dest,_processors);
dest = LexicographicToWorldRank[dest];
}// rank is world rank.
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
assert(ierr==0);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
Lexicographic::IndexFromCoor(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
assert(ierr==0);
return rank;
}// rank is world rank
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
int lr=-1;
for(int r=0;r<WorldSize;r++){// map world Rank to lexico and then to coor
if( LexicographicToWorldRank[r]==rank) lr = r;
}
assert(lr!=-1);
Lexicographic::CoorFromIndex(coor,lr,_processors);
coor.resize(_ndimension);
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
assert(ierr==0);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
// Initialises from communicator_world
////////////////////////////////////////////////////////////////////////////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
MPI_Comm optimal_comm;
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine
InitFromMPICommunicator(processors,optimal_comm);
SetCommunicator(optimal_comm);
}
//////////////////////////////////
// Try to subdivide communicator
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
int ierr;
communicator=communicator_world;
_ndimension = processors.size();
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
std::vector<int> parent_processor_coor(_ndimension,0);
std::vector<int> parent_processors (_ndimension,1);
// Can make 5d grid from 4d etc...
int pad = _ndimension-parent_ndimension;
for(int d=0;d<parent_ndimension;d++){
parent_processor_coor[pad+d]=parent._processor_coor[d];
parent_processors [pad+d]=parent._processors[d];
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
// int Nparent = parent._processors ;
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
int Nparent;
MPI_Comm_size(parent.communicator,&Nparent);
// std::cout << " Parent size "<<Nparent <<std::endl;
int childsize=1;
for(int d=0;d<processors.size();d++) {
childsize *= processors[d];
}
int Nchild = Nparent/childsize;
assert (childsize * Nchild == Nparent);
// std::cout << " child size "<<childsize <<std::endl;
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
std::vector<int> scoor(_ndimension); // coor of split within parent
std::vector<int> ssize(_ndimension); // coor of split within parent
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent_processor_coor[d] % processors[d];
scoor[d] = parent_processor_coor[d] / processors[d];
ssize[d] = parent_processors[d] / processors[d];
}
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
int crank;
// Mpi uses the reverse Lexico convention to us; so reversed routines called
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids
MPI_Comm comm_split;
if ( Nchild > 1 ) {
if(0){
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
std::cout << " Split communicator " <<comm_split <<std::endl;
}
////////////////////////////////////////////////////////////////
// Split the communicator
////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
} else {
srank = 0;
comm_split = parent.communicator;
// std::cout << " Inherited communicator " <<comm_split <<std::endl;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Set up from the new split communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take the right SHM buffers
//////////////////////////////////////////////////////////////////////////////////////////////////////
SetCommunicator(comm_split);
if(0){
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
for(int d=0;d<processors.size();d++){
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
}
}
for(int d=0;d<processors.size();d++){
assert(_processor_coor[d] == ccoor[d] );
}
}
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
_ndimension = processors.size();
_processor_coor.resize(_ndimension);
/////////////////////////////////
// Count the requested nodes
/////////////////////////////////
_Nprocessors=1;
_processors = processors;
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
std::vector<int> periodic(_ndimension,1);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
if ( 0 && (communicator_base != communicator_world) ) {
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
for(int d=0;d<_processors.size();d++){
std::cout << _processor_coor[d]<<" ";
}
std::cout << std::endl;
}
int Size;
MPI_Comm_size(communicator,&Size);
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
}
assert(Size==_Nprocessors);
}
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = -1;
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
if ( (0x1<<i) == ShmSize ) {
log2size = i;
break;
CartesianCommunicator::~CartesianCommunicator()
{
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised) {
MPI_Comm_free(&communicator);
for(int i=0;i<communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
std::vector<int> WorldDims = processors;
ShmDims.resize (_ndimension,1);
GroupDims.resize(_ndimension);
ShmCoor.resize (_ndimension);
GroupCoor.resize(_ndimension);
WorldCoor.resize(_ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%_ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%_ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<_ndimension;d++){
GroupDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Verbose
////////////////////////////////////////////////////////////////
#if 0
std::cout<< GridLogMessage << "MPI-3 usage "<<std::endl;
std::cout<< GridLogMessage << "SHM ";
for(int d=0;d<_ndimension;d++){
std::cout<< ShmDims[d] <<" ";
}
std::cout<< std::endl;
std::cout<< GridLogMessage << "Group ";
for(int d=0;d<_ndimension;d++){
std::cout<< GroupDims[d] <<" ";
}
std::cout<< std::endl;
std::cout<< GridLogMessage<<"World ";
for(int d=0;d<_ndimension;d++){
std::cout<< WorldDims[d] <<" ";
}
std::cout<< std::endl;
#endif
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
_Nprocessors=1;
_processors = processors;
_processor_coor.resize(_ndimension);
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
assert(WorldSize==_Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
for(int d=0;d<_ndimension;d++){
WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
}
_processor_coor = WorldCoor;
_processor = WorldRank;
///////////////////////////////////////////////////////////////////
// global sum Lexico to World mapping
///////////////////////////////////////////////////////////////////
int lexico;
LexicographicToWorldRank.resize(WorldSize,0);
Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
LexicographicToWorldRank[lexico] = WorldRank;
ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
assert(ierr==0);
for(int i=0;i<WorldSize;i++){
int wr = LexicographicToWorldRank[i];
// int wr = i;
std::vector<int> coor(_ndimension);
ProcessorCoorFromRank(wr,coor); // from world rank
int ck = RankFromProcessorCoor(coor);
assert(ck==wr);
if ( wr == WorldRank ) {
for(int j=0;j<coor.size();j++) {
assert(coor[j] == _processor_coor[j]);
}
}
/*
std::cout << GridLogMessage<< " Lexicographic "<<i;
std::cout << " MPI rank "<<wr;
std::cout << " Coor ";
for(int j=0;j<coor.size();j++) std::cout << coor[j];
std::cout<< std::endl;
*/
/////////////////////////////////////////////////////
// Check everyone agrees on everyone elses coords
/////////////////////////////////////////////////////
std::vector<int> mcoor = coor;
this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int));
for(int d = 0 ; d< _ndimension; d++) {
assert(coor[d] == mcoor[d]);
}
}
};
}
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -712,34 +373,31 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
int from,
int bytes,int dir)
{
assert(dir < communicator_halo.size());
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
MPI_Request xrq;
MPI_Request rrq;
int ierr;
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme = GroupRanks[_processor];
int gdest = ShmRanks[dest];
int gfrom = ShmRanks[from];
int gme = ShmRanks[_processor];
assert(dest != _processor);
assert(from != _processor);
assert(gme == ShmRank);
double off_node_bytes=0.0;
#ifdef FORCE_COMMS
gdest = MPI_UNDEFINED;
gfrom = MPI_UNDEFINED;
#endif
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq);
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq);
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;
@ -799,5 +457,38 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
// Split the communicator
row[dim] = _processors[dim];
int me;
CartesianCommunicator Comm(row,*this,me);
Comm.AllToAll(in,out,words,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
// MPI is a pain and uses "int" arguments
// 64*64*64*128*16 == 500Million elements of data.
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
// (Turns up on 32^3 x 64 Gparity too)
MPI_Datatype object;
int iwords;
int ibytes;
iwords = words;
ibytes = bytes;
assert(words == iwords); // safe to cast to int ?
assert(bytes == ibytes); // safe to cast to int ?
MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
MPI_Type_commit(&object);
MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
MPI_Type_free(&object);
}
}

View File

@ -1,988 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/Communicator_mpi.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <mpi.h>
//#include <numaif.h>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Workarounds:
/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix.
/// darwin dispatch semaphores don't seem to be multiprocess.
///
/// ii) openmpi under --mca shmem posix works with two squadrons per node;
/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME
/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI.
///
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
typedef sem_t *Grid_semaphore;
#error /*THis is deprecated*/
#if 0
#define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED );
#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED );
#define SEM_POST(S) assert ( sem_post(S) == 0 );
#define SEM_WAIT(S) assert ( sem_wait(S) == 0 );
#else
#define SEM_INIT(S) ;
#define SEM_INIT_EXCL(S) ;
#define SEM_POST(S) ;
#define SEM_WAIT(S) ;
#endif
#include <sys/mman.h>
namespace Grid {
enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV };
struct Descriptor {
uint64_t buf;
size_t bytes;
int rank;
int tag;
int command;
uint64_t xbuf;
uint64_t rbuf;
int xtag;
int rtag;
int src;
int dest;
MPI_Request request;
};
const int pool = 48;
class SlaveState {
public:
volatile int head;
volatile int start;
volatile int tail;
volatile Descriptor Descrs[pool];
};
class Slave {
public:
Grid_semaphore sem_head;
Grid_semaphore sem_tail;
SlaveState *state;
MPI_Comm squadron;
uint64_t base;
int universe_rank;
int vertical_rank;
char sem_name [NAME_MAX];
////////////////////////////////////////////////////////////
// Descriptor circular pointers
////////////////////////////////////////////////////////////
Slave() {};
void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank);
void SemInit(void) {
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
SEM_INIT(sem_head);
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
SEM_INIT(sem_tail);
}
void SemInitExcl(void) {
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
SEM_INIT_EXCL(sem_head);
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
SEM_INIT_EXCL(sem_tail);
}
void WakeUpDMA(void) {
SEM_POST(sem_head);
};
void WakeUpCompute(void) {
SEM_POST(sem_tail);
};
void WaitForCommand(void) {
SEM_WAIT(sem_head);
};
void WaitForComplete(void) {
SEM_WAIT(sem_tail);
};
void EventLoop (void) {
// std::cout<< " Entering event loop "<<std::endl;
while(1){
WaitForCommand();
// std::cout << "Getting command "<<std::endl;
#if 0
_mm_monitor((void *)&state->head,0,0);
int s=state->start;
if ( s != state->head ) {
_mm_mwait(0,0);
}
#endif
Event();
}
}
int Event (void) ;
uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ;
void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ;
void WaitAll() {
// std::cout << "Queueing WAIT command "<<std::endl;
QueueCommand(COMMAND_WAITALL,0,0,0,squadron,0);
// std::cout << "Waking up DMA "<<std::endl;
WakeUpDMA();
// std::cout << "Waiting from semaphore "<<std::endl;
WaitForComplete();
// std::cout << "Checking FIFO is empty "<<std::endl;
while ( state->tail != state->head );
}
};
////////////////////////////////////////////////////////////////////////
// One instance of a data mover.
// Master and Slave must agree on location in shared memory
////////////////////////////////////////////////////////////////////////
class MPIoffloadEngine {
public:
static std::vector<Slave> Slaves;
static int ShmSetup;
static int UniverseRank;
static int UniverseSize;
static MPI_Comm communicator_universe;
static MPI_Comm communicator_cached;
static MPI_Comm HorizontalComm;
static int HorizontalRank;
static int HorizontalSize;
static MPI_Comm VerticalComm;
static MPI_Win VerticalWindow;
static int VerticalSize;
static int VerticalRank;
static std::vector<void *> VerticalShmBufs;
static std::vector<std::vector<int> > UniverseRanks;
static std::vector<int> UserCommunicatorToWorldRanks;
static MPI_Group WorldGroup, CachedGroup;
static void CommunicatorInit (MPI_Comm &communicator_world,
MPI_Comm &ShmComm,
void * &ShmCommBuf);
static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank);
/////////////////////////////////////////////////////////
// routines for master proc must handle any communicator
/////////////////////////////////////////////////////////
static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
// std::cout<< " Queueing send "<< bytes<< " slave "<< slave << " to comm "<<rank <<std::endl;
Slaves[slave].QueueCommand(COMMAND_ISEND,buf,bytes,tag,comm,rank);
// std::cout << "Queued send command to rank "<< rank<< " via "<<slave <<std::endl;
Slaves[slave].WakeUpDMA();
// std::cout << "Waking up DMA "<< slave<<std::endl;
};
static void QueueSendRecv(int slave,void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src)
{
Slaves[slave].QueueSendRecv(xbuf,rbuf,bytes,xtag,rtag,comm,dest,src);
Slaves[slave].WakeUpDMA();
}
static void QueueRecv(int slave, void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
// std::cout<< " Queueing recv "<< bytes<< " slave "<< slave << " from comm "<<rank <<std::endl;
Slaves[slave].QueueCommand(COMMAND_IRECV,buf,bytes,tag,comm,rank);
// std::cout << "Queued recv command from rank "<< rank<< " via "<<slave <<std::endl;
Slaves[slave].WakeUpDMA();
// std::cout << "Waking up DMA "<< slave<<std::endl;
};
static void WaitAll() {
for(int s=1;s<VerticalSize;s++) {
// std::cout << "Waiting for slave "<< s<<std::endl;
Slaves[s].WaitAll();
}
// std::cout << " Wait all Complete "<<std::endl;
};
static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){
int basework = nwork/units;
int backfill = units-(nwork%units);
if ( me >= units ) {
mywork = myoff = 0;
} else {
mywork = (nwork+me)/units;
myoff = basework * me;
if ( me > backfill )
myoff+= (me-backfill);
}
return;
};
static void QueueRoundRobinSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) {
uint8_t * cxbuf = (uint8_t *) xbuf;
uint8_t * crbuf = (uint8_t *) rbuf;
static int rrp=0;
int procs = VerticalSize-1;
int myoff=0;
int mywork=bytes;
QueueSendRecv(rrp+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src);
rrp = rrp+1;
if ( rrp == (VerticalSize-1) ) rrp = 0;
}
static void QueueMultiplexedSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) {
uint8_t * cxbuf = (uint8_t *) xbuf;
uint8_t * crbuf = (uint8_t *) rbuf;
int mywork, myoff, procs;
procs = VerticalSize-1;
for(int s=0;s<procs;s++) {
GetWork(bytes,s,mywork,myoff,procs);
QueueSendRecv(s+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src);
}
};
static void QueueMultiplexedSend(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
uint8_t * cbuf = (uint8_t *) buf;
int mywork, myoff, procs;
procs = VerticalSize-1;
for(int s=0;s<procs;s++) {
GetWork(bytes,s,mywork,myoff,procs);
QueueSend(s+1,&cbuf[myoff],mywork,tag,comm,rank);
}
};
static void QueueMultiplexedRecv(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
uint8_t * cbuf = (uint8_t *) buf;
int mywork, myoff, procs;
procs = VerticalSize-1;
for(int s=0;s<procs;s++) {
GetWork(bytes,s,mywork,myoff,procs);
QueueRecv(s+1,&cbuf[myoff],mywork,tag,comm,rank);
}
};
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
std::vector<Slave> MPIoffloadEngine::Slaves;
int MPIoffloadEngine::UniverseRank;
int MPIoffloadEngine::UniverseSize;
MPI_Comm MPIoffloadEngine::communicator_universe;
MPI_Comm MPIoffloadEngine::communicator_cached;
MPI_Group MPIoffloadEngine::WorldGroup;
MPI_Group MPIoffloadEngine::CachedGroup;
MPI_Comm MPIoffloadEngine::HorizontalComm;
int MPIoffloadEngine::HorizontalRank;
int MPIoffloadEngine::HorizontalSize;
MPI_Comm MPIoffloadEngine::VerticalComm;
int MPIoffloadEngine::VerticalSize;
int MPIoffloadEngine::VerticalRank;
MPI_Win MPIoffloadEngine::VerticalWindow;
std::vector<void *> MPIoffloadEngine::VerticalShmBufs;
std::vector<std::vector<int> > MPIoffloadEngine::UniverseRanks;
std::vector<int> MPIoffloadEngine::UserCommunicatorToWorldRanks;
int CartesianCommunicator::NodeCount(void) { return HorizontalSize;};
int MPIoffloadEngine::ShmSetup = 0;
void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
MPI_Comm &ShmComm,
void * &ShmCommBuf)
{
int flag;
assert(ShmSetup==0);
//////////////////////////////////////////////////////////////////////
// Universe is all nodes prior to squadron grouping
//////////////////////////////////////////////////////////////////////
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe);
MPI_Comm_rank(communicator_universe,&UniverseRank);
MPI_Comm_size(communicator_universe,&UniverseSize);
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory (Verticals)
/////////////////////////////////////////////////////////////////////
#undef MPI_SHARED_MEM_DEBUG
#ifdef MPI_SHARED_MEM_DEBUG
MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm);
#else
MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm);
#endif
MPI_Comm_rank(VerticalComm ,&VerticalRank);
MPI_Comm_size(VerticalComm ,&VerticalSize);
//////////////////////////////////////////////////////////////////////
// Split into horizontal groups by rank in squadron
//////////////////////////////////////////////////////////////////////
MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm);
MPI_Comm_rank(HorizontalComm,&HorizontalRank);
MPI_Comm_size(HorizontalComm,&HorizontalSize);
assert(HorizontalSize*VerticalSize==UniverseSize);
////////////////////////////////////////////////////////////////////////////////
// What is my place in the world
////////////////////////////////////////////////////////////////////////////////
int WorldRank=0;
if(VerticalRank==0) WorldRank = HorizontalRank;
int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm);
assert(ierr==0);
////////////////////////////////////////////////////////////////////////////////
// Where is the world in the universe?
////////////////////////////////////////////////////////////////////////////////
UniverseRanks = std::vector<std::vector<int> >(HorizontalSize,std::vector<int>(VerticalSize,0));
UniverseRanks[WorldRank][VerticalRank] = UniverseRank;
for(int w=0;w<HorizontalSize;w++){
ierr=MPI_Allreduce(MPI_IN_PLACE,&UniverseRanks[w][0],VerticalSize,MPI_INT,MPI_SUM,communicator_universe);
assert(ierr==0);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared window for our group, pass back Shm info to CartesianCommunicator
//////////////////////////////////////////////////////////////////////////////////////////////////////////
VerticalShmBufs.resize(VerticalSize);
#undef MPI_SHARED_MEM
#ifdef MPI_SHARED_MEM
ierr = MPI_Win_allocate_shared(CartesianCommunicator::MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,VerticalComm,&ShmCommBuf,&VerticalWindow);
ierr|= MPI_Win_lock_all (MPI_MODE_NOCHECK, VerticalWindow);
assert(ierr==0);
// std::cout<<"SHM "<<ShmCommBuf<<std::endl;
for(int r=0;r<VerticalSize;r++){
MPI_Aint sz;
int dsp_unit;
MPI_Win_shared_query (VerticalWindow, r, &sz, &dsp_unit, &VerticalShmBufs[r]);
// std::cout<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
}
#else
char shm_name [NAME_MAX];
MPI_Barrier(VerticalComm);
if ( VerticalRank == 0 ) {
for(int r=0;r<VerticalSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
if ( r>0 ) size = sizeof(SlaveState);
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600);
if ( fd < 0 ) {
perror("failed shm_open");
assert(0);
}
ftruncate(fd, size);
VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( VerticalShmBufs[r] == MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
/*
for(uint64_t page=0;page<size;page+=4096){
void *pages = (void *) ( page + (uint64_t)VerticalShmBufs[r] );
int status;
int flags=MPOL_MF_MOVE_ALL;
int nodes=1; // numa domain == MCDRAM
unsigned long count=1;
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
if (ierr && (page==0)) perror("numa relocate command failed");
}
*/
uint64_t * check = (uint64_t *) VerticalShmBufs[r];
check[0] = WorldRank;
check[1] = r;
// std::cout<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
}
}
MPI_Barrier(VerticalComm);
if ( VerticalRank != 0 ) {
for(int r=0;r<VerticalSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES ;
if ( r>0 ) size = sizeof(SlaveState);
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600);
if ( fd<0 ) {
perror("failed shm_open");
assert(0);
}
VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
uint64_t * check = (uint64_t *) VerticalShmBufs[r];
assert(check[0]== WorldRank);
assert(check[1]== r);
// std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
}
}
#endif
MPI_Barrier(VerticalComm);
//////////////////////////////////////////////////////////////////////
// Map rank of leader on node in their in new world, to the
// rank in this vertical plane's horizontal communicator
//////////////////////////////////////////////////////////////////////
communicator_world = HorizontalComm;
ShmComm = VerticalComm;
ShmCommBuf = VerticalShmBufs[0];
MPI_Comm_group (communicator_world, &WorldGroup);
///////////////////////////////////////////////////////////
// Start the slave data movers
///////////////////////////////////////////////////////////
if ( VerticalRank != 0 ) {
Slave indentured;
indentured.Init( (SlaveState *) VerticalShmBufs[VerticalRank], VerticalComm, UniverseRank,VerticalRank);
indentured.SemInitExcl();// init semaphore in shared memory
MPI_Barrier(VerticalComm);
MPI_Barrier(VerticalComm);
indentured.EventLoop();
assert(0);
} else {
Slaves.resize(VerticalSize);
for(int i=1;i<VerticalSize;i++){
Slaves[i].Init((SlaveState *)VerticalShmBufs[i],VerticalComm, UniverseRanks[HorizontalRank][i],i);
}
MPI_Barrier(VerticalComm);
for(int i=1;i<VerticalSize;i++){
Slaves[i].SemInit();// init semaphore in shared memory
}
MPI_Barrier(VerticalComm);
}
///////////////////////////////////////////////////////////
// Verbose for now
///////////////////////////////////////////////////////////
ShmSetup=1;
if (UniverseRank == 0){
std::cout<<GridLogMessage << "Grid MPI-3 configuration: detected ";
std::cout<<UniverseSize << " Ranks " ;
std::cout<<HorizontalSize << " Nodes " ;
std::cout<<VerticalSize << " with ranks-per-node "<<std::endl;
std::cout<<GridLogMessage << "Grid MPI-3 configuration: using one lead process per node " << std::endl;
std::cout<<GridLogMessage << "Grid MPI-3 configuration: reduced communicator has size " << HorizontalSize << std::endl;
for(int g=0;g<HorizontalSize;g++){
std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<< UniverseRanks[g][0]<<std::endl;
}
for(int g=0;g<HorizontalSize;g++){
std::cout<<GridLogMessage<<" { ";
for(int s=0;s<VerticalSize;s++){
std::cout<< UniverseRanks[g][s];
if ( s<VerticalSize-1 ) {
std::cout<<",";
}
}
std::cout<<" } "<<std::endl;
}
}
};
///////////////////////////////////////////////////////////////////////////////////////////////
// Map the communicator into communicator_world, and find the neighbour.
// Cache the mappings; cache size is 1.
///////////////////////////////////////////////////////////////////////////////////////////////
void MPIoffloadEngine::MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int rank) {
if ( comm == HorizontalComm ) {
comm_world_peer = rank;
// std::cout << " MapCommRankToWorldRank horiz " <<rank<<"->"<<comm_world_peer<<std::endl;
} else if ( comm == communicator_cached ) {
comm_world_peer = UserCommunicatorToWorldRanks[rank];
// std::cout << " MapCommRankToWorldRank cached " <<rank<<"->"<<comm_world_peer<<std::endl;
} else {
int size;
MPI_Comm_size(comm,&size);
UserCommunicatorToWorldRanks.resize(size);
std::vector<int> cached_ranks(size);
for(int r=0;r<size;r++) {
cached_ranks[r]=r;
}
communicator_cached=comm;
MPI_Comm_group(communicator_cached, &CachedGroup);
MPI_Group_translate_ranks(CachedGroup,size,&cached_ranks[0],WorldGroup, &UserCommunicatorToWorldRanks[0]);
comm_world_peer = UserCommunicatorToWorldRanks[rank];
// std::cout << " MapCommRankToWorldRank cache miss " <<rank<<"->"<<comm_world_peer<<std::endl;
assert(comm_world_peer != MPI_UNDEFINED);
}
assert( (tag & (~0xFFFFL)) ==0);
uint64_t icomm = (uint64_t)comm;
int comm_hash = ((icomm>>0 )&0xFFFF)^((icomm>>16)&0xFFFF)
^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF);
// hashtag = (comm_hash<<15) | tag;
hashtag = tag;
};
void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank)
{
squadron=_squadron;
universe_rank=_universe_rank;
vertical_rank=_vertical_rank;
state =_state;
// std::cout << "state "<<_state<<" comm "<<_squadron<<" universe_rank"<<universe_rank <<std::endl;
state->head = state->tail = state->start = 0;
base = (uint64_t)MPIoffloadEngine::VerticalShmBufs[0];
int rank; MPI_Comm_rank(_squadron,&rank);
}
#define PERI_PLUS(A) ( (A+1)%pool )
int Slave::Event (void) {
static int tail_last;
static int head_last;
static int start_last;
int ierr;
MPI_Status stat;
static int i=0;
////////////////////////////////////////////////////
// Try to advance the start pointers
////////////////////////////////////////////////////
int s=state->start;
if ( s != state->head ) {
switch ( state->Descrs[s].command ) {
case COMMAND_ISEND:
ierr = MPI_Isend((void *)(state->Descrs[s].buf+base),
state->Descrs[s].bytes,
MPI_CHAR,
state->Descrs[s].rank,
state->Descrs[s].tag,
MPIoffloadEngine::communicator_universe,
(MPI_Request *)&state->Descrs[s].request);
assert(ierr==0);
state->start = PERI_PLUS(s);
return 1;
break;
case COMMAND_IRECV:
ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base),
state->Descrs[s].bytes,
MPI_CHAR,
state->Descrs[s].rank,
state->Descrs[s].tag,
MPIoffloadEngine::communicator_universe,
(MPI_Request *)&state->Descrs[s].request);
// std::cout<< " Request is "<<state->Descrs[s].request<<std::endl;
// std::cout<< " Request0 is "<<state->Descrs[0].request<<std::endl;
assert(ierr==0);
state->start = PERI_PLUS(s);
return 1;
break;
case COMMAND_SENDRECV:
// fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10);
ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10,
(void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10,
MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE);
assert(ierr==0);
// fprintf(stderr,"Sendrecv done %d %d\n",ierr,i);
// MPI_Barrier(MPIoffloadEngine::HorizontalComm);
// fprintf(stderr,"Barrier\n");
i++;
state->start = PERI_PLUS(s);
return 1;
break;
case COMMAND_WAITALL:
for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){
if ( state->Descrs[t].command != COMMAND_SENDRECV ) {
MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
}
};
s=PERI_PLUS(s);
state->start = s;
state->tail = s;
WakeUpCompute();
return 1;
break;
default:
assert(0);
break;
}
}
return 0;
}
//////////////////////////////////////////////////////////////////////////////
// External interaction with the queue
//////////////////////////////////////////////////////////////////////////////
void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src)
{
int head =state->head;
int next = PERI_PLUS(head);
// Set up descriptor
int worldrank;
int hashtag;
MPI_Comm communicator;
MPI_Request request;
uint64_t relative;
relative = (uint64_t)xbuf - base;
state->Descrs[head].xbuf = relative;
relative= (uint64_t)rbuf - base;
state->Descrs[head].rbuf = relative;
state->Descrs[head].bytes = bytes;
MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest);
state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
state->Descrs[head].xtag = hashtag;
MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src);
state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
state->Descrs[head].rtag = hashtag;
state->Descrs[head].command= COMMAND_SENDRECV;
// Block until FIFO has space
while( state->tail==next );
// Msync on weak order architectures
// Advance pointer
state->head = next;
};
uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank)
{
/////////////////////////////////////////
// Spin; if FIFO is full until not full
/////////////////////////////////////////
int head =state->head;
int next = PERI_PLUS(head);
// Set up descriptor
int worldrank;
int hashtag;
MPI_Comm communicator;
MPI_Request request;
MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank);
uint64_t relative= (uint64_t)buf - base;
state->Descrs[head].buf = relative;
state->Descrs[head].bytes = bytes;
state->Descrs[head].rank = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
state->Descrs[head].tag = hashtag;
state->Descrs[head].command= command;
/*
if ( command == COMMAND_ISEND ) {
std::cout << "QueueSend from "<< universe_rank <<" to commrank " << commrank
<< " to worldrank " << worldrank <<std::endl;
std::cout << " via VerticalRank "<< vertical_rank <<" to universerank " << MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]<<std::endl;
std::cout << " QueueCommand "<<buf<<"["<<bytes<<"]" << std::endl;
}
if ( command == COMMAND_IRECV ) {
std::cout << "QueueRecv on "<< universe_rank <<" from commrank " << commrank
<< " from worldrank " << worldrank <<std::endl;
std::cout << " via VerticalRank "<< vertical_rank <<" from universerank " << MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]<<std::endl;
std::cout << " QueueSend "<<buf<<"["<<bytes<<"]" << std::endl;
}
*/
// Block until FIFO has space
while( state->tail==next );
// Msync on weak order architectures
// Advance pointer
state->head = next;
return 0;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Comm CartesianCommunicator::communicator_world;
void CartesianCommunicator::Init(int *argc, char ***argv)
{
int flag;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init(argc,argv);
}
communicator_world = MPI_COMM_WORLD;
MPI_Comm ShmComm;
MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf);
}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
assert(ierr==0);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
assert(ierr==0);
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
coor.resize(_ndimension);
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
assert(ierr==0);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_ndimension = processors.size();
std::vector<int> periodic(_ndimension,1);
_Nprocessors=1;
_processors = processors;
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
int Size;
MPI_Comm_size(communicator_world,&Size);
assert(Size==_Nprocessors);
_processor_coor.resize(_ndimension);
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Comm_rank (communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
};
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(double &d)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
MPI_Status stat;
assert(sender != receiver);
int tag = sender;
if ( _processor == sender ) {
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
}
if ( _processor == receiver ) {
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
uint64_t xmit_i = (uint64_t) xmit;
uint64_t recv_i = (uint64_t) recv;
uint64_t shm = (uint64_t) ShmCommBuf;
// assert xmit and recv lie in shared memory region
assert( (xmit_i >= shm) && (xmit_i+bytes <= shm+MAX_MPI_SHM_BYTES) );
assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) );
assert(from!=_processor);
assert(dest!=_processor);
MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from);
//MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from);
//MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
//MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
MPIoffloadEngine::WaitAll();
//this->Barrier();
}
void CartesianCommunicator::StencilBarrier(void) { }
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator);
assert(ierr==0);
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator_world);
assert(ierr==0);
}
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
void *CartesianCommunicator::ShmBuffer(int rank) {
return NULL;
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
return NULL;
}
};

View File

@ -1,259 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/Communicator_mpi.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/ActionCore.h>
#include <mpi.h>
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Comm CartesianCommunicator::communicator_world;
// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
if ( provided != MPI_THREAD_MULTIPLE ) {
QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
}
}
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
ShmInitGeneric();
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(double &d)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
assert(ierr==0);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
assert(ierr==0);
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
coor.resize(_ndimension);
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
MPI_Status stat;
assert(sender != receiver);
int tag = sender;
if ( _processor == sender ) {
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
}
if ( _processor == receiver ) {
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
int myrank = _processor;
int ierr;
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
}
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator);
assert(ierr==0);
}
///////////////////////////////////////////////////////
// Should only be used prior to Grid Init finished.
// Check for this?
///////////////////////////////////////////////////////
int CartesianCommunicator::RankWorld(void){
int r;
MPI_Comm_rank(communicator_world,&r);
return r;
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator_world);
assert(ierr==0);
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes,int dir)
{
int myrank = _processor;
int ierr;
assert(dir < communicator_halo.size());
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
// Give the CPU to MPI immediately; can use threads to overlap optionally
MPI_Request req[2];
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]);
list.push_back(req[0]);
list.push_back(req[1]);
return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
int nreq=waitall.size();
MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE);
};
double CartesianCommunicator::StencilSendToRecvFrom(void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes,int dir)
{
int myrank = _processor;
int ierr;
assert(dir < communicator_halo.size());
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
// Give the CPU to MPI immediately; can use threads to overlap optionally
MPI_Request req[2];
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]);
MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
return 2.0*bytes;
}
}

View File

@ -32,14 +32,22 @@ namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
Grid_MPI_Comm CartesianCommunicator::communicator_world;
void CartesianCommunicator::Init(int *argc, char *** arv)
{
ShmInitGeneric();
GlobalSharedMemory::Init(communicator_world);
GlobalSharedMemory::SharedMemoryAllocate(
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
GlobalSharedMemory::Hugepages);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors) {}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors)
{
srank=0;
SetCommunicator(communicator_world);
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
@ -54,8 +62,11 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
assert(_processors[d]==1);
_processor_coor[d] = 0;
}
SetCommunicator(communicator_world);
}
CartesianCommunicator::~CartesianCommunicator(){}
void CartesianCommunicator::GlobalSum(float &){}
void CartesianCommunicator::GlobalSumVector(float *,int N){}
void CartesianCommunicator::GlobalSum(double &){}
@ -98,9 +109,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes);
bcopy(in,out,bytes*words);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes*words);
}
int CartesianCommunicator::RankWorld(void){return 0;}
@ -115,6 +130,36 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest
dest=0;
}
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes, int dir)
{
std::vector<CommsRequest_t> list;
// Discard the "dir"
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
SendToRecvFromComplete(list);
return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes, int dir)
{
// Discard the "dir"
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilBarrier(void){};
}

View File

@ -1,355 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/Communicator_shmem.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <mpp/shmem.h>
#include <array>
namespace Grid {
// Should error check all MPI calls.
#define SHMEM_VET(addr)
#define SHMEM_VET_DEBUG(addr) { \
if ( ! shmem_addr_accessible(addr,_processor) ) {\
std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \
BACKTRACEFILE(); \
}\
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
typedef struct HandShake_t {
uint64_t seq_local;
uint64_t seq_remote;
} HandShake;
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
ret.fill(SHMEM_SYNC_VALUE);
return ret;
}
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync_init = make_psync_init();
static Vector< HandShake > XConnections;
static Vector< HandShake > RConnections;
void CartesianCommunicator::Init(int *argc, char ***argv) {
shmem_init();
XConnections.resize(shmem_n_pes());
RConnections.resize(shmem_n_pes());
for(int pe =0 ; pe<shmem_n_pes();pe++){
XConnections[pe].seq_local = 0;
XConnections[pe].seq_remote= 0;
RConnections[pe].seq_local = 0;
RConnections[pe].seq_remote= 0;
}
shmem_barrier_all();
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors)
{
std::cout << "Attempts to split SHMEM communicators will fail " <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_ndimension = processors.size();
std::vector<int> periodic(_ndimension,1);
_Nprocessors=1;
_processors = processors;
_processor_coor.resize(_ndimension);
_processor = shmem_my_pe();
Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors);
for(int i=0;i<_ndimension;i++){
_Nprocessors*=_processors[i];
}
int Size = shmem_n_pes();
assert(Size==_Nprocessors);
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
static long long source ;
static long long dest ;
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
// int nreduce=1;
// int pestart=0;
// int logStride=0;
source = u;
dest = 0;
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all(); // necessary?
u = dest;
}
void CartesianCommunicator::GlobalSum(uint64_t &u){
static long long source ;
static long long dest ;
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
// int nreduce=1;
// int pestart=0;
// int logStride=0;
source = u;
dest = 0;
shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all(); // necessary?
u = dest;
}
void CartesianCommunicator::GlobalSum(float &f){
static float source ;
static float dest ;
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
source = f;
dest =0.0;
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
f = dest;
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
static float source ;
static float dest = 0 ;
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
if ( shmem_addr_accessible(f,_processor) ){
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
return;
}
for(int i=0;i<N;i++){
dest =0.0;
source = f[i];
shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
f[i] = dest;
}
}
void CartesianCommunicator::GlobalSum(double &d)
{
static double source;
static double dest ;
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
source = d;
dest = 0;
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
d = dest;
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
static double source ;
static double dest ;
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
if ( shmem_addr_accessible(d,_processor) ){
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
return;
}
for(int i=0;i<N;i++){
source = d[i];
dest =0.0;
shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data());
shmem_barrier_all();
d[i] = dest;
}
}
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
std::vector<int> coor = _processor_coor;
assert(std::abs(shift) <_processors[dim]);
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,source,_processors);
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,dest,_processors);
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
Lexicographic::IndexFromCoor(coor,rank,_processors);
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
Lexicographic::CoorFromIndex(coor,rank,_processors);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
SHMEM_VET(xmit);
SHMEM_VET(recv);
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
static uint64_t seq;
assert(recv!=xmit);
volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender];
volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver];
if ( _processor == sender ) {
// Check he has posted a receive
while(SendSeq->seq_remote == SendSeq->seq_local);
// Advance our send count
seq = ++(SendSeq->seq_local);
// Send this packet
SHMEM_VET(recv);
shmem_putmem(recv,xmit,bytes,receiver);
shmem_fence();
//Notify him we're done
shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
shmem_fence();
}
if ( _processor == receiver ) {
// Post a receive
seq = ++(RecvSeq->seq_local);
shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
// Now wait until he has advanced our reception counter
while(RecvSeq->seq_remote != RecvSeq->seq_local);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
SHMEM_VET(xmit);
SHMEM_VET(recv);
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
shmem_putmem(recv,xmit,bytes,dest);
if ( CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all();
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
// shmem_quiet(); // I'm done
if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too
}
void CartesianCommunicator::Barrier(void)
{
shmem_barrier_all();
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
static uint32_t word;
uint32_t *array = (uint32_t *) data;
assert( (bytes % 4)==0);
int words = bytes/4;
if ( shmem_addr_accessible(data,_processor) ){
shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data());
return;
}
for(int w=0;w<words;w++){
word = array[w];
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
if ( shmem_my_pe() != root ) {
array[w] = word;
}
shmem_barrier_all();
}
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
static uint32_t word;
uint32_t *array = (uint32_t *) data;
assert( (bytes % 4)==0);
int words = bytes/4;
for(int w=0;w<words;w++){
word = array[w];
shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync.data());
if ( shmem_my_pe() != root ) {
array[w]= word;
}
shmem_barrier_all();
}
}
int CartesianCommunicator::RankWorld(void){
return shmem_my_pe();
}
}

View File

@ -0,0 +1,92 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
namespace Grid {
// static data
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
int GlobalSharedMemory::Hugepages = 0;
int GlobalSharedMemory::_ShmSetup;
int GlobalSharedMemory::_ShmAlloc;
uint64_t GlobalSharedMemory::_ShmAllocBytes;
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
Grid_MPI_Comm GlobalSharedMemory::WorldShmComm;
int GlobalSharedMemory::WorldShmRank;
int GlobalSharedMemory::WorldShmSize;
std::vector<int> GlobalSharedMemory::WorldShmRanks;
Grid_MPI_Comm GlobalSharedMemory::WorldComm;
int GlobalSharedMemory::WorldSize;
int GlobalSharedMemory::WorldRank;
int GlobalSharedMemory::WorldNodes;
int GlobalSharedMemory::WorldNode;
void GlobalSharedMemory::SharedMemoryFree(void)
{
assert(_ShmAlloc);
assert(_ShmAllocBytes>0);
for(int r=0;r<WorldShmSize;r++){
munmap(WorldShmCommBufs[r],_ShmAllocBytes);
}
_ShmAlloc = 0;
_ShmAllocBytes = 0;
}
/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *SharedMemory::ShmBufferMalloc(size_t bytes){
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
void *ptr = (void *)heap_top;
heap_top += bytes;
heap_bytes+= bytes;
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
assert(heap_bytes<heap_size);
}
return ptr;
}
void SharedMemory::ShmBufferFreeAll(void) {
heap_top =(size_t)ShmBufferSelf();
heap_bytes=0;
}
void *SharedMemory::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
}
}

View File

@ -0,0 +1,164 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
// TODO
// 1) move includes into SharedMemory.cc
//
// 2) split shared memory into a) optimal communicator creation from comm world
//
// b) shared memory buffers container
// -- static globally shared; init once
// -- per instance set of buffers.
//
#pragma once
#include <Grid/GridCore.h>
#if defined (GRID_COMMS_MPI3)
#include <mpi.h>
#endif
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <zlib.h>
#ifdef HAVE_NUMAIF_H
#include <numaif.h>
#endif
namespace Grid {
#if defined (GRID_COMMS_MPI3)
typedef MPI_Comm Grid_MPI_Comm;
typedef MPI_Request CommsRequest_t;
#else
typedef int CommsRequest_t;
typedef int Grid_MPI_Comm;
#endif
class GlobalSharedMemory {
private:
static const int MAXLOG2RANKSPERNODE = 16;
// Init once lock on the buffer allocation
static int _ShmSetup;
static int _ShmAlloc;
static uint64_t _ShmAllocBytes;
public:
static int ShmSetup(void) { return _ShmSetup; }
static int ShmAlloc(void) { return _ShmAlloc; }
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
static uint64_t MAX_MPI_SHM_BYTES;
static int Hugepages;
static std::vector<void *> WorldShmCommBufs;
static Grid_MPI_Comm WorldComm;
static int WorldRank;
static int WorldSize;
static Grid_MPI_Comm WorldShmComm;
static int WorldShmRank;
static int WorldShmSize;
static int WorldNodes;
static int WorldNode;
static std::vector<int> WorldShmRanks;
//////////////////////////////////////////////////////////////////////////////////////
// Create an optimal reordered communicator that makes MPI_Cart_create get it right
//////////////////////////////////////////////////////////////////////////////////////
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
///////////////////////////////////////////////////
// Provide shared memory facilities off comm world
///////////////////////////////////////////////////
static void SharedMemoryAllocate(uint64_t bytes, int flags);
static void SharedMemoryFree(void);
};
//////////////////////////////
// one per communicator
//////////////////////////////
class SharedMemory
{
private:
static const int MAXLOG2RANKSPERNODE = 16;
size_t heap_top;
size_t heap_bytes;
size_t heap_size;
protected:
Grid_MPI_Comm ShmComm; // for barriers
int ShmRank;
int ShmSize;
std::vector<void *> ShmCommBufs;
std::vector<int> ShmRanks;// Mapping comm ranks to Shm ranks
public:
SharedMemory() {};
///////////////////////////////////////////////////////////////////////////////////////
// set the buffers & sizes
///////////////////////////////////////////////////////////////////////////////////////
void SetCommunicator(Grid_MPI_Comm comm);
////////////////////////////////////////////////////////////////////////
// For this instance ; disjoint buffer sets between splits if split grid
////////////////////////////////////////////////////////////////////////
void ShmBarrier(void);
///////////////////////////////////////////////////
// Call on any instance
///////////////////////////////////////////////////
void SharedMemoryTest(void);
void *ShmBufferSelf(void);
void *ShmBuffer (int rank);
void *ShmBufferTranslate(int rank,void * local_p);
void *ShmBufferMalloc(size_t bytes);
void ShmBufferFreeAll(void) ;
//////////////////////////////////////////////////////////////////////////
// Make info on Nodes & ranks and Shared memory available
//////////////////////////////////////////////////////////////////////////
int NodeCount(void) { return GlobalSharedMemory::WorldNodes;};
int RankCount(void) { return GlobalSharedMemory::WorldSize;};
};
}

View File

@ -0,0 +1,395 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
namespace Grid {
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
assert(_ShmSetup==0);
WorldComm = comm;
MPI_Comm_rank(WorldComm,&WorldRank);
MPI_Comm_size(WorldComm,&WorldSize);
// WorldComm, WorldSize, WorldRank
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
MPI_Comm_rank(WorldShmComm ,&WorldShmRank);
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
// WorldShmComm, WorldShmSize, WorldShmRank
// WorldNodes
WorldNodes = WorldSize/WorldShmSize;
assert( (WorldNodes * WorldShmSize) == WorldSize );
// FIXME: Check all WorldShmSize are the same ?
/////////////////////////////////////////////////////////////////////
// find world ranks in our SHM group (i.e. which ranks are on our node)
/////////////////////////////////////////////////////////////////////
MPI_Group WorldGroup, ShmGroup;
MPI_Comm_group (WorldComm, &WorldGroup);
MPI_Comm_group (WorldShmComm, &ShmGroup);
std::vector<int> world_ranks(WorldSize); for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
WorldShmRanks.resize(WorldSize);
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &WorldShmRanks[0]);
///////////////////////////////////////////////////////////////////
// Identify who is in my group and nominate the leader
///////////////////////////////////////////////////////////////////
int g=0;
std::vector<int> MyGroup;
MyGroup.resize(WorldShmSize);
for(int rank=0;rank<WorldSize;rank++){
if(WorldShmRanks[rank]!=MPI_UNDEFINED){
assert(g<WorldShmSize);
MyGroup[g++] = rank;
}
}
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
int myleader = MyGroup[0];
std::vector<int> leaders_1hot(WorldSize,0);
std::vector<int> leaders_group(WorldNodes,0);
leaders_1hot [ myleader ] = 1;
///////////////////////////////////////////////////////////////////
// global sum leaders over comm world
///////////////////////////////////////////////////////////////////
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
assert(ierr==0);
///////////////////////////////////////////////////////////////////
// find the group leaders world rank
///////////////////////////////////////////////////////////////////
int group=0;
for(int l=0;l<WorldSize;l++){
if(leaders_1hot[l]){
leaders_group[group++] = l;
}
}
///////////////////////////////////////////////////////////////////
// Identify the node of the group in which I (and my leader) live
///////////////////////////////////////////////////////////////////
WorldNode=-1;
for(int g=0;g<WorldNodes;g++){
if (myleader == leaders_group[g]){
WorldNode=g;
}
}
assert(WorldNode!=-1);
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = -1;
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
if ( (0x1<<i) == WorldShmSize ) {
log2size = i;
break;
}
}
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int ndimension = processors.size();
std::vector<int> processor_coor(ndimension);
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%ndimension;
}
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<ndimension;d++){
NodeDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
int Nprocessors=1;
for(int i=0;i<ndimension;i++){
Nprocessors*=processors[i];
}
assert(WorldSize==Nprocessors);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
////////////////////////////////////////////////////////////////
int rank;
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
/////////////////////////////////////////////////////////////////
// Build the new communicator
/////////////////////////////////////////////////////////////////
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
assert(ierr==0);
}
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbf and others map filesystems as mappable huge pages
////////////////////////////////////////////////////////////////////////////////////////////
char shm_name [NAME_MAX];
for(int r=0;r<WorldShmSize;r++){
sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",WorldNode,r);
int fd=open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd == -1) {
printf("open %s failed\n",shm_name);
perror("open hugetlbfs");
exit(0);
}
int mmap_flag = MAP_SHARED ;
#ifdef MAP_POPULATE
mmap_flag|=MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
if ( ptr == (void *)MAP_FAILED ) {
printf("mmap %s failed\n",shm_name);
perror("failed mmap"); assert(0);
}
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
};
#endif // MMAP
#ifdef GRID_MPI3_SHMOPEN
////////////////////////////////////////////////////////////////////////////////////////////
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
// the posix shm virtual file system
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
char shm_name [NAME_MAX];
if ( WorldShmRank == 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
int mmap_flag = MAP_SHARED;
#ifdef MAP_POPULATE
mmap_flag |= MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
if (flags) mmap_flag |= MAP_HUGETLB;
#endif
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
close(fd);
}
}
MPI_Barrier(WorldShmComm);
if ( WorldShmRank != 0 ) {
for(int r=0;r<WorldShmSize;r++){
size_t size = bytes ;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
WorldShmCommBufs[r] =ptr;
close(fd);
}
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
}
#endif
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{
int rank, size;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
ShmRanks.resize(size);
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
MPI_Comm_rank(ShmComm ,&ShmRank);
MPI_Comm_size(ShmComm ,&ShmSize);
ShmCommBufs.resize(ShmSize);
//////////////////////////////////////////////////////////////////////
// Map ShmRank to WorldShmRank and use the right buffer
//////////////////////////////////////////////////////////////////////
assert (GlobalSharedMemory::ShmAlloc()==1);
heap_size = GlobalSharedMemory::ShmAllocBytes();
for(int r=0;r<ShmSize;r++){
uint32_t sr = (r==ShmRank) ? GlobalSharedMemory::WorldRank : 0 ;
MPI_Allreduce(MPI_IN_PLACE,&sr,1,MPI_UINT32_T,MPI_SUM,comm);
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[sr];
}
ShmBufferFreeAll();
/////////////////////////////////////////////////////////////////////
// find comm ranks in our SHM group (i.e. which ranks are on our node)
/////////////////////////////////////////////////////////////////////
MPI_Group FullGroup, ShmGroup;
MPI_Comm_group (comm , &FullGroup);
MPI_Comm_group (ShmComm, &ShmGroup);
std::vector<int> ranks(size); for(int r=0;r<size;r++) ranks[r]=r;
MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]);
}
//////////////////////////////////////////////////////////////////
// On node barrier
//////////////////////////////////////////////////////////////////
void SharedMemory::ShmBarrier(void)
{
MPI_Barrier (ShmComm);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Test the shared memory is working
//////////////////////////////////////////////////////////////////////////////////////////////////////////
void SharedMemory::SharedMemoryTest(void)
{
ShmBarrier();
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
check[0] = GlobalSharedMemory::WorldNode;
check[1] = r;
check[2] = 0x5A5A5A;
}
}
ShmBarrier();
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
assert(check[0]==GlobalSharedMemory::WorldNode);
assert(check[1]==r);
assert(check[2]==0x5A5A5A);
}
ShmBarrier();
}
void *SharedMemory::ShmBuffer(int rank)
{
int gpeer = ShmRanks[rank];
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
return ShmCommBufs[gpeer];
}
}
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
{
static int count =0;
int gpeer = ShmRanks[rank];
assert(gpeer!=ShmRank); // never send to self
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
return (void *) remote;
}
}
}

View File

@ -0,0 +1,126 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/SharedMemory.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
namespace Grid {
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
assert(_ShmSetup==0);
WorldComm = 0;
WorldRank = 0;
WorldSize = 1;
WorldShmComm = 0 ;
WorldShmRank = 0 ;
WorldShmSize = 1 ;
WorldNodes = 1 ;
WorldNode = 0 ;
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
WorldShmCommBufs.resize(1);
_ShmSetup=1;
}
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
optimal_comm = WorldComm;
}
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended, use anonymous mmap
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
int mmap_flag =0;
#ifdef MAP_ANONYMOUS
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
#endif
#ifdef MAP_ANON
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
#endif
#ifdef MAP_HUGETLB
if ( flags ) mmap_flag |= MAP_HUGETLB;
#endif
ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
if (ShmCommBuf == (void *)MAP_FAILED) {
perror("mmap failed ");
exit(EXIT_FAILURE);
}
#ifdef MADV_HUGEPAGE
if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE);
#endif
bzero(ShmCommBuf,bytes);
WorldShmCommBufs[0] = ShmCommBuf;
_ShmAllocBytes=bytes;
_ShmAlloc=1;
};
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{
assert(GlobalSharedMemory::ShmAlloc()==1);
ShmRanks.resize(1);
ShmCommBufs.resize(1);
ShmRanks[0] = 0;
ShmRank = 0;
ShmSize = 1;
//////////////////////////////////////////////////////////////////////
// Map ShmRank to WorldShmRank and use the right buffer
//////////////////////////////////////////////////////////////////////
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
heap_size = GlobalSharedMemory::ShmAllocBytes();
ShmBufferFreeAll();
return;
}
//////////////////////////////////////////////////////////////////
// On node barrier
//////////////////////////////////////////////////////////////////
void SharedMemory::ShmBarrier(void){ return ; }
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Test the shared memory is working
//////////////////////////////////////////////////////////////////////////////////////////////////////////
void SharedMemory::SharedMemoryTest(void) { return; }
void *SharedMemory::ShmBuffer(int rank)
{
return NULL;
}
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
{
return NULL;
}
}

View File

@ -77,9 +77,6 @@ namespace Grid {
// merge of April 11 2017
//<<<<<<< HEAD
// this function is necessary for the LS vectorised field
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
@ -91,7 +88,6 @@ namespace Grid {
// all further divisions are local
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
// then divide the number of local sites
// check that the total number of sims agree, meanse the iSites are the same
@ -102,27 +98,6 @@ namespace Grid {
return fine->lSites() / coarse->lSites();
}
/*
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
typedef std::seed_seq::result_type result_type;
std::seed_seq src;
fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
result_type operator () (void){
std::vector<result_type> list(1);
src.generate(list.begin(),list.end());
return list[0];
}
};
=======
>>>>>>> develop
*/
// real scalars are one component
template<class scalar,class distribution,class generator>
@ -171,7 +146,7 @@ namespace Grid {
// support for parallel init
///////////////////////
#ifdef RNG_FAST_DISCARD
static void Skip(RngEngine &eng)
static void Skip(RngEngine &eng,uint64_t site)
{
/////////////////////////////////////////////////////////////////////////////////////
// Skip by 2^40 elements between successive lattice sites
@ -184,8 +159,11 @@ namespace Grid {
// and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary
/////////////////////////////////////////////////////////////////////////////////////
uint64_t skip = 0x1; skip = skip<<40;
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
uint64_t skip = site;
skip = skip<<40;
eng.discard(skip);
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
}
#endif
static RngEngine Reseed(RngEngine &eng)
@ -407,15 +385,14 @@ namespace Grid {
// MT implementation does not implement fast discard even though
// in principle this is possible
////////////////////////////////////////////////
std::vector<int> gcoor;
int rank,o_idx,i_idx;
// Everybody loops over global volume.
for(int gidx=0;gidx<_grid->_gsites;gidx++){
Skip(master_engine); // Skip to next RNG sequence
parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){
// Where is it?
int rank,o_idx,i_idx;
std::vector<int> gcoor;
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
@ -423,6 +400,7 @@ namespace Grid {
if( rank == _grid->ThisRank() ){
int l_idx=generator_idx(o_idx,i_idx);
_generators[l_idx] = master_engine;
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
}
}

View File

@ -50,26 +50,22 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
half.checkerboard = cb;
int ssh=0;
//parallel_for
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
parallel_for(int ss=0;ss<full._grid->oSites();ss++){
int cbos;
std::vector<int> coor;
full._grid->oCoorFromOindex(coor,ss);
cbos=half._grid->CheckerBoard(coor);
if (cbos==cb) {
int ssh=half._grid->oIndex(coor);
half._odata[ssh] = full._odata[ss];
ssh++;
}
}
}
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
int cb = half.checkerboard;
int ssh=0;
//parallel_for
for(int ss=0;ss<full._grid->oSites();ss++){
parallel_for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
@ -77,8 +73,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
cbos=half._grid->CheckerBoard(coor);
if (cbos==cb) {
int ssh=half._grid->oIndex(coor);
full._odata[ss]=half._odata[ssh];
ssh++;
}
}
}
@ -109,8 +105,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
coarseData=zero;
// Loop with a cache friendly loop ordering
for(int sf=0;sf<fine->oSites();sf++){
// Loop over coars parallel, and then loop over fine associated with coarse.
parallel_for(int sf=0;sf<fine->oSites();sf++){
int sc;
std::vector<int> coor_c(_ndimension);
@ -119,8 +115,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
PARALLEL_CRITICAL
for(int i=0;i<nbasis;i++) {
coarseData._odata[sc](i)=coarseData._odata[sc](i)
+ innerProduct(Basis[i]._odata[sf],fineData._odata[sf]);
@ -139,6 +136,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
GridBase * coarse= coarseA._grid;
fineZ.checkerboard=fineX.checkerboard;
assert(fineX.checkerboard==fineY.checkerboard);
subdivides(coarse,fine); // require they map
conformable(fineX,fineY);
conformable(fineX,fineZ);
@ -180,9 +178,10 @@ template<class vobj,class CComplex>
GridBase *coarse(CoarseInner._grid);
GridBase *fine (fineX._grid);
Lattice<dotp> fine_inner(fine);
Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard;
Lattice<dotp> coarse_inner(coarse);
// Precision promotion?
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
parallel_for(int ss=0;ss<coarse->oSites();ss++){
@ -193,7 +192,7 @@ template<class vobj,class CComplex>
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
{
GridBase *coarse = ip._grid;
Lattice<vobj> zz(fineX._grid); zz=zero;
Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard;
blockInnerProduct(ip,fineX,fineX);
ip = pow(ip,-0.5);
blockZAXPY(fineX,ip,fineX,zz);
@ -216,19 +215,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
}
// Turn this around to loop threaded over sc and interior loop
// over sf would thread better
coarseData=zero;
for(int sf=0;sf<fine->oSites();sf++){
parallel_region {
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
PARALLEL_CRITICAL
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
}
}
return;
}
@ -238,7 +243,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
{
GridBase * fine = unpicked._grid;
Lattice<vobj> zz(fine);
Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard;
Lattice<iScalar<vInteger> > fcoor(fine);
zz = zero;
@ -303,20 +308,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
}
// Loop with a cache friendly loop ordering
for(int sf=0;sf<fine->oSites();sf++){
parallel_region {
int sc;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
parallel_for_internal(int sf=0;sf<fine->oSites();sf++){
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
else fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
}
}
}
return;
@ -688,30 +694,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
////////////////////////////////////////////////////////////////////////////////
// Communicate between grids
////////////////////////////////////////////////////////////////////////////////
//
// All to all plan
//
// Subvolume on fine grid is v. Vectors a,b,c,d
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLEST CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Mesh of nodes (2) ; subdivide to 1 subdivisions
//
// Lex ord:
// N0 va0 vb0 N1 va1 vb1
//
// For each dimension do an all to all
//
// full AllToAll(0)
// N0 va0 va1 N1 vb0 vb1
//
// REARRANGE
// N0 va01 N1 vb01
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLE CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -745,8 +727,17 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
//
/////////////////////////////////////////////////////////
/*
* Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks )
*
* 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
*
* node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. data chunk x M x sP = fL / sP * M * sP = fL * M growth
* node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
* node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
* node 3 1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1)..
* etc...
*/
template<class Vobj>
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
@ -790,11 +781,12 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
int lsites = full_grid->lSites();
Integer sz = lsites * nvector;
uint64_t lsites = full_grid->lSites();
uint64_t sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
for(int v=0;v<nvector;v++){
unvectorizeToLexOrdArray(scalardata,full[v]);
parallel_for(int site=0;site<lsites;site++){
@ -804,39 +796,51 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
int nvec = nvector; // Counts down to 1 as we collapse dims
std::vector<int> ldims = full_grid->_ldimensions;
std::vector<int> lcoor(ndim);
for(int d=0;d<ndim;d++){
for(int d=ndim-1;d>=0;d--){
if ( ratio[d] != 1 ) {
full_grid ->AllToAll(d,alldata,tmpdata);
if ( split_grid->_processors[d] > 1 ) {
alldata=tmpdata;
split_grid->AllToAll(d,alldata,tmpdata);
}
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
nvec /= ratio[d];
auto rdims = ldims; rdims[d] *= ratio[d];
auto rsites= lsites*ratio[d];
for(int v=0;v<nvec;v++){
auto rdims = ldims;
auto M = ratio[d];
auto rsites= lsites*M;// increases rsites by M
nvec /= M; // Reduce nvec by subdivision factor
rdims[d] *= M; // increase local dim by same factor
// For loop over each site within old subvol
for(int lsite=0;lsite<lsites;lsite++){
int sP = split_grid->_processors[d];
int fP = full_grid->_processors[d];
Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
int fvol = lsites;
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
// Loop over reordered data post A2A
parallel_for(int c=0;c<chunk;c++){
std::vector<int> coor(ndim);
for(int m=0;m<M;m++){
for(int s=0;s<sP;s++){
// addressing; use lexico
int lex_r;
uint64_t lex_c = c+chunk*m+chunk*M*s;
uint64_t lex_fvol_vec = c+chunk*s;
uint64_t lex_fvol = lex_fvol_vec%fvol;
uint64_t lex_vec = lex_fvol_vec/fvol;
auto rcoor = lcoor; rcoor[d] += r*ldims[d];
// which node sets an adder to the coordinate
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
coor[d] += m*ldims[d];
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
lex_r += lex_vec * rsites;
int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
rsite += v * rsites;
int rmul=nvec*lsites;
int vmul= lsites;
alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
// LexicoFind coordinate & vector number within split lattice
alldata[lex_r] = tmpdata[lex_c];
}
}
@ -844,13 +848,8 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
ldims[d]*= ratio[d];
lsites *= ratio[d];
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
}
}
}
vectorizeFromLexOrdArray(alldata,split);
}
@ -908,8 +907,8 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
int lsites = full_grid->lSites();
Integer sz = lsites * nvector;
uint64_t lsites = full_grid->lSites();
uint64_t sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
@ -919,72 +918,74 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
/////////////////////////////////////////////////////////////////
// Start from split grid and work towards full grid
/////////////////////////////////////////////////////////////////
std::vector<int> lcoor(ndim);
std::vector<int> rcoor(ndim);
int nvec = 1;
lsites = split_grid->lSites();
std::vector<int> ldims = split_grid->_ldimensions;
uint64_t rsites = split_grid->lSites();
std::vector<int> rdims = split_grid->_ldimensions;
for(int d=ndim-1;d>=0;d--){
for(int d=0;d<ndim;d++){
if ( ratio[d] != 1 ) {
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
}
auto M = ratio[d];
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
auto rsites= lsites/ratio[d];
auto rdims = ldims; rdims[d]/=ratio[d];
for(int v=0;v<nvec;v++){
// rsite, rcoor --> smaller local volume
// lsite, lcoor --> bigger original (single node?) volume
// For loop over each site within smaller subvol
for(int rsite=0;rsite<rsites;rsite++){
Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
int lsite;
for(int r=0;r<ratio[d];r++){
lcoor = rcoor; lcoor[d] += r*rdims[d];
Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
int rmul=nvec*rsites;
int vmul= rsites;
tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
int sP = split_grid->_processors[d];
int fP = full_grid->_processors[d];
auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor
auto lsites= rsites/M; // Decreases rsites by M
int fvol = lsites;
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
{
// Loop over reordered data post A2A
parallel_for(int c=0;c<chunk;c++){
std::vector<int> coor(ndim);
for(int m=0;m<M;m++){
for(int s=0;s<sP;s++){
// addressing; use lexico
int lex_r;
uint64_t lex_c = c+chunk*m+chunk*M*s;
uint64_t lex_fvol_vec = c+chunk*s;
uint64_t lex_fvol = lex_fvol_vec%fvol;
uint64_t lex_vec = lex_fvol_vec/fvol;
// which node sets an adder to the coordinate
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
coor[d] += m*ldims[d];
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
lex_r += lex_vec * rsites;
// LexicoFind coordinate & vector number within split lattice
tmpdata[lex_c] = alldata[lex_r];
}
}
}
}
nvec *= ratio[d];
ldims[d]=rdims[d];
lsites =rsites;
if ( split_grid->_processors[d] > 1 ) {
split_grid->AllToAll(d,tmpdata,alldata);
tmpdata=alldata;
}
full_grid ->AllToAll(d,tmpdata,alldata);
rdims[d]/= M;
rsites /= M;
nvec *= M; // Increase nvec by subdivision factor
}
}
lsites = full_grid->lSites();
for(int v=0;v<nvector;v++){
// assert(v<full.size());
parallel_for(int site=0;site<lsites;site++){
// assert(v*lsites+site < alldata.size());
scalardata[site] = alldata[v*lsites+site];
}
assert(v<full.size());
vectorizeFromLexOrdArray(scalardata,full[v]);
}
}
}
#endif

Some files were not shown because too many files have changed in this diff Show More