mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'feature/hadrons' of https://github.com/paboyle/Grid into feature/hadrons
This commit is contained in:
commit
c291ef77b5
1
.gitignore
vendored
1
.gitignore
vendored
@ -9,6 +9,7 @@
|
|||||||
################
|
################
|
||||||
*~
|
*~
|
||||||
*#
|
*#
|
||||||
|
*.sublime-*
|
||||||
|
|
||||||
# Precompiled Headers #
|
# Precompiled Headers #
|
||||||
#######################
|
#######################
|
||||||
|
@ -113,6 +113,36 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||||
|
|
||||||
|
#define BENCH_ZDW(A,in,out) \
|
||||||
|
zDw.CayleyZeroCounters(); \
|
||||||
|
zDw. A (in,out); \
|
||||||
|
FGrid->Barrier(); \
|
||||||
|
t0=usecond(); \
|
||||||
|
for(int i=0;i<ncall;i++){ \
|
||||||
|
zDw. A (in,out); \
|
||||||
|
} \
|
||||||
|
t1=usecond(); \
|
||||||
|
FGrid->Barrier(); \
|
||||||
|
zDw.CayleyReport(); \
|
||||||
|
std::cout<<GridLogMessage << "Called ZDw " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||||
|
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||||
|
|
||||||
|
#define BENCH_DW_SSC(A,in,out) \
|
||||||
|
Dw.CayleyZeroCounters(); \
|
||||||
|
Dw. A (in,out); \
|
||||||
|
FGrid->Barrier(); \
|
||||||
|
t0=usecond(); \
|
||||||
|
for(int i=0;i<ncall;i++){ \
|
||||||
|
__SSC_START ; \
|
||||||
|
Dw. A (in,out); \
|
||||||
|
__SSC_STOP ; \
|
||||||
|
} \
|
||||||
|
t1=usecond(); \
|
||||||
|
FGrid->Barrier(); \
|
||||||
|
Dw.CayleyReport(); \
|
||||||
|
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||||
|
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||||
|
|
||||||
#define BENCH_DW_MEO(A,in,out) \
|
#define BENCH_DW_MEO(A,in,out) \
|
||||||
Dw.CayleyZeroCounters(); \
|
Dw.CayleyZeroCounters(); \
|
||||||
Dw. A (in,out,0); \
|
Dw. A (in,out,0); \
|
||||||
@ -148,9 +178,15 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermion result(sFGrid);
|
LatticeFermion result(sFGrid);
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Constructing Vec5D Dw "<<std::endl;
|
std::cout<<GridLogMessage << "Constructing Vec5D Dw "<<std::endl;
|
||||||
DomainWallFermionVec5dR Dw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5);
|
DomainWallFermionVec5dR Dw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5);
|
||||||
|
|
||||||
|
RealD b=1.5;// Scale factor b+c=2, b-c=1
|
||||||
|
RealD c=0.5;
|
||||||
|
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
|
||||||
|
ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dhop "<<std::endl;
|
std::cout<<GridLogMessage << "Calling Dhop "<<std::endl;
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
|
|
||||||
@ -173,10 +209,13 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
BENCH_DW_MEO(Dhop ,src,result);
|
BENCH_DW_MEO(Dhop ,src,result);
|
||||||
BENCH_DW_MEO(DhopEO ,src_o,r_e);
|
BENCH_DW_MEO(DhopEO ,src_o,r_e);
|
||||||
BENCH_DW(Meooe ,src_o,r_e);
|
BENCH_DW_SSC(Meooe ,src_o,r_e);
|
||||||
BENCH_DW(Mooee ,src_o,r_o);
|
BENCH_DW(Mooee ,src_o,r_o);
|
||||||
BENCH_DW(MooeeInv,src_o,r_o);
|
BENCH_DW(MooeeInv,src_o,r_o);
|
||||||
|
|
||||||
|
BENCH_ZDW(Mooee ,src_o,r_o);
|
||||||
|
BENCH_ZDW(MooeeInv,src_o,r_o);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
|
14
configure.ac
14
configure.ac
@ -99,6 +99,13 @@ case ${ac_MKL} in
|
|||||||
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
############### HDF5
|
||||||
|
AC_ARG_WITH([hdf5],
|
||||||
|
[AS_HELP_STRING([--with-hdf5=prefix],
|
||||||
|
[try this for a non-standard install prefix of the HDF5 library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_hdf5/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_hdf5/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
############### first-touch
|
############### first-touch
|
||||||
AC_ARG_ENABLE([numa],
|
AC_ARG_ENABLE([numa],
|
||||||
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||||
@ -145,6 +152,12 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
|
|||||||
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
|
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
|
||||||
[have_fftw=true])
|
[have_fftw=true])
|
||||||
|
|
||||||
|
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
|
||||||
|
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
|
||||||
|
[have_hdf5=true]
|
||||||
|
[LIBS="${LIBS} -lhdf5"], [], [-lhdf5])
|
||||||
|
AM_CONDITIONAL(BUILD_HDF5, [ test "${have_hdf5}X" == "trueX" ])
|
||||||
|
|
||||||
CXXFLAGS=$CXXFLAGS_CPY
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
LDFLAGS=$LDFLAGS_CPY
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
|
|
||||||
@ -410,6 +423,7 @@ RNG choice : ${ac_RNG}
|
|||||||
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||||
LAPACK : ${ac_LAPACK}
|
LAPACK : ${ac_LAPACK}
|
||||||
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||||
|
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
|
||||||
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
|
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
|
||||||
----- BUILD FLAGS -------------------------------------
|
----- BUILD FLAGS -------------------------------------
|
||||||
CXXFLAGS:
|
CXXFLAGS:
|
||||||
|
@ -42,7 +42,6 @@ using namespace Hadrons;
|
|||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
// constructors ////////////////////////////////////////////////////////////////
|
// constructors ////////////////////////////////////////////////////////////////
|
||||||
Application::Application(void)
|
Application::Application(void)
|
||||||
: env_(Environment::getInstance())
|
|
||||||
{
|
{
|
||||||
LOG(Message) << "Modules available:" << std::endl;
|
LOG(Message) << "Modules available:" << std::endl;
|
||||||
auto list = ModuleFactory::getInstance().getBuilderList();
|
auto list = ModuleFactory::getInstance().getBuilderList();
|
||||||
@ -74,11 +73,17 @@ Application::Application(const std::string parameterFileName)
|
|||||||
parameterFileName_ = parameterFileName;
|
parameterFileName_ = parameterFileName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// environment shortcut ////////////////////////////////////////////////////////
|
||||||
|
Environment & Application::env(void) const
|
||||||
|
{
|
||||||
|
return Environment::getInstance();
|
||||||
|
}
|
||||||
|
|
||||||
// access //////////////////////////////////////////////////////////////////////
|
// access //////////////////////////////////////////////////////////////////////
|
||||||
void Application::setPar(const Application::GlobalPar &par)
|
void Application::setPar(const Application::GlobalPar &par)
|
||||||
{
|
{
|
||||||
par_ = par;
|
par_ = par;
|
||||||
env_.setSeed(strToVec<int>(par_.seed));
|
env().setSeed(strToVec<int>(par_.seed));
|
||||||
}
|
}
|
||||||
|
|
||||||
const Application::GlobalPar & Application::getPar(void)
|
const Application::GlobalPar & Application::getPar(void)
|
||||||
@ -89,7 +94,7 @@ const Application::GlobalPar & Application::getPar(void)
|
|||||||
// execute /////////////////////////////////////////////////////////////////////
|
// execute /////////////////////////////////////////////////////////////////////
|
||||||
void Application::run(void)
|
void Application::run(void)
|
||||||
{
|
{
|
||||||
if (!parameterFileName_.empty() and (env_.getNModule() == 0))
|
if (!parameterFileName_.empty() and (env().getNModule() == 0))
|
||||||
{
|
{
|
||||||
parseParameterFile(parameterFileName_);
|
parseParameterFile(parameterFileName_);
|
||||||
}
|
}
|
||||||
@ -124,7 +129,7 @@ void Application::parseParameterFile(const std::string parameterFileName)
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
read(reader, "id", id);
|
read(reader, "id", id);
|
||||||
env_.createModule(id.name, id.type, reader);
|
env().createModule(id.name, id.type, reader);
|
||||||
} while (reader.nextElement("module"));
|
} while (reader.nextElement("module"));
|
||||||
pop(reader);
|
pop(reader);
|
||||||
pop(reader);
|
pop(reader);
|
||||||
@ -134,7 +139,7 @@ void Application::saveParameterFile(const std::string parameterFileName)
|
|||||||
{
|
{
|
||||||
XmlWriter writer(parameterFileName);
|
XmlWriter writer(parameterFileName);
|
||||||
ObjectId id;
|
ObjectId id;
|
||||||
const unsigned int nMod = env_.getNModule();
|
const unsigned int nMod = env().getNModule();
|
||||||
|
|
||||||
LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
|
LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
|
||||||
write(writer, "parameters", getPar());
|
write(writer, "parameters", getPar());
|
||||||
@ -142,10 +147,10 @@ void Application::saveParameterFile(const std::string parameterFileName)
|
|||||||
for (unsigned int i = 0; i < nMod; ++i)
|
for (unsigned int i = 0; i < nMod; ++i)
|
||||||
{
|
{
|
||||||
push(writer, "module");
|
push(writer, "module");
|
||||||
id.name = env_.getModuleName(i);
|
id.name = env().getModuleName(i);
|
||||||
id.type = env_.getModule(i)->getRegisteredName();
|
id.type = env().getModule(i)->getRegisteredName();
|
||||||
write(writer, "id", id);
|
write(writer, "id", id);
|
||||||
env_.getModule(i)->saveParameters(writer, "options");
|
env().getModule(i)->saveParameters(writer, "options");
|
||||||
pop(writer);
|
pop(writer);
|
||||||
}
|
}
|
||||||
pop(writer);
|
pop(writer);
|
||||||
@ -164,10 +169,10 @@ auto memPeak = [this](const std::vector<unsigned int> &program)\
|
|||||||
\
|
\
|
||||||
msg = HadronsLogMessage.isActive();\
|
msg = HadronsLogMessage.isActive();\
|
||||||
HadronsLogMessage.Active(false);\
|
HadronsLogMessage.Active(false);\
|
||||||
env_.dryRun(true);\
|
env().dryRun(true);\
|
||||||
memPeak = env_.executeProgram(program);\
|
memPeak = env().executeProgram(program);\
|
||||||
env_.dryRun(false);\
|
env().dryRun(false);\
|
||||||
env_.freeAll();\
|
env().freeAll();\
|
||||||
HadronsLogMessage.Active(true);\
|
HadronsLogMessage.Active(true);\
|
||||||
\
|
\
|
||||||
return memPeak;\
|
return memPeak;\
|
||||||
@ -179,7 +184,7 @@ void Application::schedule(void)
|
|||||||
|
|
||||||
// build module dependency graph
|
// build module dependency graph
|
||||||
LOG(Message) << "Building module graph..." << std::endl;
|
LOG(Message) << "Building module graph..." << std::endl;
|
||||||
auto graph = env_.makeModuleGraph();
|
auto graph = env().makeModuleGraph();
|
||||||
auto con = graph.getConnectedComponents();
|
auto con = graph.getConnectedComponents();
|
||||||
|
|
||||||
// constrained topological sort using a genetic algorithm
|
// constrained topological sort using a genetic algorithm
|
||||||
@ -256,7 +261,7 @@ void Application::saveSchedule(const std::string filename)
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
for (auto address: program_)
|
for (auto address: program_)
|
||||||
{
|
{
|
||||||
program.push_back(env_.getModuleName(address));
|
program.push_back(env().getModuleName(address));
|
||||||
}
|
}
|
||||||
write(writer, "schedule", program);
|
write(writer, "schedule", program);
|
||||||
}
|
}
|
||||||
@ -274,7 +279,7 @@ void Application::loadSchedule(const std::string filename)
|
|||||||
program_.clear();
|
program_.clear();
|
||||||
for (auto &name: program)
|
for (auto &name: program)
|
||||||
{
|
{
|
||||||
program_.push_back(env_.getModuleAddress(name));
|
program_.push_back(env().getModuleAddress(name));
|
||||||
}
|
}
|
||||||
scheduled_ = true;
|
scheduled_ = true;
|
||||||
memPeak_ = memPeak(program_);
|
memPeak_ = memPeak(program_);
|
||||||
@ -291,7 +296,7 @@ void Application::printSchedule(void)
|
|||||||
for (unsigned int i = 0; i < program_.size(); ++i)
|
for (unsigned int i = 0; i < program_.size(); ++i)
|
||||||
{
|
{
|
||||||
LOG(Message) << std::setw(4) << i + 1 << ": "
|
LOG(Message) << std::setw(4) << i + 1 << ": "
|
||||||
<< env_.getModuleName(program_[i]) << std::endl;
|
<< env().getModuleName(program_[i]) << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -304,9 +309,9 @@ void Application::configLoop(void)
|
|||||||
{
|
{
|
||||||
LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
|
LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
|
||||||
<< " " << BIG_SEP << std::endl;
|
<< " " << BIG_SEP << std::endl;
|
||||||
env_.setTrajectory(t);
|
env().setTrajectory(t);
|
||||||
env_.executeProgram(program_);
|
env().executeProgram(program_);
|
||||||
}
|
}
|
||||||
LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
|
LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
|
||||||
env_.freeAll();
|
env().freeAll();
|
||||||
}
|
}
|
||||||
|
@ -98,11 +98,13 @@ public:
|
|||||||
void printSchedule(void);
|
void printSchedule(void);
|
||||||
// loop on configurations
|
// loop on configurations
|
||||||
void configLoop(void);
|
void configLoop(void);
|
||||||
|
private:
|
||||||
|
// environment shortcut
|
||||||
|
Environment & env(void) const;
|
||||||
private:
|
private:
|
||||||
long unsigned int locVol_;
|
long unsigned int locVol_;
|
||||||
std::string parameterFileName_{""};
|
std::string parameterFileName_{""};
|
||||||
GlobalPar par_;
|
GlobalPar par_;
|
||||||
Environment &env_;
|
|
||||||
std::vector<unsigned int> program_;
|
std::vector<unsigned int> program_;
|
||||||
Environment::Size memPeak_;
|
Environment::Size memPeak_;
|
||||||
bool scheduled_{false};
|
bool scheduled_{false};
|
||||||
@ -115,14 +117,14 @@ private:
|
|||||||
template <typename M>
|
template <typename M>
|
||||||
void Application::createModule(const std::string name)
|
void Application::createModule(const std::string name)
|
||||||
{
|
{
|
||||||
env_.createModule<M>(name);
|
env().createModule<M>(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
void Application::createModule(const std::string name,
|
void Application::createModule(const std::string name,
|
||||||
const typename M::Par &par)
|
const typename M::Par &par)
|
||||||
{
|
{
|
||||||
env_.createModule<M>(name, par);
|
env().createModule<M>(name, par);
|
||||||
}
|
}
|
||||||
|
|
||||||
END_HADRONS_NAMESPACE
|
END_HADRONS_NAMESPACE
|
||||||
|
@ -41,8 +41,9 @@ using namespace Hadrons;
|
|||||||
// constructor /////////////////////////////////////////////////////////////////
|
// constructor /////////////////////////////////////////////////////////////////
|
||||||
Environment::Environment(void)
|
Environment::Environment(void)
|
||||||
{
|
{
|
||||||
|
nd_ = GridDefaultLatt().size();
|
||||||
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
|
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
|
||||||
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
|
GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()),
|
||||||
GridDefaultMpi()));
|
GridDefaultMpi()));
|
||||||
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
|
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
|
||||||
auto loc = getGrid()->LocalDimensions();
|
auto loc = getGrid()->LocalDimensions();
|
||||||
@ -126,6 +127,11 @@ GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int Environment::getNd(void) const
|
||||||
|
{
|
||||||
|
return nd_;
|
||||||
|
}
|
||||||
|
|
||||||
// random number generator /////////////////////////////////////////////////////
|
// random number generator /////////////////////////////////////////////////////
|
||||||
void Environment::setSeed(const std::vector<int> &seed)
|
void Environment::setSeed(const std::vector<int> &seed)
|
||||||
{
|
{
|
||||||
|
@ -106,6 +106,7 @@ public:
|
|||||||
void createGrid(const unsigned int Ls);
|
void createGrid(const unsigned int Ls);
|
||||||
GridCartesian * getGrid(const unsigned int Ls = 1) const;
|
GridCartesian * getGrid(const unsigned int Ls = 1) const;
|
||||||
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
|
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
|
||||||
|
unsigned int getNd(void) const;
|
||||||
// random number generator
|
// random number generator
|
||||||
void setSeed(const std::vector<int> &seed);
|
void setSeed(const std::vector<int> &seed);
|
||||||
GridParallelRNG * get4dRng(void) const;
|
GridParallelRNG * get4dRng(void) const;
|
||||||
@ -200,6 +201,7 @@ private:
|
|||||||
std::map<unsigned int, GridPt> grid5d_;
|
std::map<unsigned int, GridPt> grid5d_;
|
||||||
GridRbPt gridRb4d_;
|
GridRbPt gridRb4d_;
|
||||||
std::map<unsigned int, GridRbPt> gridRb5d_;
|
std::map<unsigned int, GridRbPt> gridRb5d_;
|
||||||
|
unsigned int nd_;
|
||||||
// random number generator
|
// random number generator
|
||||||
RngPt rng4d_;
|
RngPt rng4d_;
|
||||||
// module and related maps
|
// module and related maps
|
||||||
|
@ -166,7 +166,7 @@ void GeneticScheduler<T>::initPopulation(void)
|
|||||||
{
|
{
|
||||||
auto p = graph_.topoSort(gen_);
|
auto p = graph_.topoSort(gen_);
|
||||||
|
|
||||||
population_.emplace(func_(p), p);
|
population_.insert(std::make_pair(func_(p), p));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,8 +180,8 @@ void GeneticScheduler<T>::doCrossover(void)
|
|||||||
crossover(c1, c2, p1, p2);
|
crossover(c1, c2, p1, p2);
|
||||||
PARALLEL_CRITICAL
|
PARALLEL_CRITICAL
|
||||||
{
|
{
|
||||||
population_.emplace(func_(c1), c1);
|
population_.insert(std::make_pair(func_(c1), c1));
|
||||||
population_.emplace(func_(c2), c2);
|
population_.insert(std::make_pair(func_(c2), c2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,7 +200,7 @@ void GeneticScheduler<T>::doMutation(void)
|
|||||||
mutation(m, it->second);
|
mutation(m, it->second);
|
||||||
PARALLEL_CRITICAL
|
PARALLEL_CRITICAL
|
||||||
{
|
{
|
||||||
population_.emplace(func_(m), m);
|
population_.insert(std::make_pair(func_(m), m));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -147,7 +147,7 @@ void TSeqGamma<FImpl>::execute(void)
|
|||||||
g = makeGammaProd(par().gamma);
|
g = makeGammaProd(par().gamma);
|
||||||
p = strToVec<Real>(par().mom);
|
p = strToVec<Real>(par().mom);
|
||||||
ph = zero;
|
ph = zero;
|
||||||
for(unsigned int mu = 0; mu < Nd; mu++)
|
for(unsigned int mu = 0; mu < env().getNd(); mu++)
|
||||||
{
|
{
|
||||||
LatticeCoordinate(coor, mu);
|
LatticeCoordinate(coor, mu);
|
||||||
ph = ph + p[mu]*coor;
|
ph = ph + p[mu]*coor;
|
||||||
|
65
lib/AlignedAllocator.cc
Normal file
65
lib/AlignedAllocator.cc
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
int PointerCache::victim;
|
||||||
|
|
||||||
|
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
||||||
|
|
||||||
|
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||||
|
|
||||||
|
if (bytes < 4096 ) return NULL;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
assert(omp_in_parallel()==0);
|
||||||
|
#endif
|
||||||
|
void * ret = NULL;
|
||||||
|
int v = -1;
|
||||||
|
|
||||||
|
for(int e=0;e<Ncache;e++) {
|
||||||
|
if ( Entries[e].valid==0 ) {
|
||||||
|
v=e;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( v==-1 ) {
|
||||||
|
v=victim;
|
||||||
|
victim = (victim+1)%Ncache;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( Entries[v].valid ) {
|
||||||
|
ret = Entries[v].address;
|
||||||
|
Entries[v].valid = 0;
|
||||||
|
Entries[v].address = NULL;
|
||||||
|
Entries[v].bytes = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Entries[v].address=ptr;
|
||||||
|
Entries[v].bytes =bytes;
|
||||||
|
Entries[v].valid =1;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *PointerCache::Lookup(size_t bytes) {
|
||||||
|
|
||||||
|
if (bytes < 4096 ) return NULL;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
assert(omp_in_parallel()==0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for(int e=0;e<Ncache;e++){
|
||||||
|
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
||||||
|
Entries[e].valid = 0;
|
||||||
|
return Entries[e].address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -42,9 +42,32 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
class PointerCache {
|
||||||
|
private:
|
||||||
|
|
||||||
|
static const int Ncache=8;
|
||||||
|
static int victim;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
void *address;
|
||||||
|
size_t bytes;
|
||||||
|
int valid;
|
||||||
|
} PointerCacheEntry;
|
||||||
|
|
||||||
|
static PointerCacheEntry Entries[Ncache];
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
static void *Insert(void *ptr,size_t bytes) ;
|
||||||
|
static void *Lookup(size_t bytes) ;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// A lattice of something, but assume the something is SIMDized.
|
// A lattice of something, but assume the something is SIMDized.
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<typename _Tp>
|
template<typename _Tp>
|
||||||
class alignedAllocator {
|
class alignedAllocator {
|
||||||
public:
|
public:
|
||||||
@ -66,27 +89,27 @@ public:
|
|||||||
|
|
||||||
pointer allocate(size_type __n, const void* _p= 0)
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
{
|
{
|
||||||
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
|
|
||||||
|
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
|
||||||
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128);
|
||||||
#else
|
#else
|
||||||
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
_Tp tmp;
|
|
||||||
#ifdef GRID_NUMA
|
|
||||||
#pragma omp parallel for schedule(static)
|
|
||||||
for(int i=0;i<__n;i++){
|
|
||||||
ptr[i]=tmp;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void deallocate(pointer __p, size_type) {
|
void deallocate(pointer __p, size_type __n) {
|
||||||
|
size_type bytes = __n * sizeof(_Tp);
|
||||||
|
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
|
||||||
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
_mm_free((void *)__p);
|
if ( __freeme ) _mm_free((void *)__freeme);
|
||||||
#else
|
#else
|
||||||
free((void *)__p);
|
if ( __freeme ) free((void *)__freeme);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void construct(pointer __p, const _Tp& __val) { };
|
void construct(pointer __p, const _Tp& __val) { };
|
||||||
|
@ -59,13 +59,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
///////////////////
|
///////////////////
|
||||||
// Grid headers
|
// Grid headers
|
||||||
///////////////////
|
///////////////////
|
||||||
#include <Grid/serialisation/Serialisation.h>
|
|
||||||
#include "Config.h"
|
#include "Config.h"
|
||||||
#include <Grid/Timer.h>
|
#include <Grid/Timer.h>
|
||||||
#include <Grid/PerfCount.h>
|
#include <Grid/PerfCount.h>
|
||||||
#include <Grid/Log.h>
|
#include <Grid/Log.h>
|
||||||
#include <Grid/AlignedAllocator.h>
|
#include <Grid/AlignedAllocator.h>
|
||||||
#include <Grid/Simd.h>
|
#include <Grid/Simd.h>
|
||||||
|
#include <Grid/serialisation/Serialisation.h>
|
||||||
#include <Grid/Threads.h>
|
#include <Grid/Threads.h>
|
||||||
#include <Grid/Lexicographic.h>
|
#include <Grid/Lexicographic.h>
|
||||||
#include <Grid/Init.h>
|
#include <Grid/Init.h>
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
extra_sources=
|
extra_sources=
|
||||||
|
extra_headers=
|
||||||
if BUILD_COMMS_MPI
|
if BUILD_COMMS_MPI
|
||||||
extra_sources+=communicator/Communicator_mpi.cc
|
extra_sources+=communicator/Communicator_mpi.cc
|
||||||
extra_sources+=communicator/Communicator_base.cc
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
@ -24,6 +25,12 @@ if BUILD_COMMS_NONE
|
|||||||
extra_sources+=communicator/Communicator_base.cc
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if BUILD_HDF5
|
||||||
|
extra_sources+=serialisation/Hdf5IO.cc
|
||||||
|
extra_headers+=serialisation/Hdf5IO.h
|
||||||
|
extra_headers+=serialisation/Hdf5Type.h
|
||||||
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# Libraries
|
# Libraries
|
||||||
#
|
#
|
||||||
@ -32,6 +39,9 @@ include Eigen.inc
|
|||||||
|
|
||||||
lib_LIBRARIES = libGrid.a
|
lib_LIBRARIES = libGrid.a
|
||||||
|
|
||||||
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
CCFILES += $(extra_sources)
|
||||||
|
HFILES += $(extra_headers)
|
||||||
|
|
||||||
|
libGrid_a_SOURCES = $(CCFILES)
|
||||||
libGrid_adir = $(pkgincludedir)
|
libGrid_adir = $(pkgincludedir)
|
||||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||||
|
@ -205,12 +205,13 @@ public:
|
|||||||
void Stop(void) {
|
void Stop(void) {
|
||||||
count=0;
|
count=0;
|
||||||
cycles=0;
|
cycles=0;
|
||||||
|
size_t ign;
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if ( fd!= -1) {
|
if ( fd!= -1) {
|
||||||
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||||
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
|
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
|
||||||
::read(fd, &count, sizeof(long long));
|
ign=::read(fd, &count, sizeof(long long));
|
||||||
::read(cyclefd, &cycles, sizeof(long long));
|
ign=::read(cyclefd, &cycles, sizeof(long long));
|
||||||
}
|
}
|
||||||
elapsed = cyclecount() - begin;
|
elapsed = cyclecount() - begin;
|
||||||
#else
|
#else
|
||||||
|
@ -113,7 +113,7 @@ Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice
|
|||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int i=0;i<table.size();i++){
|
for(int i=0;i<table.size();i++){
|
||||||
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
vstream(buffer[off+table[i].first],compress(rhs._odata[so+table[i].second]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,7 +386,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
//PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<lg->lSites();idx++){
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
std::vector<int> lcoor(nl);
|
std::vector<int> lcoor(nl);
|
||||||
std::vector<int> hcoor(nh);
|
std::vector<int> hcoor(nh);
|
||||||
@ -428,7 +428,7 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
//PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<lg->lSites();idx++){
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
std::vector<int> lcoor(nl);
|
std::vector<int> lcoor(nl);
|
||||||
std::vector<int> hcoor(nh);
|
std::vector<int> hcoor(nh);
|
||||||
|
@ -29,6 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid/Eigen/Dense>
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
@ -48,18 +49,18 @@ namespace QCD {
|
|||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_M5,p),
|
FourDimRedBlackGrid,_M5,p),
|
||||||
mass(_mass)
|
mass(_mass)
|
||||||
{ }
|
{
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
|
|
||||||
this->DW(psi,tmp,DaggerNo);
|
this->DW(psi,this->tmp(),DaggerNo);
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],tmp,s,s);// chi = (1-c[s] D_W) psi
|
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],this->tmp(),s,s);// chi = (1-c[s] D_W) psi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,8 +88,8 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
|
|||||||
std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl;
|
std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl;
|
||||||
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl;
|
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl;
|
||||||
|
|
||||||
// Flops = 9*12*Ls*vol/2
|
// Flops = MADD * Ls *Ls *4dvol * spin/colour/complex
|
||||||
RealD mflops = 9.0*12*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
|
RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||||
}
|
}
|
||||||
@ -110,12 +111,11 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
|
|
||||||
this->DW(psi,tmp,DaggerYes);
|
this->DW(psi,this->tmp(),DaggerYes);
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],tmp,s,s);// chi = (1-c[s] D_W) psi
|
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],this->tmp(),s,s);// chi = (1-c[s] D_W) psi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -138,6 +138,7 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
|
|||||||
lower[0] =-mass*lower[0];
|
lower[0] =-mass*lower[0];
|
||||||
M5D(psi,psi,Din,lower,diag,upper);
|
M5D(psi,psi,Din,lower,diag,upper);
|
||||||
}
|
}
|
||||||
|
// FIXME Redunant with the above routine; check this and eliminate
|
||||||
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
|
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
@ -259,36 +260,33 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
|
|
||||||
Meooe5D(psi,tmp);
|
Meooe5D(psi,this->tmp());
|
||||||
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
this->DhopEO(tmp,chi,DaggerNo);
|
this->DhopEO(this->tmp(),chi,DaggerNo);
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(tmp,chi,DaggerNo);
|
this->DhopOE(this->tmp(),chi,DaggerNo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
// Apply 4d dslash
|
// Apply 4d dslash
|
||||||
if ( psi.checkerboard == Odd ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
this->DhopEO(psi,tmp,DaggerYes);
|
this->DhopEO(psi,this->tmp(),DaggerYes);
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(psi,tmp,DaggerYes);
|
this->DhopOE(psi,this->tmp(),DaggerYes);
|
||||||
}
|
}
|
||||||
MeooeDag5D(tmp,chi);
|
MeooeDag5D(this->tmp(),chi);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
FermionField tmp(psi._grid);
|
Meo5D(psi,this->tmp());
|
||||||
Meo5D(psi,tmp);
|
|
||||||
// Apply 4d dslash fragment
|
// Apply 4d dslash fragment
|
||||||
this->DhopDir(tmp,chi,dir,disp);
|
this->DhopDir(this->tmp(),chi,dir,disp);
|
||||||
}
|
}
|
||||||
// force terms; five routines; default to Dhop on diagonal
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -459,9 +457,91 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
|
|||||||
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
||||||
dee[Ls-1] += delta_d;
|
dee[Ls-1] += delta_d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int inv=1;
|
||||||
|
this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
|
||||||
|
this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
|
||||||
|
Vector<iSinglet<Simd> > & Matp,
|
||||||
|
Vector<iSinglet<Simd> > & Matm)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
GridBase *grid = this->FermionRedBlackGrid();
|
||||||
|
int LLs = grid->_rdimensions[0];
|
||||||
|
|
||||||
|
if ( LLs == Ls ) return; // Not vectorised in 5th direction
|
||||||
|
|
||||||
|
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Pplus(s,s) = bee[s];
|
||||||
|
Pminus(s,s)= bee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pminus(s,s+1) = -cee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pplus(s+1,s) = -cee[s+1];
|
||||||
|
}
|
||||||
|
Pplus (0,Ls-1) = mass*cee[0];
|
||||||
|
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
||||||
|
|
||||||
|
Eigen::MatrixXcd PplusMat ;
|
||||||
|
Eigen::MatrixXcd PminusMat;
|
||||||
|
|
||||||
|
if ( inv ) {
|
||||||
|
PplusMat =Pplus.inverse();
|
||||||
|
PminusMat=Pminus.inverse();
|
||||||
|
} else {
|
||||||
|
PplusMat =Pplus;
|
||||||
|
PminusMat=Pminus;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(dag){
|
||||||
|
PplusMat.adjointInPlace();
|
||||||
|
PminusMat.adjointInPlace();
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||||
|
const int Nsimd=Simd::Nsimd();
|
||||||
|
Matp.resize(Ls*LLs);
|
||||||
|
Matm.resize(Ls*LLs);
|
||||||
|
|
||||||
|
for(int s2=0;s2<Ls;s2++){
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
int istride = LLs;
|
||||||
|
int ostride = 1;
|
||||||
|
Simd Vp;
|
||||||
|
Simd Vm;
|
||||||
|
scalar_type *sp = (scalar_type *)&Vp;
|
||||||
|
scalar_type *sm = (scalar_type *)&Vm;
|
||||||
|
for(int l=0;l<Nsimd;l++){
|
||||||
|
if ( switcheroo<Coeff_t>::iscomplex() ) {
|
||||||
|
sp[l] = PplusMat (l*istride+s1*ostride,s2);
|
||||||
|
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||||
|
} else {
|
||||||
|
// if real
|
||||||
|
scalar_type tmp;
|
||||||
|
tmp = PplusMat (l*istride+s1*ostride,s2);
|
||||||
|
sp[l] = scalar_type(tmp.real(),tmp.real());
|
||||||
|
tmp = PminusMat(l*istride+s1*ostride,s2);
|
||||||
|
sm[l] = scalar_type(tmp.real(),tmp.real());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Matp[LLs*s2+s1] = Vp;
|
||||||
|
Matm[LLs*s2+s1] = Vm;
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(CayleyFermion5D);
|
FermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
|
@ -33,6 +33,31 @@ namespace Grid {
|
|||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
|
template<typename T> struct switcheroo {
|
||||||
|
static inline int iscomplex() { return 0; }
|
||||||
|
|
||||||
|
template<class vec>
|
||||||
|
static inline vec mult(vec a, vec b) {
|
||||||
|
return real_mult(a,b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<> struct switcheroo<ComplexD> {
|
||||||
|
static inline int iscomplex() { return 1; }
|
||||||
|
|
||||||
|
template<class vec>
|
||||||
|
static inline vec mult(vec a, vec b) {
|
||||||
|
return a*b;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<> struct switcheroo<ComplexF> {
|
||||||
|
static inline int iscomplex() { return 1; }
|
||||||
|
template<class vec>
|
||||||
|
static inline vec mult(vec a, vec b) {
|
||||||
|
return a*b;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
class CayleyFermion5D : public WilsonFermion5D<Impl>
|
class CayleyFermion5D : public WilsonFermion5D<Impl>
|
||||||
{
|
{
|
||||||
@ -75,7 +100,19 @@ namespace Grid {
|
|||||||
std::vector<Coeff_t> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<Coeff_t> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<Coeff_t> &upper);
|
std::vector<Coeff_t> &upper);
|
||||||
|
|
||||||
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
|
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
|
||||||
|
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd> > & Matp, Vector<iSinglet<Simd> > & Matm);
|
||||||
|
|
||||||
|
void MooeeInternalAsm(const FermionField &in, FermionField &out,
|
||||||
|
int LLs, int site,
|
||||||
|
Vector<iSinglet<Simd> > &Matp,
|
||||||
|
Vector<iSinglet<Simd> > &Matm);
|
||||||
|
void MooeeInternalZAsm(const FermionField &in, FermionField &out,
|
||||||
|
int LLs, int site,
|
||||||
|
Vector<iSinglet<Simd> > &Matp,
|
||||||
|
Vector<iSinglet<Simd> > &Matm);
|
||||||
|
|
||||||
|
|
||||||
virtual void Instantiatable(void)=0;
|
virtual void Instantiatable(void)=0;
|
||||||
|
|
||||||
@ -112,6 +149,12 @@ namespace Grid {
|
|||||||
std::vector<Coeff_t> ueem;
|
std::vector<Coeff_t> ueem;
|
||||||
std::vector<Coeff_t> dee;
|
std::vector<Coeff_t> dee;
|
||||||
|
|
||||||
|
// Matrices of 5d ee inverse params
|
||||||
|
Vector<iSinglet<Simd> > MatpInv;
|
||||||
|
Vector<iSinglet<Simd> > MatmInv;
|
||||||
|
Vector<iSinglet<Simd> > MatpInvDag;
|
||||||
|
Vector<iSinglet<Simd> > MatmInvDag;
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
CayleyFermion5D(GaugeField &_Umu,
|
CayleyFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
|
@ -29,13 +29,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid/Eigen/Dense>
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD { /*
|
||||||
/*
|
|
||||||
* Dense matrix versions of routines
|
* Dense matrix versions of routines
|
||||||
*/
|
*/
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -126,7 +125,6 @@ PARALLEL_FOR_LOOP
|
|||||||
for(int v=0;v<LLs;v++){
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
vprefetch(psi[ss+v+LLs]);
|
vprefetch(psi[ss+v+LLs]);
|
||||||
// vprefetch(phi[ss+v+LLs]);
|
|
||||||
|
|
||||||
int vp= (v==LLs-1) ? 0 : v+1;
|
int vp= (v==LLs-1) ? 0 : v+1;
|
||||||
int vm= (v==0 ) ? LLs-1 : v-1;
|
int vm= (v==0 ) ? LLs-1 : v-1;
|
||||||
@ -145,9 +143,6 @@ PARALLEL_FOR_LOOP
|
|||||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||||
|
|
||||||
// if ( ss==0) std::cout << " hp_00 " <<hp_00<<std::endl;
|
|
||||||
// if ( ss==0) std::cout << " hm_00 " <<hm_00<<std::endl;
|
|
||||||
|
|
||||||
if ( vp<=v ) {
|
if ( vp<=v ) {
|
||||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||||
@ -165,42 +160,20 @@ PARALLEL_FOR_LOOP
|
|||||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
// Can force these to real arithmetic and save 2x.
|
||||||
if ( ss==0) std::cout << " dphi_00 " <<d[v]()()() * phi[ss+v]()(0)(0) <<std::endl;
|
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
|
||||||
if ( ss==0) std::cout << " dphi_10 " <<d[v]()()() * phi[ss+v]()(1)(0) <<std::endl;
|
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
|
||||||
if ( ss==0) std::cout << " dphi_20 " <<d[v]()()() * phi[ss+v]()(2)(0) <<std::endl;
|
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
|
||||||
if ( ss==0) std::cout << " dphi_30 " <<d[v]()()() * phi[ss+v]()(3)(0) <<std::endl;
|
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
|
||||||
*/
|
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
|
||||||
Simd p_00 = d[v]()()() * phi[ss+v]()(0)(0) + l[v]()()()*hm_00;
|
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
|
||||||
Simd p_01 = d[v]()()() * phi[ss+v]()(0)(1) + l[v]()()()*hm_01;
|
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
|
||||||
Simd p_02 = d[v]()()() * phi[ss+v]()(0)(2) + l[v]()()()*hm_02;
|
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
|
||||||
Simd p_10 = d[v]()()() * phi[ss+v]()(1)(0) + l[v]()()()*hm_10;
|
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
|
||||||
Simd p_11 = d[v]()()() * phi[ss+v]()(1)(1) + l[v]()()()*hm_11;
|
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
|
||||||
Simd p_12 = d[v]()()() * phi[ss+v]()(1)(2) + l[v]()()()*hm_12;
|
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
|
||||||
Simd p_20 = d[v]()()() * phi[ss+v]()(2)(0) + u[v]()()()*hp_00;
|
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
|
||||||
Simd p_21 = d[v]()()() * phi[ss+v]()(2)(1) + u[v]()()()*hp_01;
|
|
||||||
Simd p_22 = d[v]()()() * phi[ss+v]()(2)(2) + u[v]()()()*hp_02;
|
|
||||||
Simd p_30 = d[v]()()() * phi[ss+v]()(3)(0) + u[v]()()()*hp_10;
|
|
||||||
Simd p_31 = d[v]()()() * phi[ss+v]()(3)(1) + u[v]()()()*hp_11;
|
|
||||||
Simd p_32 = d[v]()()() * phi[ss+v]()(3)(2) + u[v]()()()*hp_12;
|
|
||||||
|
|
||||||
|
|
||||||
// if ( ss==0){
|
|
||||||
/*
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(0) << " bad "<<p_00<<" diff "<<chi[ss+v]()(0)(0)-p_00<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(1) << " bad "<<p_01<<" diff "<<chi[ss+v]()(0)(1)-p_01<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(2) << " bad "<<p_02<<" diff "<<chi[ss+v]()(0)(2)-p_02<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(0) << " bad "<<p_10<<" diff "<<chi[ss+v]()(1)(0)-p_10<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(1) << " bad "<<p_11<<" diff "<<chi[ss+v]()(1)(1)-p_11<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(2) << " bad "<<p_12<<" diff "<<chi[ss+v]()(1)(2)-p_12<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(0) << " bad "<<p_20<<" diff "<<chi[ss+v]()(2)(0)-p_20<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(1) << " bad "<<p_21<<" diff "<<chi[ss+v]()(2)(1)-p_21<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(2) << " bad "<<p_22<<" diff "<<chi[ss+v]()(2)(2)-p_22<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(0) << " bad "<<p_30<<" diff "<<chi[ss+v]()(3)(0)-p_30<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(1) << " bad "<<p_31<<" diff "<<chi[ss+v]()(3)(1)-p_31<<std::endl;
|
|
||||||
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(2) << " bad "<<p_32<<" diff "<<chi[ss+v]()(3)(2)-p_32<<std::endl;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
vstream(chi[ss+v]()(0)(0),p_00);
|
vstream(chi[ss+v]()(0)(0),p_00);
|
||||||
vstream(chi[ss+v]()(0)(1),p_01);
|
vstream(chi[ss+v]()(0)(1),p_01);
|
||||||
vstream(chi[ss+v]()(0)(2),p_02);
|
vstream(chi[ss+v]()(0)(2),p_02);
|
||||||
@ -261,7 +234,7 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
|||||||
M5Dtime-=usecond();
|
M5Dtime-=usecond();
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
||||||
|
#if 0
|
||||||
alignas(64) SiteHalfSpinor hp;
|
alignas(64) SiteHalfSpinor hp;
|
||||||
alignas(64) SiteHalfSpinor hm;
|
alignas(64) SiteHalfSpinor hm;
|
||||||
alignas(64) SiteSpinor fp;
|
alignas(64) SiteSpinor fp;
|
||||||
@ -287,9 +260,504 @@ PARALLEL_FOR_LOOP
|
|||||||
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
|
vprefetch(psi[ss+v+LLs]);
|
||||||
|
|
||||||
|
int vp= (v==LLs-1) ? 0 : v+1;
|
||||||
|
int vm= (v==0 ) ? LLs-1 : v-1;
|
||||||
|
|
||||||
|
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||||
|
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||||
|
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||||
|
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||||
|
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||||
|
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||||
|
|
||||||
|
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||||
|
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||||
|
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||||
|
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||||
|
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||||
|
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||||
|
|
||||||
|
if ( vp<=v ) {
|
||||||
|
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||||
|
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||||
|
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||||
|
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||||
|
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||||
|
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||||
|
}
|
||||||
|
if ( vm>=v ) {
|
||||||
|
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||||
|
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||||
|
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||||
|
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||||
|
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||||
|
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||||
|
}
|
||||||
|
|
||||||
|
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_00);
|
||||||
|
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_01);
|
||||||
|
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_02);
|
||||||
|
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_10);
|
||||||
|
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_11);
|
||||||
|
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(),hp_12);
|
||||||
|
|
||||||
|
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_00);
|
||||||
|
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_01);
|
||||||
|
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_02);
|
||||||
|
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_10);
|
||||||
|
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_11);
|
||||||
|
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(),hm_12);
|
||||||
|
|
||||||
|
vstream(chi[ss+v]()(0)(0),p_00);
|
||||||
|
vstream(chi[ss+v]()(0)(1),p_01);
|
||||||
|
vstream(chi[ss+v]()(0)(2),p_02);
|
||||||
|
vstream(chi[ss+v]()(1)(0),p_10);
|
||||||
|
vstream(chi[ss+v]()(1)(1),p_11);
|
||||||
|
vstream(chi[ss+v]()(1)(2),p_12);
|
||||||
|
vstream(chi[ss+v]()(2)(0),p_20);
|
||||||
|
vstream(chi[ss+v]()(2)(1),p_21);
|
||||||
|
vstream(chi[ss+v]()(2)(2),p_22);
|
||||||
|
vstream(chi[ss+v]()(3)(0),p_30);
|
||||||
|
vstream(chi[ss+v]()(3)(1),p_31);
|
||||||
|
vstream(chi[ss+v]()(3)(2),p_32);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
M5Dtime+=usecond();
|
M5Dtime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef AVX512
|
||||||
|
#include <simd/Intel512common.h>
|
||||||
|
#include <simd/Intel512avx.h>
|
||||||
|
#include <simd/Intel512single.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi,
|
||||||
|
int LLs, int site,
|
||||||
|
Vector<iSinglet<Simd> > &Matp,
|
||||||
|
Vector<iSinglet<Simd> > &Matm)
|
||||||
|
{
|
||||||
|
#ifndef AVX512
|
||||||
|
{
|
||||||
|
SiteHalfSpinor BcastP;
|
||||||
|
SiteHalfSpinor BcastM;
|
||||||
|
SiteHalfSpinor SiteChiP;
|
||||||
|
SiteHalfSpinor SiteChiM;
|
||||||
|
|
||||||
|
// Ls*Ls * 2 * 12 * vol flops
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
for(int s2=0;s2<LLs;s2++){
|
||||||
|
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
||||||
|
|
||||||
|
int s=s2+l*LLs;
|
||||||
|
int lex=s2+LLs*site;
|
||||||
|
|
||||||
|
if ( s2==0 && l==0) {
|
||||||
|
SiteChiP=zero;
|
||||||
|
SiteChiM=zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
|
||||||
|
}}
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
|
||||||
|
}}
|
||||||
|
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
SiteChiP()(sp)(co)=real_madd(Matp[LLs*s+s1]()()(),BcastP()(sp)(co),SiteChiP()(sp)(co)); // 1100 us.
|
||||||
|
SiteChiM()(sp)(co)=real_madd(Matm[LLs*s+s1]()()(),BcastM()(sp)(co),SiteChiM()(sp)(co)); // each found by commenting out
|
||||||
|
}}
|
||||||
|
|
||||||
|
}}
|
||||||
|
{
|
||||||
|
int lex = s1+LLs*site;
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
|
||||||
|
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
// pointers
|
||||||
|
// MASK_REGS;
|
||||||
|
#define Chi_00 %%zmm1
|
||||||
|
#define Chi_01 %%zmm2
|
||||||
|
#define Chi_02 %%zmm3
|
||||||
|
#define Chi_10 %%zmm4
|
||||||
|
#define Chi_11 %%zmm5
|
||||||
|
#define Chi_12 %%zmm6
|
||||||
|
#define Chi_20 %%zmm7
|
||||||
|
#define Chi_21 %%zmm8
|
||||||
|
#define Chi_22 %%zmm9
|
||||||
|
#define Chi_30 %%zmm10
|
||||||
|
#define Chi_31 %%zmm11
|
||||||
|
#define Chi_32 %%zmm12
|
||||||
|
|
||||||
|
#define BCAST0 %%zmm13
|
||||||
|
#define BCAST1 %%zmm14
|
||||||
|
#define BCAST2 %%zmm15
|
||||||
|
#define BCAST3 %%zmm16
|
||||||
|
#define BCAST4 %%zmm17
|
||||||
|
#define BCAST5 %%zmm18
|
||||||
|
#define BCAST6 %%zmm19
|
||||||
|
#define BCAST7 %%zmm20
|
||||||
|
#define BCAST8 %%zmm21
|
||||||
|
#define BCAST9 %%zmm22
|
||||||
|
#define BCAST10 %%zmm23
|
||||||
|
#define BCAST11 %%zmm24
|
||||||
|
|
||||||
|
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
for(int s2=0;s2<LLs;s2++){
|
||||||
|
int lex=s2+LLs*site;
|
||||||
|
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
|
||||||
|
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
|
||||||
|
uint64_t a2 = (uint64_t)&psi[lex];
|
||||||
|
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
||||||
|
if ( (s2+l)==0 ) {
|
||||||
|
asm (
|
||||||
|
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
|
||||||
|
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
|
||||||
|
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
|
||||||
|
VBCASTCDUP(0,%2,BCAST0)
|
||||||
|
VBCASTCDUP(1,%2,BCAST1)
|
||||||
|
VBCASTCDUP(2,%2,BCAST2)
|
||||||
|
VBCASTCDUP(3,%2,BCAST3)
|
||||||
|
VBCASTCDUP(4,%2,BCAST4) VMULMEM (0,%0,BCAST0,Chi_00)
|
||||||
|
VBCASTCDUP(5,%2,BCAST5) VMULMEM (0,%0,BCAST1,Chi_01)
|
||||||
|
VBCASTCDUP(6,%2,BCAST6) VMULMEM (0,%0,BCAST2,Chi_02)
|
||||||
|
VBCASTCDUP(7,%2,BCAST7) VMULMEM (0,%0,BCAST3,Chi_10)
|
||||||
|
VBCASTCDUP(8,%2,BCAST8) VMULMEM (0,%0,BCAST4,Chi_11)
|
||||||
|
VBCASTCDUP(9,%2,BCAST9) VMULMEM (0,%0,BCAST5,Chi_12)
|
||||||
|
VBCASTCDUP(10,%2,BCAST10) VMULMEM (0,%1,BCAST6,Chi_20)
|
||||||
|
VBCASTCDUP(11,%2,BCAST11) VMULMEM (0,%1,BCAST7,Chi_21)
|
||||||
|
VMULMEM (0,%1,BCAST8,Chi_22)
|
||||||
|
VMULMEM (0,%1,BCAST9,Chi_30)
|
||||||
|
VMULMEM (0,%1,BCAST10,Chi_31)
|
||||||
|
VMULMEM (0,%1,BCAST11,Chi_32)
|
||||||
|
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||||
|
} else {
|
||||||
|
asm (
|
||||||
|
VBCASTCDUP(0,%2,BCAST0) VMADDMEM (0,%0,BCAST0,Chi_00)
|
||||||
|
VBCASTCDUP(1,%2,BCAST1) VMADDMEM (0,%0,BCAST1,Chi_01)
|
||||||
|
VBCASTCDUP(2,%2,BCAST2) VMADDMEM (0,%0,BCAST2,Chi_02)
|
||||||
|
VBCASTCDUP(3,%2,BCAST3) VMADDMEM (0,%0,BCAST3,Chi_10)
|
||||||
|
VBCASTCDUP(4,%2,BCAST4) VMADDMEM (0,%0,BCAST4,Chi_11)
|
||||||
|
VBCASTCDUP(5,%2,BCAST5) VMADDMEM (0,%0,BCAST5,Chi_12)
|
||||||
|
VBCASTCDUP(6,%2,BCAST6) VMADDMEM (0,%1,BCAST6,Chi_20)
|
||||||
|
VBCASTCDUP(7,%2,BCAST7) VMADDMEM (0,%1,BCAST7,Chi_21)
|
||||||
|
VBCASTCDUP(8,%2,BCAST8) VMADDMEM (0,%1,BCAST8,Chi_22)
|
||||||
|
VBCASTCDUP(9,%2,BCAST9) VMADDMEM (0,%1,BCAST9,Chi_30)
|
||||||
|
VBCASTCDUP(10,%2,BCAST10) VMADDMEM (0,%1,BCAST10,Chi_31)
|
||||||
|
VBCASTCDUP(11,%2,BCAST11) VMADDMEM (0,%1,BCAST11,Chi_32)
|
||||||
|
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||||
|
}
|
||||||
|
a0 = a0+incr;
|
||||||
|
a1 = a1+incr;
|
||||||
|
a2 = a2+sizeof(Simd::scalar_type);
|
||||||
|
}}
|
||||||
|
{
|
||||||
|
int lexa = s1+LLs*site;
|
||||||
|
asm (
|
||||||
|
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
|
||||||
|
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
|
||||||
|
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
|
||||||
|
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
|
||||||
|
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef Chi_00
|
||||||
|
#undef Chi_01
|
||||||
|
#undef Chi_02
|
||||||
|
#undef Chi_10
|
||||||
|
#undef Chi_11
|
||||||
|
#undef Chi_12
|
||||||
|
#undef Chi_20
|
||||||
|
#undef Chi_21
|
||||||
|
#undef Chi_22
|
||||||
|
#undef Chi_30
|
||||||
|
#undef Chi_31
|
||||||
|
#undef Chi_32
|
||||||
|
|
||||||
|
#undef BCAST0
|
||||||
|
#undef BCAST1
|
||||||
|
#undef BCAST2
|
||||||
|
#undef BCAST3
|
||||||
|
#undef BCAST4
|
||||||
|
#undef BCAST5
|
||||||
|
#undef BCAST6
|
||||||
|
#undef BCAST7
|
||||||
|
#undef BCAST8
|
||||||
|
#undef BCAST9
|
||||||
|
#undef BCAST10
|
||||||
|
#undef BCAST11
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
// Z-mobius version
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi,
|
||||||
|
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
|
||||||
|
{
|
||||||
|
#ifndef AVX512
|
||||||
|
{
|
||||||
|
SiteHalfSpinor BcastP;
|
||||||
|
SiteHalfSpinor BcastM;
|
||||||
|
SiteHalfSpinor SiteChiP;
|
||||||
|
SiteHalfSpinor SiteChiM;
|
||||||
|
|
||||||
|
// Ls*Ls * 2 * 12 * vol flops
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
for(int s2=0;s2<LLs;s2++){
|
||||||
|
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
||||||
|
|
||||||
|
int s=s2+l*LLs;
|
||||||
|
int lex=s2+LLs*site;
|
||||||
|
|
||||||
|
if ( s2==0 && l==0) {
|
||||||
|
SiteChiP=zero;
|
||||||
|
SiteChiM=zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
vbroadcast(BcastP()(sp )(co),psi[lex]()(sp)(co),l);
|
||||||
|
}}
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
vbroadcast(BcastM()(sp )(co),psi[lex]()(sp+2)(co),l);
|
||||||
|
}}
|
||||||
|
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
SiteChiP()(sp)(co)=SiteChiP()(sp)(co)+ Matp[LLs*s+s1]()()()*BcastP()(sp)(co);
|
||||||
|
SiteChiM()(sp)(co)=SiteChiM()(sp)(co)+ Matm[LLs*s+s1]()()()*BcastM()(sp)(co);
|
||||||
|
}}
|
||||||
|
|
||||||
|
|
||||||
|
}}
|
||||||
|
{
|
||||||
|
int lex = s1+LLs*site;
|
||||||
|
for(int sp=0;sp<2;sp++){
|
||||||
|
for(int co=0;co<Nc;co++){
|
||||||
|
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
|
||||||
|
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
// pointers
|
||||||
|
// MASK_REGS;
|
||||||
|
#define Chi_00 %zmm0
|
||||||
|
#define Chi_01 %zmm1
|
||||||
|
#define Chi_02 %zmm2
|
||||||
|
#define Chi_10 %zmm3
|
||||||
|
#define Chi_11 %zmm4
|
||||||
|
#define Chi_12 %zmm5
|
||||||
|
#define Chi_20 %zmm6
|
||||||
|
#define Chi_21 %zmm7
|
||||||
|
#define Chi_22 %zmm8
|
||||||
|
#define Chi_30 %zmm9
|
||||||
|
#define Chi_31 %zmm10
|
||||||
|
#define Chi_32 %zmm11
|
||||||
|
#define pChi_00 %%zmm0
|
||||||
|
#define pChi_01 %%zmm1
|
||||||
|
#define pChi_02 %%zmm2
|
||||||
|
#define pChi_10 %%zmm3
|
||||||
|
#define pChi_11 %%zmm4
|
||||||
|
#define pChi_12 %%zmm5
|
||||||
|
#define pChi_20 %%zmm6
|
||||||
|
#define pChi_21 %%zmm7
|
||||||
|
#define pChi_22 %%zmm8
|
||||||
|
#define pChi_30 %%zmm9
|
||||||
|
#define pChi_31 %%zmm10
|
||||||
|
#define pChi_32 %%zmm11
|
||||||
|
|
||||||
|
#define BCAST_00 %zmm12
|
||||||
|
#define SHUF_00 %zmm13
|
||||||
|
#define BCAST_01 %zmm14
|
||||||
|
#define SHUF_01 %zmm15
|
||||||
|
#define BCAST_02 %zmm16
|
||||||
|
#define SHUF_02 %zmm17
|
||||||
|
#define BCAST_10 %zmm18
|
||||||
|
#define SHUF_10 %zmm19
|
||||||
|
#define BCAST_11 %zmm20
|
||||||
|
#define SHUF_11 %zmm21
|
||||||
|
#define BCAST_12 %zmm22
|
||||||
|
#define SHUF_12 %zmm23
|
||||||
|
|
||||||
|
#define Mp %zmm24
|
||||||
|
#define Mps %zmm25
|
||||||
|
#define Mm %zmm26
|
||||||
|
#define Mms %zmm27
|
||||||
|
#define N 8
|
||||||
|
int incr=LLs*LLs*sizeof(iSinglet<Simd>);
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
for(int s2=0;s2<LLs;s2++){
|
||||||
|
int lex=s2+LLs*site;
|
||||||
|
uint64_t a0 = (uint64_t)&Matp[LLs*s2+s1]; // should be cacheable
|
||||||
|
uint64_t a1 = (uint64_t)&Matm[LLs*s2+s1];
|
||||||
|
uint64_t a2 = (uint64_t)&psi[lex];
|
||||||
|
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
||||||
|
if ( (s2+l)==0 ) {
|
||||||
|
LOAD64(%r8,a0);
|
||||||
|
LOAD64(%r9,a1);
|
||||||
|
LOAD64(%r10,a2);
|
||||||
|
asm (
|
||||||
|
VLOAD(0,%r8,Mp)// i r
|
||||||
|
VLOAD(0,%r9,Mm)
|
||||||
|
VSHUF(Mp,Mps) // r i
|
||||||
|
VSHUF(Mm,Mms)
|
||||||
|
VPREFETCH1(12,%r10) VPREFETCH1(13,%r10)
|
||||||
|
VPREFETCH1(14,%r10) VPREFETCH1(15,%r10)
|
||||||
|
|
||||||
|
VMULIDUP(0*N,%r10,Mps,Chi_00)
|
||||||
|
VMULIDUP(1*N,%r10,Mps,Chi_01)
|
||||||
|
VMULIDUP(2*N,%r10,Mps,Chi_02)
|
||||||
|
VMULIDUP(3*N,%r10,Mps,Chi_10)
|
||||||
|
VMULIDUP(4*N,%r10,Mps,Chi_11)
|
||||||
|
VMULIDUP(5*N,%r10,Mps,Chi_12)
|
||||||
|
|
||||||
|
VMULIDUP(6*N ,%r10,Mms,Chi_20)
|
||||||
|
VMULIDUP(7*N ,%r10,Mms,Chi_21)
|
||||||
|
VMULIDUP(8*N ,%r10,Mms,Chi_22)
|
||||||
|
VMULIDUP(9*N ,%r10,Mms,Chi_30)
|
||||||
|
VMULIDUP(10*N,%r10,Mms,Chi_31)
|
||||||
|
VMULIDUP(11*N,%r10,Mms,Chi_32)
|
||||||
|
|
||||||
|
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00)
|
||||||
|
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01)
|
||||||
|
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
|
||||||
|
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
|
||||||
|
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
|
||||||
|
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
|
||||||
|
|
||||||
|
VMADDSUBRDUP(6*N ,%r10,Mm,Chi_20)
|
||||||
|
VMADDSUBRDUP(7*N ,%r10,Mm,Chi_21)
|
||||||
|
VMADDSUBRDUP(8*N ,%r10,Mm,Chi_22)
|
||||||
|
VMADDSUBRDUP(9*N ,%r10,Mm,Chi_30)
|
||||||
|
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
|
||||||
|
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
LOAD64(%r8,a0);
|
||||||
|
LOAD64(%r9,a1);
|
||||||
|
LOAD64(%r10,a2);
|
||||||
|
asm (
|
||||||
|
VLOAD(0,%r8,Mp)
|
||||||
|
VSHUF(Mp,Mps)
|
||||||
|
|
||||||
|
VLOAD(0,%r9,Mm)
|
||||||
|
VSHUF(Mm,Mms)
|
||||||
|
|
||||||
|
VMADDSUBIDUP(0*N,%r10,Mps,Chi_00) // Mri * Pii +- Cir
|
||||||
|
VMADDSUBIDUP(1*N,%r10,Mps,Chi_01)
|
||||||
|
VMADDSUBIDUP(2*N,%r10,Mps,Chi_02)
|
||||||
|
VMADDSUBIDUP(3*N,%r10,Mps,Chi_10)
|
||||||
|
VMADDSUBIDUP(4*N,%r10,Mps,Chi_11)
|
||||||
|
VMADDSUBIDUP(5*N,%r10,Mps,Chi_12)
|
||||||
|
|
||||||
|
VMADDSUBIDUP(6 *N,%r10,Mms,Chi_20)
|
||||||
|
VMADDSUBIDUP(7 *N,%r10,Mms,Chi_21)
|
||||||
|
VMADDSUBIDUP(8 *N,%r10,Mms,Chi_22)
|
||||||
|
VMADDSUBIDUP(9 *N,%r10,Mms,Chi_30)
|
||||||
|
VMADDSUBIDUP(10*N,%r10,Mms,Chi_31)
|
||||||
|
VMADDSUBIDUP(11*N,%r10,Mms,Chi_32)
|
||||||
|
|
||||||
|
VMADDSUBRDUP(0*N,%r10,Mp,Chi_00) // Cir = Mir * Prr +- ( Mri * Pii +- Cir)
|
||||||
|
VMADDSUBRDUP(1*N,%r10,Mp,Chi_01) // Ci = MiPr + Ci + MrPi ; Cr = MrPr - ( MiPi - Cr)
|
||||||
|
VMADDSUBRDUP(2*N,%r10,Mp,Chi_02)
|
||||||
|
VMADDSUBRDUP(3*N,%r10,Mp,Chi_10)
|
||||||
|
VMADDSUBRDUP(4*N,%r10,Mp,Chi_11)
|
||||||
|
VMADDSUBRDUP(5*N,%r10,Mp,Chi_12)
|
||||||
|
|
||||||
|
VMADDSUBRDUP(6 *N,%r10,Mm,Chi_20)
|
||||||
|
VMADDSUBRDUP(7 *N,%r10,Mm,Chi_21)
|
||||||
|
VMADDSUBRDUP(8 *N,%r10,Mm,Chi_22)
|
||||||
|
VMADDSUBRDUP(9 *N,%r10,Mm,Chi_30)
|
||||||
|
VMADDSUBRDUP(10*N,%r10,Mm,Chi_31)
|
||||||
|
VMADDSUBRDUP(11*N,%r10,Mm,Chi_32)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
a0 = a0+incr;
|
||||||
|
a1 = a1+incr;
|
||||||
|
a2 = a2+sizeof(Simd::scalar_type);
|
||||||
|
}}
|
||||||
|
{
|
||||||
|
int lexa = s1+LLs*site;
|
||||||
|
/*
|
||||||
|
SiteSpinor tmp;
|
||||||
|
asm (
|
||||||
|
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
|
||||||
|
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
|
||||||
|
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
|
||||||
|
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
|
||||||
|
: : "r" ((uint64_t)&tmp) : "memory" );
|
||||||
|
*/
|
||||||
|
|
||||||
|
asm (
|
||||||
|
VSTORE(0,%0,pChi_00) VSTORE(1 ,%0,pChi_01) VSTORE(2 ,%0,pChi_02)
|
||||||
|
VSTORE(3,%0,pChi_10) VSTORE(4 ,%0,pChi_11) VSTORE(5 ,%0,pChi_12)
|
||||||
|
VSTORE(6,%0,pChi_20) VSTORE(7 ,%0,pChi_21) VSTORE(8 ,%0,pChi_22)
|
||||||
|
VSTORE(9,%0,pChi_30) VSTORE(10,%0,pChi_31) VSTORE(11,%0,pChi_32)
|
||||||
|
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
|
||||||
|
|
||||||
|
// if ( 1 || (site==0) ) {
|
||||||
|
// std::cout<<site << " s1 "<<s1<<"\n\t"<<tmp << "\n't" << chi[lexa] <<"\n\t"<<tmp-chi[lexa]<<std::endl;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef Chi_00
|
||||||
|
#undef Chi_01
|
||||||
|
#undef Chi_02
|
||||||
|
#undef Chi_10
|
||||||
|
#undef Chi_11
|
||||||
|
#undef Chi_12
|
||||||
|
#undef Chi_20
|
||||||
|
#undef Chi_21
|
||||||
|
#undef Chi_22
|
||||||
|
#undef Chi_30
|
||||||
|
#undef Chi_31
|
||||||
|
#undef Chi_32
|
||||||
|
|
||||||
|
#undef BCAST0
|
||||||
|
#undef BCAST1
|
||||||
|
#undef BCAST2
|
||||||
|
#undef BCAST3
|
||||||
|
#undef BCAST4
|
||||||
|
#undef BCAST5
|
||||||
|
#undef BCAST6
|
||||||
|
#undef BCAST7
|
||||||
|
#undef BCAST8
|
||||||
|
#undef BCAST9
|
||||||
|
#undef BCAST10
|
||||||
|
#undef BCAST11
|
||||||
|
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
{
|
{
|
||||||
@ -299,106 +767,39 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
|
|||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
Vector<iSinglet<Simd> > Matp;
|
||||||
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
Vector<iSinglet<Simd> > Matm;
|
||||||
|
Vector<iSinglet<Simd> > *_Matp;
|
||||||
|
Vector<iSinglet<Simd> > *_Matm;
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
// MooeeInternalCompute(dag,inv,Matp,Matm);
|
||||||
Pplus(s,s) = bee[s];
|
if ( inv && dag ) {
|
||||||
Pminus(s,s)= bee[s];
|
_Matp = &MatpInvDag;
|
||||||
|
_Matm = &MatmInvDag;
|
||||||
}
|
}
|
||||||
|
if ( inv && (!dag) ) {
|
||||||
for(int s=0;s<Ls-1;s++){
|
_Matp = &MatpInv;
|
||||||
Pminus(s,s+1) = -cee[s];
|
_Matm = &MatmInv;
|
||||||
}
|
|
||||||
|
|
||||||
for(int s=0;s<Ls-1;s++){
|
|
||||||
Pplus(s+1,s) = -cee[s+1];
|
|
||||||
}
|
|
||||||
Pplus (0,Ls-1) = mass*cee[0];
|
|
||||||
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
|
||||||
|
|
||||||
Eigen::MatrixXcd PplusMat ;
|
|
||||||
Eigen::MatrixXcd PminusMat;
|
|
||||||
|
|
||||||
if ( inv ) {
|
|
||||||
PplusMat =Pplus.inverse();
|
|
||||||
PminusMat=Pminus.inverse();
|
|
||||||
} else {
|
|
||||||
PplusMat =Pplus;
|
|
||||||
PminusMat=Pminus;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(dag){
|
|
||||||
PplusMat.adjointInPlace();
|
|
||||||
PminusMat.adjointInPlace();
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
|
||||||
const int Nsimd=Simd::Nsimd();
|
|
||||||
Vector<iSinglet<Simd> > Matp(Ls*LLs);
|
|
||||||
Vector<iSinglet<Simd> > Matm(Ls*LLs);
|
|
||||||
|
|
||||||
for(int s2=0;s2<Ls;s2++){
|
|
||||||
for(int s1=0;s1<LLs;s1++){
|
|
||||||
int istride = LLs;
|
|
||||||
int ostride = 1;
|
|
||||||
Simd Vp;
|
|
||||||
Simd Vm;
|
|
||||||
scalar_type *sp = (scalar_type *)&Vp;
|
|
||||||
scalar_type *sm = (scalar_type *)&Vm;
|
|
||||||
for(int l=0;l<Nsimd;l++){
|
|
||||||
sp[l] = PplusMat (l*istride+s1*ostride ,s2);
|
|
||||||
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
|
||||||
}
|
|
||||||
Matp[LLs*s2+s1] = Vp;
|
|
||||||
Matm[LLs*s2+s1] = Vm;
|
|
||||||
}
|
}
|
||||||
|
if ( !inv ) {
|
||||||
|
MooeeInternalCompute(dag,inv,Matp,Matm);
|
||||||
|
_Matp = &Matp;
|
||||||
|
_Matm = &Matm;
|
||||||
}
|
}
|
||||||
|
assert(_Matp->size()==Ls*LLs);
|
||||||
|
|
||||||
MooeeInvCalls++;
|
MooeeInvCalls++;
|
||||||
MooeeInvTime-=usecond();
|
MooeeInvTime-=usecond();
|
||||||
// Dynamic allocate on stack to get per thread without serialised heap acces
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
|
|
||||||
Vector<SiteHalfSpinor> SitePplus(LLs);
|
if ( switcheroo<Coeff_t>::iscomplex() ) {
|
||||||
Vector<SiteHalfSpinor> SitePminus(LLs);
|
PARALLEL_FOR_LOOP
|
||||||
Vector<SiteHalfSpinor> SiteChiP(LLs);
|
|
||||||
Vector<SiteHalfSpinor> SiteChiM(LLs);
|
|
||||||
Vector<SiteSpinor> SiteChi(LLs);
|
|
||||||
|
|
||||||
SiteHalfSpinor BcastP;
|
|
||||||
SiteHalfSpinor BcastM;
|
|
||||||
|
|
||||||
#pragma omp for
|
|
||||||
for(auto site=0;site<vol;site++){
|
for(auto site=0;site<vol;site++){
|
||||||
|
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
||||||
for(int s=0;s<LLs;s++){
|
|
||||||
int lex = s+LLs*site;
|
|
||||||
spProj5p(SitePplus[s] ,psi[lex]);
|
|
||||||
spProj5m(SitePminus[s],psi[lex]);
|
|
||||||
SiteChiP[s]=zero;
|
|
||||||
SiteChiM[s]=zero;
|
|
||||||
}
|
|
||||||
|
|
||||||
int s=0;
|
|
||||||
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
|
||||||
for(int s2=0;s2<LLs;s2++){ // Column loop of right hand side
|
|
||||||
vbroadcast(BcastP,SitePplus [s2],l);
|
|
||||||
vbroadcast(BcastM,SitePminus[s2],l);
|
|
||||||
for(int s1=0;s1<LLs;s1++){ // Column loop of reduction variables
|
|
||||||
SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP;
|
|
||||||
SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM;
|
|
||||||
}
|
|
||||||
s++;
|
|
||||||
}}
|
|
||||||
|
|
||||||
for(int s=0;s<LLs;s++){
|
|
||||||
int lex = s+LLs*site;
|
|
||||||
spRecon5p(SiteChi[s],SiteChiP[s]);
|
|
||||||
accumRecon5m(SiteChi[s],SiteChiM[s]);
|
|
||||||
chi[lex] = SiteChi[s]*0.5;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(auto site=0;site<vol;site++){
|
||||||
|
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MooeeInvTime+=usecond();
|
MooeeInvTime+=usecond();
|
||||||
@ -414,4 +815,5 @@ template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const Fermion
|
|||||||
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -48,6 +48,8 @@ namespace Grid {
|
|||||||
|
|
||||||
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
|
FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {};
|
||||||
|
|
||||||
|
virtual FermionField &tmp(void) = 0;
|
||||||
|
|
||||||
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
|
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
|
||||||
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
|
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
|
||||||
|
|
||||||
|
@ -61,7 +61,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
|||||||
LebesgueEvenOdd(_cbgrid),
|
LebesgueEvenOdd(_cbgrid),
|
||||||
Umu(&Fgrid),
|
Umu(&Fgrid),
|
||||||
UmuEven(&Hgrid),
|
UmuEven(&Hgrid),
|
||||||
UmuOdd(&Hgrid) {
|
UmuOdd(&Hgrid),
|
||||||
|
_tmp(&Hgrid)
|
||||||
|
{
|
||||||
// Allocate the required comms buffer
|
// Allocate the required comms buffer
|
||||||
ImportGauge(_Umu);
|
ImportGauge(_Umu);
|
||||||
}
|
}
|
||||||
|
@ -58,6 +58,9 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
|||||||
GridBase *FermionGrid(void) { return _grid; }
|
GridBase *FermionGrid(void) { return _grid; }
|
||||||
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||||
|
|
||||||
|
FermionField _tmp;
|
||||||
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
|
@ -60,7 +60,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|||||||
UmuEven(_FourDimRedBlackGrid),
|
UmuEven(_FourDimRedBlackGrid),
|
||||||
UmuOdd (_FourDimRedBlackGrid),
|
UmuOdd (_FourDimRedBlackGrid),
|
||||||
Lebesgue(_FourDimGrid),
|
Lebesgue(_FourDimGrid),
|
||||||
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
LebesgueEvenOdd(_FourDimRedBlackGrid),
|
||||||
|
_tmp(&FiveDimRedBlackGrid)
|
||||||
{
|
{
|
||||||
if (Impl::LsVectorised) {
|
if (Impl::LsVectorised) {
|
||||||
|
|
||||||
|
@ -74,6 +74,9 @@ namespace QCD {
|
|||||||
typedef WilsonKernels<Impl> Kernels;
|
typedef WilsonKernels<Impl> Kernels;
|
||||||
PmuStat stat;
|
PmuStat stat;
|
||||||
|
|
||||||
|
FermionField _tmp;
|
||||||
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
void Report(void);
|
void Report(void);
|
||||||
void ZeroCounters(void);
|
void ZeroCounters(void);
|
||||||
double DhopCalls;
|
double DhopCalls;
|
||||||
|
@ -32,6 +32,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
// Vector IO utilities ///////////////////////////////////////////////////////
|
||||||
// helper function to read space-separated values
|
// helper function to read space-separated values
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::vector<T> strToVec(const std::string s)
|
std::vector<T> strToVec(const std::string s)
|
||||||
@ -67,6 +68,77 @@ namespace Grid {
|
|||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Vector element trait //////////////////////////////////////////////////////
|
||||||
|
template <typename T>
|
||||||
|
struct element
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
static constexpr bool is_number = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct element<std::vector<T>>
|
||||||
|
{
|
||||||
|
typedef typename element<T>::type type;
|
||||||
|
static constexpr bool is_number = std::is_arithmetic<T>::value
|
||||||
|
or is_complex<T>::value
|
||||||
|
or element<T>::is_number;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Vector flatening utility class ////////////////////////////////////////////
|
||||||
|
// Class to flatten a multidimensional std::vector
|
||||||
|
template <typename V>
|
||||||
|
class Flatten
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef typename element<V>::type Element;
|
||||||
|
public:
|
||||||
|
explicit Flatten(const V &vector);
|
||||||
|
const V & getVector(void);
|
||||||
|
const std::vector<Element> & getFlatVector(void);
|
||||||
|
const std::vector<size_t> & getDim(void);
|
||||||
|
private:
|
||||||
|
void accumulate(const Element &e);
|
||||||
|
template <typename W>
|
||||||
|
void accumulate(const W &v);
|
||||||
|
void accumulateDim(const Element &e);
|
||||||
|
template <typename W>
|
||||||
|
void accumulateDim(const W &v);
|
||||||
|
private:
|
||||||
|
const V &vector_;
|
||||||
|
std::vector<Element> flatVector_;
|
||||||
|
std::vector<size_t> dim_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Class to reconstruct a multidimensional std::vector
|
||||||
|
template <typename V>
|
||||||
|
class Reconstruct
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef typename element<V>::type Element;
|
||||||
|
public:
|
||||||
|
Reconstruct(const std::vector<Element> &flatVector,
|
||||||
|
const std::vector<size_t> &dim);
|
||||||
|
const V & getVector(void);
|
||||||
|
const std::vector<Element> & getFlatVector(void);
|
||||||
|
const std::vector<size_t> & getDim(void);
|
||||||
|
private:
|
||||||
|
void fill(std::vector<Element> &v);
|
||||||
|
template <typename W>
|
||||||
|
void fill(W &v);
|
||||||
|
void resize(std::vector<Element> &v, const unsigned int dim);
|
||||||
|
template <typename W>
|
||||||
|
void resize(W &v, const unsigned int dim);
|
||||||
|
private:
|
||||||
|
V vector_;
|
||||||
|
const std::vector<Element> &flatVector_;
|
||||||
|
std::vector<size_t> dim_;
|
||||||
|
size_t ind_{0};
|
||||||
|
unsigned int dimInd_{0};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Abstract writer/reader classes ////////////////////////////////////////////
|
||||||
// static polymorphism implemented using CRTP idiom
|
// static polymorphism implemented using CRTP idiom
|
||||||
class Serializable;
|
class Serializable;
|
||||||
|
|
||||||
@ -83,12 +155,7 @@ namespace Grid {
|
|||||||
typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
|
typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
|
||||||
write(const std::string& s, const U &output);
|
write(const std::string& s, const U &output);
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<std::is_enum<U>::value, void>::type
|
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
|
||||||
write(const std::string& s, const U &output);
|
|
||||||
template <typename U>
|
|
||||||
typename std::enable_if<
|
|
||||||
!(std::is_base_of<Serializable, U>::value or std::is_enum<U>::value),
|
|
||||||
void>::type
|
|
||||||
write(const std::string& s, const U &output);
|
write(const std::string& s, const U &output);
|
||||||
private:
|
private:
|
||||||
T *upcast;
|
T *upcast;
|
||||||
@ -107,12 +174,7 @@ namespace Grid {
|
|||||||
typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
|
typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
|
||||||
read(const std::string& s, U &output);
|
read(const std::string& s, U &output);
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<std::is_enum<U>::value, void>::type
|
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
|
||||||
read(const std::string& s, U &output);
|
|
||||||
template <typename U>
|
|
||||||
typename std::enable_if<
|
|
||||||
!(std::is_base_of<Serializable, U>::value or std::is_enum<U>::value),
|
|
||||||
void>::type
|
|
||||||
read(const std::string& s, U &output);
|
read(const std::string& s, U &output);
|
||||||
protected:
|
protected:
|
||||||
template <typename U>
|
template <typename U>
|
||||||
@ -142,7 +204,128 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Generic writer interface
|
// Flatten class template implementation /////////////////////////////////////
|
||||||
|
template <typename V>
|
||||||
|
void Flatten<V>::accumulate(const Element &e)
|
||||||
|
{
|
||||||
|
flatVector_.push_back(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
template <typename W>
|
||||||
|
void Flatten<V>::accumulate(const W &v)
|
||||||
|
{
|
||||||
|
for (auto &e: v)
|
||||||
|
{
|
||||||
|
accumulate(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
void Flatten<V>::accumulateDim(const Element &e) {};
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
template <typename W>
|
||||||
|
void Flatten<V>::accumulateDim(const W &v)
|
||||||
|
{
|
||||||
|
dim_.push_back(v.size());
|
||||||
|
accumulateDim(v[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
Flatten<V>::Flatten(const V &vector)
|
||||||
|
: vector_(vector)
|
||||||
|
{
|
||||||
|
accumulate(vector_);
|
||||||
|
accumulateDim(vector_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
const V & Flatten<V>::getVector(void)
|
||||||
|
{
|
||||||
|
return vector_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
const std::vector<typename Flatten<V>::Element> &
|
||||||
|
Flatten<V>::getFlatVector(void)
|
||||||
|
{
|
||||||
|
return flatVector_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
const std::vector<size_t> & Flatten<V>::getDim(void)
|
||||||
|
{
|
||||||
|
return dim_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reconstruct class template implementation /////////////////////////////////
|
||||||
|
template <typename V>
|
||||||
|
void Reconstruct<V>::fill(std::vector<Element> &v)
|
||||||
|
{
|
||||||
|
for (auto &e: v)
|
||||||
|
{
|
||||||
|
e = flatVector_[ind_++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
template <typename W>
|
||||||
|
void Reconstruct<V>::fill(W &v)
|
||||||
|
{
|
||||||
|
for (auto &e: v)
|
||||||
|
{
|
||||||
|
fill(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim)
|
||||||
|
{
|
||||||
|
v.resize(dim_[dim]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
template <typename W>
|
||||||
|
void Reconstruct<V>::resize(W &v, const unsigned int dim)
|
||||||
|
{
|
||||||
|
v.resize(dim_[dim]);
|
||||||
|
for (auto &e: v)
|
||||||
|
{
|
||||||
|
resize(e, dim + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector,
|
||||||
|
const std::vector<size_t> &dim)
|
||||||
|
: flatVector_(flatVector)
|
||||||
|
, dim_(dim)
|
||||||
|
{
|
||||||
|
resize(vector_, 0);
|
||||||
|
fill(vector_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
const V & Reconstruct<V>::getVector(void)
|
||||||
|
{
|
||||||
|
return vector_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
const std::vector<typename Reconstruct<V>::Element> &
|
||||||
|
Reconstruct<V>::getFlatVector(void)
|
||||||
|
{
|
||||||
|
return flatVector_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
const std::vector<size_t> & Reconstruct<V>::getDim(void)
|
||||||
|
{
|
||||||
|
return dim_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generic writer interface //////////////////////////////////////////////////
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void push(Writer<T> &w, const std::string &s)
|
inline void push(Writer<T> &w, const std::string &s)
|
||||||
{
|
{
|
||||||
@ -221,23 +404,13 @@ namespace Grid {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<std::is_enum<U>::value, void>::type
|
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
|
||||||
Writer<T>::write(const std::string &s, const U &output)
|
|
||||||
{
|
|
||||||
EnumIO<U>::write(*this, s, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
template <typename U>
|
|
||||||
typename std::enable_if<
|
|
||||||
!(std::is_base_of<Serializable, U>::value or std::is_enum<U>::value),
|
|
||||||
void>::type
|
|
||||||
Writer<T>::write(const std::string &s, const U &output)
|
Writer<T>::write(const std::string &s, const U &output)
|
||||||
{
|
{
|
||||||
upcast->writeDefault(s, output);
|
upcast->writeDefault(s, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reader template implementation ////////////////////////////////////////////
|
// Reader template implementation
|
||||||
template <typename T>
|
template <typename T>
|
||||||
Reader<T>::Reader(void)
|
Reader<T>::Reader(void)
|
||||||
{
|
{
|
||||||
@ -266,17 +439,7 @@ namespace Grid {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
template <typename U>
|
template <typename U>
|
||||||
typename std::enable_if<std::is_enum<U>::value, void>::type
|
typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
|
||||||
Reader<T>::read(const std::string &s, U &output)
|
|
||||||
{
|
|
||||||
EnumIO<U>::read(*this, s, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
template <typename U>
|
|
||||||
typename std::enable_if<
|
|
||||||
!(std::is_base_of<Serializable, U>::value or std::is_enum<U>::value),
|
|
||||||
void>::type
|
|
||||||
Reader<T>::read(const std::string &s, U &output)
|
Reader<T>::read(const std::string &s, U &output)
|
||||||
{
|
{
|
||||||
upcast->readDefault(s, output);
|
upcast->readDefault(s, output);
|
||||||
@ -300,7 +463,6 @@ namespace Grid {
|
|||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
103
lib/serialisation/Hdf5IO.cc
Normal file
103
lib/serialisation/Hdf5IO.cc
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
#ifndef H5_NO_NAMESPACE
|
||||||
|
using namespace H5NS;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Writer implementation ///////////////////////////////////////////////////////
|
||||||
|
Hdf5Writer::Hdf5Writer(const std::string &fileName)
|
||||||
|
: fileName_(fileName)
|
||||||
|
, file_(fileName.c_str(), H5F_ACC_TRUNC)
|
||||||
|
{
|
||||||
|
group_ = file_.openGroup("/");
|
||||||
|
writeSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",
|
||||||
|
Hdf5Type<unsigned int>::type());
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hdf5Writer::push(const std::string &s)
|
||||||
|
{
|
||||||
|
group_ = group_.createGroup(s);
|
||||||
|
path_.push_back(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hdf5Writer::pop(void)
|
||||||
|
{
|
||||||
|
path_.pop_back();
|
||||||
|
if (path_.empty())
|
||||||
|
{
|
||||||
|
group_ = file_.openGroup("/");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto binOp = [](const std::string &a, const std::string &b)->std::string
|
||||||
|
{
|
||||||
|
return a + "/" + b;
|
||||||
|
};
|
||||||
|
|
||||||
|
group_ = group_.openGroup(std::accumulate(path_.begin(), path_.end(),
|
||||||
|
std::string(""), binOp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Hdf5Writer::writeDefault(const std::string &s, const std::string &x)
|
||||||
|
{
|
||||||
|
StrType strType(PredType::C_S1, x.size());
|
||||||
|
|
||||||
|
writeSingleAttribute(*(x.data()), s, strType);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hdf5Writer::writeDefault(const std::string &s, const char *x)
|
||||||
|
{
|
||||||
|
std::string sx(x);
|
||||||
|
|
||||||
|
writeDefault(s, sx);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reader implementation ///////////////////////////////////////////////////////
|
||||||
|
Hdf5Reader::Hdf5Reader(const std::string &fileName)
|
||||||
|
: fileName_(fileName)
|
||||||
|
, file_(fileName.c_str(), H5F_ACC_RDONLY)
|
||||||
|
{
|
||||||
|
group_ = file_.openGroup("/");
|
||||||
|
readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",
|
||||||
|
Hdf5Type<unsigned int>::type());
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hdf5Reader::push(const std::string &s)
|
||||||
|
{
|
||||||
|
group_ = group_.openGroup(s);
|
||||||
|
path_.push_back(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Hdf5Reader::pop(void)
|
||||||
|
{
|
||||||
|
path_.pop_back();
|
||||||
|
if (path_.empty())
|
||||||
|
{
|
||||||
|
group_ = file_.openGroup("/");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto binOp = [](const std::string &a, const std::string &b)->std::string
|
||||||
|
{
|
||||||
|
return a + "/" + b;
|
||||||
|
};
|
||||||
|
|
||||||
|
group_ = group_.openGroup(std::accumulate(path_.begin(), path_.end(),
|
||||||
|
std::string(""), binOp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Hdf5Reader::readDefault(const std::string &s, std::string &x)
|
||||||
|
{
|
||||||
|
Attribute attribute;
|
||||||
|
|
||||||
|
attribute = group_.openAttribute(s);
|
||||||
|
StrType strType = attribute.getStrType();
|
||||||
|
|
||||||
|
x.resize(strType.getSize());
|
||||||
|
attribute.read(strType, &(x[0]));
|
||||||
|
}
|
242
lib/serialisation/Hdf5IO.h
Normal file
242
lib/serialisation/Hdf5IO.h
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_HDF5_H
|
||||||
|
#define GRID_SERIALISATION_HDF5_H
|
||||||
|
|
||||||
|
#include <stack>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <H5Cpp.h>
|
||||||
|
#include "Hdf5Type.h"
|
||||||
|
|
||||||
|
#ifndef H5_NO_NAMESPACE
|
||||||
|
#define H5NS H5
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// default thresold above which datasets are used instead of attributes
|
||||||
|
#ifndef HDF5_DEF_DATASET_THRES
|
||||||
|
#define HDF5_DEF_DATASET_THRES 6u
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// name guard for Grid metadata
|
||||||
|
#define HDF5_GRID_GUARD "_Grid_"
|
||||||
|
|
||||||
|
namespace Grid
|
||||||
|
{
|
||||||
|
class Hdf5Writer: public Writer<Hdf5Writer>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Hdf5Writer(const std::string &fileName);
|
||||||
|
virtual ~Hdf5Writer(void) = default;
|
||||||
|
void push(const std::string &s);
|
||||||
|
void pop(void);
|
||||||
|
void writeDefault(const std::string &s, const char *x);
|
||||||
|
template <typename U>
|
||||||
|
void writeDefault(const std::string &s, const U &x);
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
||||||
|
writeDefault(const std::string &s, const std::vector<U> &x);
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
||||||
|
writeDefault(const std::string &s, const std::vector<U> &x);
|
||||||
|
private:
|
||||||
|
template <typename U>
|
||||||
|
void writeSingleAttribute(const U &x, const std::string &name,
|
||||||
|
const H5NS::DataType &type);
|
||||||
|
private:
|
||||||
|
std::string fileName_;
|
||||||
|
std::vector<std::string> path_;
|
||||||
|
H5NS::H5File file_;
|
||||||
|
H5NS::Group group_;
|
||||||
|
unsigned int dataSetThres_{HDF5_DEF_DATASET_THRES};
|
||||||
|
};
|
||||||
|
|
||||||
|
class Hdf5Reader: public Reader<Hdf5Reader>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Hdf5Reader(const std::string &fileName);
|
||||||
|
virtual ~Hdf5Reader(void) = default;
|
||||||
|
void push(const std::string &s);
|
||||||
|
void pop(void);
|
||||||
|
template <typename U>
|
||||||
|
void readDefault(const std::string &s, U &output);
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
||||||
|
readDefault(const std::string &s, std::vector<U> &x);
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
||||||
|
readDefault(const std::string &s, std::vector<U> &x);
|
||||||
|
private:
|
||||||
|
template <typename U>
|
||||||
|
void readSingleAttribute(U &x, const std::string &name,
|
||||||
|
const H5NS::DataType &type);
|
||||||
|
private:
|
||||||
|
std::string fileName_;
|
||||||
|
std::vector<std::string> path_;
|
||||||
|
H5NS::H5File file_;
|
||||||
|
H5NS::Group group_;
|
||||||
|
unsigned int dataSetThres_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Writer template implementation ////////////////////////////////////////////
|
||||||
|
template <typename U>
|
||||||
|
void Hdf5Writer::writeSingleAttribute(const U &x, const std::string &name,
|
||||||
|
const H5NS::DataType &type)
|
||||||
|
{
|
||||||
|
H5NS::Attribute attribute;
|
||||||
|
hsize_t attrDim = 1;
|
||||||
|
H5NS::DataSpace attrSpace(1, &attrDim);
|
||||||
|
|
||||||
|
attribute = group_.createAttribute(name, type, attrSpace);
|
||||||
|
attribute.write(type, &x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
void Hdf5Writer::writeDefault(const std::string &s, const U &x)
|
||||||
|
{
|
||||||
|
writeSingleAttribute(x, s, Hdf5Type<U>::type());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
||||||
|
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
|
||||||
|
{
|
||||||
|
// alias to element type
|
||||||
|
typedef typename element<std::vector<U>>::type Element;
|
||||||
|
|
||||||
|
// flatten the vector and getting dimensions
|
||||||
|
Flatten<std::vector<U>> flat(x);
|
||||||
|
std::vector<hsize_t> dim;
|
||||||
|
const auto &flatx = flat.getFlatVector();
|
||||||
|
|
||||||
|
for (auto &d: flat.getDim())
|
||||||
|
{
|
||||||
|
dim.push_back(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
// write to file
|
||||||
|
H5NS::DataSpace dataSpace(dim.size(), dim.data());
|
||||||
|
|
||||||
|
if (flatx.size() > dataSetThres_)
|
||||||
|
{
|
||||||
|
H5NS::DataSet dataSet;
|
||||||
|
|
||||||
|
dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace);
|
||||||
|
dataSet.write(flatx.data(), Hdf5Type<Element>::type());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
H5NS::Attribute attribute;
|
||||||
|
|
||||||
|
attribute = group_.createAttribute(s, Hdf5Type<Element>::type(), dataSpace);
|
||||||
|
attribute.write(Hdf5Type<Element>::type(), flatx.data());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
||||||
|
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
|
||||||
|
{
|
||||||
|
push(s);
|
||||||
|
writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
|
||||||
|
Hdf5Type<uint64_t>::type());
|
||||||
|
for (hsize_t i = 0; i < x.size(); ++i)
|
||||||
|
{
|
||||||
|
write(s + "_" + std::to_string(i), x[i]);
|
||||||
|
}
|
||||||
|
pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reader template implementation ////////////////////////////////////////////
|
||||||
|
template <typename U>
|
||||||
|
void Hdf5Reader::readSingleAttribute(U &x, const std::string &name,
|
||||||
|
const H5NS::DataType &type)
|
||||||
|
{
|
||||||
|
H5NS::Attribute attribute;
|
||||||
|
|
||||||
|
attribute = group_.openAttribute(name);
|
||||||
|
attribute.read(type, &x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
void Hdf5Reader::readDefault(const std::string &s, U &output)
|
||||||
|
{
|
||||||
|
readSingleAttribute(output, s, Hdf5Type<U>::type());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Hdf5Reader::readDefault(const std::string &s, std::string &x);
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
|
||||||
|
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
|
||||||
|
{
|
||||||
|
// alias to element type
|
||||||
|
typedef typename element<std::vector<U>>::type Element;
|
||||||
|
|
||||||
|
// read the dimensions
|
||||||
|
H5NS::DataSpace dataSpace;
|
||||||
|
std::vector<hsize_t> hdim;
|
||||||
|
std::vector<size_t> dim;
|
||||||
|
hsize_t size = 1;
|
||||||
|
|
||||||
|
if (group_.attrExists(s))
|
||||||
|
{
|
||||||
|
dataSpace = group_.openAttribute(s).getSpace();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
dataSpace = group_.openDataSet(s).getSpace();
|
||||||
|
}
|
||||||
|
hdim.resize(dataSpace.getSimpleExtentNdims());
|
||||||
|
dataSpace.getSimpleExtentDims(hdim.data());
|
||||||
|
for (auto &d: hdim)
|
||||||
|
{
|
||||||
|
dim.push_back(d);
|
||||||
|
size *= d;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the flat vector
|
||||||
|
std::vector<Element> buf(size);
|
||||||
|
|
||||||
|
if (size > dataSetThres_)
|
||||||
|
{
|
||||||
|
H5NS::DataSet dataSet;
|
||||||
|
|
||||||
|
dataSet = group_.openDataSet(s);
|
||||||
|
dataSet.read(buf.data(), Hdf5Type<Element>::type());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
H5NS::Attribute attribute;
|
||||||
|
|
||||||
|
attribute = group_.openAttribute(s);
|
||||||
|
attribute.read(Hdf5Type<Element>::type(), buf.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconstruct the multidimensional vector
|
||||||
|
Reconstruct<std::vector<U>> r(buf, dim);
|
||||||
|
|
||||||
|
x = r.getVector();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
|
||||||
|
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
|
||||||
|
{
|
||||||
|
uint64_t size;
|
||||||
|
|
||||||
|
push(s);
|
||||||
|
readSingleAttribute(size, HDF5_GRID_GUARD "vector_size",
|
||||||
|
Hdf5Type<uint64_t>::type());
|
||||||
|
x.resize(size);
|
||||||
|
for (hsize_t i = 0; i < x.size(); ++i)
|
||||||
|
{
|
||||||
|
read(s + "_" + std::to_string(i), x[i]);
|
||||||
|
}
|
||||||
|
pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
77
lib/serialisation/Hdf5Type.h
Normal file
77
lib/serialisation/Hdf5Type.h
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
#ifndef GRID_SERIALISATION_HDF5_TYPE_H
|
||||||
|
#define GRID_SERIALISATION_HDF5_TYPE_H
|
||||||
|
|
||||||
|
#include <H5Cpp.h>
|
||||||
|
#include <complex>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#ifndef H5_NO_NAMESPACE
|
||||||
|
#define H5NS H5
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define HDF5_NATIVE_TYPE(predType, cType)\
|
||||||
|
template <>\
|
||||||
|
class Hdf5Type<cType>\
|
||||||
|
{\
|
||||||
|
public:\
|
||||||
|
static inline const H5NS::DataType & type(void)\
|
||||||
|
{\
|
||||||
|
return H5NS::PredType::predType;\
|
||||||
|
}\
|
||||||
|
static constexpr bool isNative = true;\
|
||||||
|
};
|
||||||
|
|
||||||
|
#define DEFINE_HDF5_NATIVE_TYPES \
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_B8, bool);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_CHAR, char);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_SCHAR, signed char);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_UCHAR, unsigned char);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_SHORT, short);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_USHORT, unsigned short);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_INT, int);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_UINT, unsigned int);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_LONG, long);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_ULONG, unsigned long);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_LLONG, long long);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_ULLONG, unsigned long long);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_FLOAT, float);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_DOUBLE, double);\
|
||||||
|
HDF5_NATIVE_TYPE(NATIVE_LDOUBLE, long double);
|
||||||
|
|
||||||
|
namespace Grid
|
||||||
|
{
|
||||||
|
template <typename T> class Hdf5Type
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static constexpr bool isNative = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
DEFINE_HDF5_NATIVE_TYPES;
|
||||||
|
|
||||||
|
template <typename R>
|
||||||
|
class Hdf5Type<std::complex<R>>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static inline const H5NS::DataType & type(void)
|
||||||
|
{
|
||||||
|
if (typePtr_ == nullptr)
|
||||||
|
{
|
||||||
|
typePtr_.reset(new H5NS::CompType(sizeof(std::complex<R>)));
|
||||||
|
typePtr_->insertMember("re", 0, Hdf5Type<R>::type());
|
||||||
|
typePtr_->insertMember("im", sizeof(R), Hdf5Type<R>::type());
|
||||||
|
}
|
||||||
|
|
||||||
|
return *typePtr_;
|
||||||
|
}
|
||||||
|
static constexpr bool isNative = false;
|
||||||
|
private:
|
||||||
|
static std::unique_ptr<H5NS::CompType> typePtr_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename R>
|
||||||
|
std::unique_ptr<H5NS::CompType> Hdf5Type<std::complex<R>>::typePtr_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef HDF5_NATIVE_TYPE
|
||||||
|
|
||||||
|
#endif /* GRID_SERIALISATION_HDF5_TYPE_H */
|
@ -109,40 +109,36 @@ THE SOFTWARE.
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define GRID_MACRO_MEMBER(A,B) A B;
|
#define GRID_MACRO_MEMBER(A,B) A B;
|
||||||
|
#define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B));
|
||||||
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. B <<" ; " <<std::endl;
|
#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. B <<" ; " <<std::endl;
|
||||||
#define GRID_MACRO_READ_MEMBER(A,B) Grid::read(RD,#B,obj. B);
|
#define GRID_MACRO_READ_MEMBER(A,B) Grid::read(RD,#B,obj. B);
|
||||||
#define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B);
|
#define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B);
|
||||||
|
|
||||||
#define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\
|
#define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\
|
||||||
\
|
|
||||||
\
|
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
|
||||||
\
|
|
||||||
\
|
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
|
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
|
||||||
push(WR,s);\
|
push(WR,s);\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
|
||||||
pop(WR);\
|
pop(WR);\
|
||||||
}\
|
}\
|
||||||
\
|
|
||||||
\
|
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void read(Reader<T> &RD,const std::string &s, cname &obj){ \
|
static inline void read(Reader<T> &RD,const std::string &s, cname &obj){ \
|
||||||
push(RD,s);\
|
push(RD,s);\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_READ_MEMBER,__VA_ARGS__)) \
|
||||||
pop(RD);\
|
pop(RD);\
|
||||||
}\
|
}\
|
||||||
\
|
|
||||||
\
|
|
||||||
friend inline std::ostream & operator << (std::ostream &os, const cname &obj ) { \
|
friend inline std::ostream & operator << (std::ostream &os, const cname &obj ) { \
|
||||||
os<<"class "<<#cname<<" {"<<std::endl;\
|
os<<"class "<<#cname<<" {"<<std::endl;\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_OS_WRITE_MEMBER,__VA_ARGS__)) \
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_OS_WRITE_MEMBER,__VA_ARGS__)) \
|
||||||
os<<"}"; \
|
os<<"}"; \
|
||||||
return os;\
|
return os;\
|
||||||
};
|
}\
|
||||||
|
friend inline bool operator==(const cname &lhs, const cname &rhs) {\
|
||||||
|
bool result = true;\
|
||||||
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_COMP_MEMBER,__VA_ARGS__))\
|
||||||
|
return result;\
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_ENUM_TYPE(obj) std::remove_reference<decltype(obj)>::type
|
#define GRID_ENUM_TYPE(obj) std::remove_reference<decltype(obj)>::type
|
||||||
#define GRID_MACRO_ENUMVAL(A,B) A = B,
|
#define GRID_MACRO_ENUMVAL(A,B) A = B,
|
||||||
@ -150,44 +146,52 @@ THE SOFTWARE.
|
|||||||
#define GRID_MACRO_ENUMTEST(A,B) else if (buf == #A) {obj = GRID_ENUM_TYPE(obj)::A;}
|
#define GRID_MACRO_ENUMTEST(A,B) else if (buf == #A) {obj = GRID_ENUM_TYPE(obj)::A;}
|
||||||
#define GRID_MACRO_ENUMCASEIO(A,B) case GRID_ENUM_TYPE(obj)::A: os << #A; break;
|
#define GRID_MACRO_ENUMCASEIO(A,B) case GRID_ENUM_TYPE(obj)::A: os << #A; break;
|
||||||
|
|
||||||
namespace Grid {
|
|
||||||
template <typename U>
|
|
||||||
class EnumIO {};
|
|
||||||
}
|
|
||||||
|
|
||||||
#define GRID_SERIALIZABLE_ENUM(name,undefname,...)\
|
#define GRID_SERIALIZABLE_ENUM(name,undefname,...)\
|
||||||
enum class name {\
|
class name: public Grid::Serializable\
|
||||||
|
{\
|
||||||
|
public:\
|
||||||
|
enum EnumType\
|
||||||
|
{\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMVAL,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMVAL,__VA_ARGS__))\
|
||||||
undefname = -1\
|
undefname = -1\
|
||||||
};\
|
};\
|
||||||
\
|
|
||||||
template<>\
|
|
||||||
class EnumIO<name> {\
|
|
||||||
public:\
|
public:\
|
||||||
|
name(void): value_(undefname) {};\
|
||||||
|
name(EnumType value): value_(value) {};\
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void write(Writer<T> &WR,const std::string &s, const name &obj){ \
|
static inline void write(Grid::Writer<T> &WR,const std::string &s, const name &obj)\
|
||||||
switch (obj) {\
|
{\
|
||||||
|
switch (obj.value_)\
|
||||||
|
{\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASE,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASE,__VA_ARGS__))\
|
||||||
default: Grid::write(WR,s,#undefname); break;\
|
default: Grid::write(WR,s,#undefname); break;\
|
||||||
}\
|
}\
|
||||||
}\
|
}\
|
||||||
\
|
\
|
||||||
template <typename T>\
|
template <typename T>\
|
||||||
static inline void read(Reader<T> &RD,const std::string &s, name &obj){ \
|
static inline void read(Grid::Reader<T> &RD,const std::string &s, name &obj)\
|
||||||
|
{\
|
||||||
std::string buf;\
|
std::string buf;\
|
||||||
Grid::read(RD, s, buf);\
|
Grid::read(RD, s, buf);\
|
||||||
if (buf == #undefname) {obj = name::undefname;}\
|
if (buf == #undefname) {obj = name::undefname;}\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMTEST,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMTEST,__VA_ARGS__))\
|
||||||
else {obj = name::undefname;}\
|
else {obj = name::undefname;}\
|
||||||
}\
|
}\
|
||||||
};\
|
inline operator EnumType(void) const\
|
||||||
\
|
{\
|
||||||
inline std::ostream & operator << (std::ostream &os, const name &obj ) { \
|
return value_;\
|
||||||
|
}\
|
||||||
|
inline friend std::ostream & operator<<(std::ostream &os, const name &obj)\
|
||||||
|
{\
|
||||||
switch (obj) {\
|
switch (obj) {\
|
||||||
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASEIO,__VA_ARGS__))\
|
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_ENUMCASEIO,__VA_ARGS__))\
|
||||||
default: os << #undefname; break;\
|
default: os << #undefname; break;\
|
||||||
}\
|
}\
|
||||||
return os;\
|
return os;\
|
||||||
|
}\
|
||||||
|
private:\
|
||||||
|
EnumType value_;\
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -36,6 +36,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include "BinaryIO.h"
|
#include "BinaryIO.h"
|
||||||
#include "TextIO.h"
|
#include "TextIO.h"
|
||||||
#include "XmlIO.h"
|
#include "XmlIO.h"
|
||||||
|
#ifdef HAVE_HDF5
|
||||||
|
#include "Hdf5IO.h"
|
||||||
|
#endif
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// Todo:
|
// Todo:
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
|
@ -213,6 +213,29 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct MultRealPart{
|
||||||
|
inline __m256 operator()(__m256 a, __m256 b){
|
||||||
|
__m256 ymm0;
|
||||||
|
ymm0 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
|
return _mm256_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
}
|
||||||
|
inline __m256d operator()(__m256d a, __m256d b){
|
||||||
|
__m256d ymm0;
|
||||||
|
ymm0 = _mm256_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
|
||||||
|
return _mm256_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct MaddRealPart{
|
||||||
|
inline __m256 operator()(__m256 a, __m256 b, __m256 c){
|
||||||
|
__m256 ymm0 = _mm256_moveldup_ps(a); // ymm0 <- ar ar,
|
||||||
|
return _mm256_add_ps(_mm256_mul_ps( ymm0, b),c);
|
||||||
|
}
|
||||||
|
inline __m256d operator()(__m256d a, __m256d b, __m256d c){
|
||||||
|
__m256d ymm0 = _mm256_shuffle_pd( a, a, 0x0 );
|
||||||
|
return _mm256_add_pd(_mm256_mul_pd( ymm0, b),c);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline __m256 operator()(__m256 a, __m256 b){
|
inline __m256 operator()(__m256 a, __m256 b){
|
||||||
@ -628,6 +651,8 @@ namespace Optimization {
|
|||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
|
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
@ -189,6 +189,29 @@ namespace Optimization {
|
|||||||
// 2mul,4 mac +add+sub = 8 flop type insns
|
// 2mul,4 mac +add+sub = 8 flop type insns
|
||||||
// 3shuf + 2 (+shuf) = 5/6 simd perm and 1/2 the load.
|
// 3shuf + 2 (+shuf) = 5/6 simd perm and 1/2 the load.
|
||||||
|
|
||||||
|
struct MultRealPart{
|
||||||
|
inline __m512 operator()(__m512 a, __m512 b){
|
||||||
|
__m512 ymm0;
|
||||||
|
ymm0 = _mm512_moveldup_ps(a); // ymm0 <- ar ar,
|
||||||
|
return _mm512_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
}
|
||||||
|
inline __m512d operator()(__m512d a, __m512d b){
|
||||||
|
__m512d ymm0;
|
||||||
|
ymm0 = _mm512_shuffle_pd(a,a,0x00); // ymm0 <- ar ar, ar,ar b'00,00
|
||||||
|
return _mm512_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct MaddRealPart{
|
||||||
|
inline __m512 operator()(__m512 a, __m512 b, __m512 c){
|
||||||
|
__m512 ymm0 = _mm512_moveldup_ps(a); // ymm0 <- ar ar,
|
||||||
|
return _mm512_fmadd_ps( ymm0, b, c);
|
||||||
|
}
|
||||||
|
inline __m512d operator()(__m512d a, __m512d b, __m512d c){
|
||||||
|
__m512d ymm0 = _mm512_shuffle_pd( a, a, 0x00 );
|
||||||
|
return _mm512_fmadd_pd( ymm0, b, c);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline __m512 operator()(__m512 a, __m512 b){
|
inline __m512 operator()(__m512 a, __m512 b){
|
||||||
@ -501,6 +524,8 @@ namespace Optimization {
|
|||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
|
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
@ -225,6 +225,21 @@ namespace Optimization {
|
|||||||
c[i] = a[i]*b[i] - a[i+1]*b[i+1];\
|
c[i] = a[i]*b[i] - a[i+1]*b[i+1];\
|
||||||
c[i+1] = a[i]*b[i+1] + a[i+1]*b[i];
|
c[i+1] = a[i]*b[i+1] + a[i+1]*b[i];
|
||||||
|
|
||||||
|
struct MultRealPart{
|
||||||
|
template <typename T>
|
||||||
|
inline vec<T> operator()(vec<T> a, vec<T> b){
|
||||||
|
vec<T> out;
|
||||||
|
|
||||||
|
VECTOR_FOR(i, W<T>::c, 1)
|
||||||
|
{
|
||||||
|
out.v[2*i] = a[2*i]*b[2*i];
|
||||||
|
out.v[2*i+1] = a[2*i]*b[2*i+1];
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex
|
// Complex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -456,6 +471,7 @@ namespace Optimization {
|
|||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
@ -220,6 +220,14 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct MultRealPart{
|
||||||
|
// Complex double
|
||||||
|
inline vector4double operator()(vector4double a, vector4double b){
|
||||||
|
// return vec_xmul(b, a);
|
||||||
|
return vec_xmul(a, b);
|
||||||
|
}
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
|
};
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vector4double operator()(vector4double a, vector4double b){
|
inline vector4double operator()(vector4double a, vector4double b){
|
||||||
@ -430,6 +438,7 @@ typedef Optimization::Sub SubSIMD;
|
|||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
@ -177,6 +177,29 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct MultRealPart{
|
||||||
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
|
__m128 ymm0;
|
||||||
|
ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
|
return _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
}
|
||||||
|
inline __m128d operator()(__m128d a, __m128d b){
|
||||||
|
__m128d ymm0;
|
||||||
|
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
|
||||||
|
return _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct MaddRealPart{
|
||||||
|
inline __m128 operator()(__m128 a, __m128 b, __m128 c){
|
||||||
|
__m128 ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
|
||||||
|
return _mm_add_ps(_mm_mul_ps( ymm0, b),c);
|
||||||
|
}
|
||||||
|
inline __m128d operator()(__m128d a, __m128d b, __m128d c){
|
||||||
|
__m128d ymm0 = _mm_shuffle_pd( a, a, 0x0 );
|
||||||
|
return _mm_add_pd(_mm_mul_pd( ymm0, b),c);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline __m128 operator()(__m128 a, __m128 b){
|
inline __m128 operator()(__m128 a, __m128 b){
|
||||||
@ -325,8 +348,10 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _mm_alignr_epi64
|
||||||
#define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16)
|
#define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16)
|
||||||
#define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16)
|
#define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16)
|
||||||
|
#endif
|
||||||
|
|
||||||
template<int n> static inline __m128 tRotate(__m128 in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); };
|
template<int n> static inline __m128 tRotate(__m128 in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); };
|
||||||
template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); };
|
template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); };
|
||||||
@ -415,6 +440,8 @@ namespace Optimization {
|
|||||||
typedef Optimization::Div DivSIMD;
|
typedef Optimization::Div DivSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Mult MultSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::MultRealPart MultRealPartSIMD;
|
||||||
|
typedef Optimization::MaddRealPart MaddRealPartSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
typedef Optimization::Conj ConjSIMD;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
@ -101,6 +101,11 @@ template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integr
|
|||||||
// general forms to allow for vsplat syntax
|
// general forms to allow for vsplat syntax
|
||||||
// need explicit declaration of types when used since
|
// need explicit declaration of types when used since
|
||||||
// clang cannot automatically determine the output type sometimes
|
// clang cannot automatically determine the output type sometimes
|
||||||
|
template <class Out, class Input1, class Input2, class Input3, class Operation>
|
||||||
|
Out trinary(Input1 src_1, Input2 src_2, Input3 src_3, Operation op) {
|
||||||
|
return op(src_1, src_2, src_3);
|
||||||
|
}
|
||||||
|
|
||||||
template <class Out, class Input1, class Input2, class Operation>
|
template <class Out, class Input1, class Input2, class Operation>
|
||||||
Out binary(Input1 src_1, Input2 src_2, Operation op) {
|
Out binary(Input1 src_1, Input2 src_2, Operation op) {
|
||||||
return op(src_1, src_2);
|
return op(src_1, src_2);
|
||||||
@ -178,6 +183,7 @@ class Grid_simd {
|
|||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) * (*r);
|
*y = (*l) * (*r);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend inline void sub(Grid_simd *__restrict__ y,
|
friend inline void sub(Grid_simd *__restrict__ y,
|
||||||
const Grid_simd *__restrict__ l,
|
const Grid_simd *__restrict__ l,
|
||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
@ -188,7 +194,6 @@ class Grid_simd {
|
|||||||
const Grid_simd *__restrict__ r) {
|
const Grid_simd *__restrict__ r) {
|
||||||
*y = (*l) + (*r);
|
*y = (*l) + (*r);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend inline void mac(Grid_simd *__restrict__ y,
|
friend inline void mac(Grid_simd *__restrict__ y,
|
||||||
const Scalar_type *__restrict__ a,
|
const Scalar_type *__restrict__ a,
|
||||||
const Grid_simd *__restrict__ x) {
|
const Grid_simd *__restrict__ x) {
|
||||||
@ -260,7 +265,7 @@ class Grid_simd {
|
|||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// opreator scalar * simd
|
// operator scalar * simd
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) {
|
friend inline Grid_simd operator*(const Scalar_type &a, Grid_simd b) {
|
||||||
Grid_simd va;
|
Grid_simd va;
|
||||||
@ -433,6 +438,11 @@ inline void vbroadcast(Grid_simd<S,V> &ret,const Grid_simd<S,V> &src,int lane){
|
|||||||
S* typepun =(S*) &src;
|
S* typepun =(S*) &src;
|
||||||
vsplat(ret,typepun[lane]);
|
vsplat(ret,typepun[lane]);
|
||||||
}
|
}
|
||||||
|
template <class S, class V, IfComplex<S> =0>
|
||||||
|
inline void rbroadcast(Grid_simd<S,V> &ret,const Grid_simd<S,V> &src,int lane){
|
||||||
|
S* typepun =(S*) &src;
|
||||||
|
ret.v = unary<V>(real(typepun[lane]), VsplatSIMD());
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// Splat
|
// Splat
|
||||||
@ -449,6 +459,10 @@ template <class S, class V>
|
|||||||
inline void vsplat(Grid_simd<S, V> &ret, EnableIf<is_complex<S>, S> c) {
|
inline void vsplat(Grid_simd<S, V> &ret, EnableIf<is_complex<S>, S> c) {
|
||||||
vsplat(ret, real(c), imag(c));
|
vsplat(ret, real(c), imag(c));
|
||||||
}
|
}
|
||||||
|
template <class S, class V>
|
||||||
|
inline void rsplat(Grid_simd<S, V> &ret, EnableIf<is_complex<S>, S> c) {
|
||||||
|
vsplat(ret, real(c), real(c));
|
||||||
|
}
|
||||||
|
|
||||||
// if real fill with a, if complex fill with a in the real part (first function
|
// if real fill with a, if complex fill with a in the real part (first function
|
||||||
// above)
|
// above)
|
||||||
@ -550,6 +564,21 @@ inline Grid_simd<S, V> operator-(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
|||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Distinguish between complex types and others
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
inline Grid_simd<S, V> real_mult(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
|
ret.v = binary<V>(a.v, b.v, MultRealPartSIMD());
|
||||||
|
return ret;
|
||||||
|
};
|
||||||
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
|
inline Grid_simd<S, V> real_madd(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S,V> c) {
|
||||||
|
Grid_simd<S, V> ret;
|
||||||
|
ret.v = trinary<V>(a.v, b.v, c.v, MaddRealPartSIMD());
|
||||||
|
return ret;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// Distinguish between complex types and others
|
// Distinguish between complex types and others
|
||||||
template <class S, class V, IfComplex<S> = 0>
|
template <class S, class V, IfComplex<S> = 0>
|
||||||
inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
|
||||||
|
@ -99,6 +99,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VBCASTIDUPd(OFF,A,DEST) "vbroadcastsd (" #OFF "*16+8)(" #A ")," #DEST ";\n"
|
#define VBCASTIDUPd(OFF,A,DEST) "vbroadcastsd (" #OFF "*16+8)(" #A ")," #DEST ";\n"
|
||||||
#define VBCASTRDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +0)(" #PTR "), " #DEST ";\n"
|
#define VBCASTRDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +0)(" #PTR "), " #DEST ";\n"
|
||||||
#define VBCASTIDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +4)(" #PTR "), " #DEST ";\n"
|
#define VBCASTIDUPf(OFF,PTR,DEST) "vbroadcastss (" #OFF "*8 +4)(" #PTR "), " #DEST ";\n"
|
||||||
|
#define VBCASTCDUPf(OFF,A,DEST) "vbroadcastsd (" #OFF "*64 )(" #A ")," #DEST ";\n"
|
||||||
|
#define VBCASTZDUPf(OFF,A,DEST) "vbroadcastf32x4 (" #OFF "*64 )(" #A ")," #DEST ";\n"
|
||||||
|
#define VBCASTCDUP(OFF,A,DEST) VBCASTCDUPf(OFF,A,DEST)
|
||||||
|
#define VBCASTZDUP(OFF,A,DEST) VBCASTZDUPf(OFF,A,DEST)
|
||||||
|
|
||||||
#define VMADDSUBf(A,B,accum) "vfmaddsub231ps " #A "," #B "," #accum ";\n"
|
#define VMADDSUBf(A,B,accum) "vfmaddsub231ps " #A "," #B "," #accum ";\n"
|
||||||
#define VMADDSUBd(A,B,accum) "vfmaddsub231pd " #A "," #B "," #accum ";\n"
|
#define VMADDSUBd(A,B,accum) "vfmaddsub231pd " #A "," #B "," #accum ";\n"
|
||||||
@ -106,11 +110,15 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VMADDSUBMEMd(O,P,B,accum) "vfmaddsub231pd " #O"*64("#P "),"#B "," #accum ";\n"
|
#define VMADDSUBMEMd(O,P,B,accum) "vfmaddsub231pd " #O"*64("#P "),"#B "," #accum ";\n"
|
||||||
|
|
||||||
|
|
||||||
|
#define VMADDRDUPf(O,P,B,accum) "vfmadd231ps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n"
|
||||||
|
#define VMADDIDUPf(O,P,B,accum) "vfmadd231ps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n"
|
||||||
#define VMADDSUBRDUPf(O,P,B,accum) "vfmaddsub231ps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n"
|
#define VMADDSUBRDUPf(O,P,B,accum) "vfmaddsub231ps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n"
|
||||||
#define VMADDSUBIDUPf(O,P,B,accum) "vfmaddsub231ps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n"
|
#define VMADDSUBIDUPf(O,P,B,accum) "vfmaddsub231ps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n"
|
||||||
#define VMULRDUPf(O,P,B,accum) "vmulps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n"
|
#define VMULRDUPf(O,P,B,accum) "vmulps (" #O"*8+0)("#P "){1to16},"#B "," #accum ";\n"
|
||||||
#define VMULIDUPf(O,P,B,accum) "vmulps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n"
|
#define VMULIDUPf(O,P,B,accum) "vmulps (" #O"*8+4)("#P "){1to16},"#B "," #accum ";\n"
|
||||||
|
|
||||||
|
#define VMADDRDUPd(O,P,B,accum) "vfmadd231pd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
|
#define VMADDIDUPd(O,P,B,accum) "vfmadd231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
#define VMADDSUBRDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
#define VMADDSUBRDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
#define VMADDSUBIDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
#define VMADDSUBIDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
#define VMULRDUPd(O,P,B,accum) "vmulpd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
#define VMULRDUPd(O,P,B,accum) "vmulpd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
|
@ -87,7 +87,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
VACCTIMESMINUSI1d(A,ACC,tmp) \
|
VACCTIMESMINUSI1d(A,ACC,tmp) \
|
||||||
VACCTIMESMINUSI2d(A,ACC,tmp)
|
VACCTIMESMINUSI2d(A,ACC,tmp)
|
||||||
|
|
||||||
#define LOAD64i(A,ptr) __asm__ ( "movq %0, %" #A : : "r"(ptr) : #A );
|
#define LOAD64a(A,ptr) "movq %0, %" #A : : "r"(ptr) : #A
|
||||||
|
#define LOAD64i(A,ptr) __asm__ ( LOAD64a(A,ptr));
|
||||||
#define LOAD64(A,ptr) LOAD64i(A,ptr)
|
#define LOAD64(A,ptr) LOAD64i(A,ptr)
|
||||||
|
|
||||||
#define VMOVf(A,DEST) "vmovaps " #A ", " #DEST ";\n"
|
#define VMOVf(A,DEST) "vmovaps " #A ", " #DEST ";\n"
|
||||||
@ -108,8 +109,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
//"vprefetche0 "#O"*64("#A");\n" "vprefetche1 ("#O"+12)*64("#A");\n"
|
//"vprefetche0 "#O"*64("#A");\n" "vprefetche1 ("#O"+12)*64("#A");\n"
|
||||||
// "clevict0 "#O"*64("#A");\n"
|
// "clevict0 "#O"*64("#A");\n"
|
||||||
|
|
||||||
#define VLOADf(OFF,PTR,DEST) "vmovaps " #OFF "*64(" #PTR "), " #DEST ";\n"
|
#define VLOADf(OFF,PTR,DEST) "vmovups " #OFF "*64(" #PTR "), " #DEST ";\n"
|
||||||
#define VLOADd(OFF,PTR,DEST) "vmovapd " #OFF "*64(" #PTR "), " #DEST ";\n"
|
#define VLOADd(OFF,PTR,DEST) "vmovupd " #OFF "*64(" #PTR "), " #DEST ";\n"
|
||||||
|
|
||||||
#define VADDf(A,B,DEST) "vaddps " #A "," #B "," #DEST ";\n"
|
#define VADDf(A,B,DEST) "vaddps " #A "," #B "," #DEST ";\n"
|
||||||
#define VADDd(A,B,DEST) "vaddpd " #A "," #B "," #DEST ";\n"
|
#define VADDd(A,B,DEST) "vaddpd " #A "," #B "," #DEST ";\n"
|
||||||
@ -143,8 +144,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
#define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
||||||
#define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
#define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
||||||
#else
|
#else
|
||||||
#define VSTOREf(OFF,PTR,SRC) "vmovaps " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
#define VSTOREf(OFF,PTR,SRC) "vmovups " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
||||||
#define VSTOREd(OFF,PTR,SRC) "vmovapd " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
#define VSTOREd(OFF,PTR,SRC) "vmovupd " #SRC "," #OFF "*64(" #PTR ")" ";\n"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Swaps Re/Im ; could unify this with IMCI
|
// Swaps Re/Im ; could unify this with IMCI
|
||||||
|
@ -144,10 +144,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VMADDSUBMEM(O,P,B,accum) VMADDSUBMEMd(O,P,B,accum)
|
#define VMADDSUBMEM(O,P,B,accum) VMADDSUBMEMd(O,P,B,accum)
|
||||||
#define VMADDMEM(O,P,B,accum) VMADDMEMd(O,P,B,accum)
|
#define VMADDMEM(O,P,B,accum) VMADDMEMd(O,P,B,accum)
|
||||||
#define VMULMEM(O,P,B,accum) VMULMEMd(O,P,B,accum)
|
#define VMULMEM(O,P,B,accum) VMULMEMd(O,P,B,accum)
|
||||||
|
#undef VMADDRDUP
|
||||||
#undef VMADDSUBRDUP
|
#undef VMADDSUBRDUP
|
||||||
#undef VMADDSUBIDUP
|
#undef VMADDSUBIDUP
|
||||||
#undef VMULRDUP
|
#undef VMULRDUP
|
||||||
#undef VMULIDUP
|
#undef VMULIDUP
|
||||||
|
#define VMADDRDUP(O,P,B,accum) VMADDRDUPd(O,P,B,accum)
|
||||||
#define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPd(O,P,B,accum)
|
#define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPd(O,P,B,accum)
|
||||||
#define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPd(O,P,B,accum)
|
#define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPd(O,P,B,accum)
|
||||||
#define VMULRDUP(O,P,B,accum) VMULRDUPd(O,P,B,accum)
|
#define VMULRDUP(O,P,B,accum) VMULRDUPd(O,P,B,accum)
|
||||||
|
@ -144,10 +144,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VMADDMEM(O,P,B,accum) VMADDMEMf(O,P,B,accum)
|
#define VMADDMEM(O,P,B,accum) VMADDMEMf(O,P,B,accum)
|
||||||
#define VMULMEM(O,P,B,accum) VMULMEMf(O,P,B,accum)
|
#define VMULMEM(O,P,B,accum) VMULMEMf(O,P,B,accum)
|
||||||
|
|
||||||
|
#undef VMADDRDUP
|
||||||
#undef VMADDSUBRDUP
|
#undef VMADDSUBRDUP
|
||||||
#undef VMADDSUBIDUP
|
#undef VMADDSUBIDUP
|
||||||
#undef VMULRDUP
|
#undef VMULRDUP
|
||||||
#undef VMULIDUP
|
#undef VMULIDUP
|
||||||
|
#define VMADDRDUP(O,P,B,accum) VMADDRDUPf(O,P,B,accum)
|
||||||
#define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPf(O,P,B,accum)
|
#define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPf(O,P,B,accum)
|
||||||
#define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPf(O,P,B,accum)
|
#define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPf(O,P,B,accum)
|
||||||
#define VMULRDUP(O,P,B,accum) VMULRDUPf(O,P,B,accum)
|
#define VMULRDUP(O,P,B,accum) VMULRDUPf(O,P,B,accum)
|
||||||
|
@ -4,9 +4,8 @@ home=`pwd`
|
|||||||
|
|
||||||
# library Make.inc
|
# library Make.inc
|
||||||
cd $home/lib
|
cd $home/lib
|
||||||
HFILES=`find . -type f -name '*.h' -not -path '*/Old/*' -not -path '*/Eigen/*'`
|
HFILES=`find . -type f -name '*.h' -not -name '*Hdf5*' -not -path '*/Old/*' -not -path '*/Eigen/*'`
|
||||||
HFILES="$HFILES"
|
CCFILES=`find . -type f -name '*.cc' -not -name '*Communicator*.cc' -not -name '*Hdf5*'`
|
||||||
CCFILES=`find . -type f -name '*.cc' -not -name '*ommunicator*.cc'`
|
|
||||||
echo HFILES=$HFILES > Make.inc
|
echo HFILES=$HFILES > Make.inc
|
||||||
echo >> Make.inc
|
echo >> Make.inc
|
||||||
echo CCFILES=$CCFILES >> Make.inc
|
echo CCFILES=$CCFILES >> Make.inc
|
||||||
|
@ -28,13 +28,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
using namespace Grid;
|
||||||
|
|
||||||
GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3);
|
GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3);
|
||||||
|
|
||||||
class myclass: Serializable {
|
class myclass: Serializable {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
|
GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
|
||||||
myenum, e,
|
myenum, e,
|
||||||
std::vector<myenum>, ve,
|
std::vector<myenum>, ve,
|
||||||
@ -44,11 +43,14 @@ namespace Grid {
|
|||||||
bool , b,
|
bool , b,
|
||||||
std::vector<double>, array,
|
std::vector<double>, array,
|
||||||
std::vector<std::vector<double>>, twodimarray,
|
std::vector<std::vector<double>>, twodimarray,
|
||||||
|
std::vector<std::vector<std::vector<Complex>>>, cmplx3darray
|
||||||
);
|
);
|
||||||
|
|
||||||
myclass() {}
|
myclass() {}
|
||||||
myclass(int i)
|
myclass(int i)
|
||||||
: array(4,5.1), twodimarray(3,std::vector<double>(2,1.23456)), ve(2, myenum::blue)
|
: array(4,5.1)
|
||||||
|
, twodimarray(3,std::vector<double>(5, 1.23456))
|
||||||
|
, cmplx3darray(3,std::vector<std::vector<Complex>>(5, std::vector<Complex>(7, Complex(1.2, 3.4))))
|
||||||
|
, ve(2, myenum::blue)
|
||||||
{
|
{
|
||||||
e=myenum::red;
|
e=myenum::red;
|
||||||
x=i;
|
x=i;
|
||||||
@ -58,10 +60,6 @@ namespace Grid {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
using namespace Grid;
|
|
||||||
|
|
||||||
int16_t i16 = 1;
|
int16_t i16 = 1;
|
||||||
uint16_t u16 = 2;
|
uint16_t u16 = 2;
|
||||||
int32_t i32 = 3;
|
int32_t i32 = 3;
|
||||||
@ -72,12 +70,34 @@ float f = M_PI;
|
|||||||
double d = 2*M_PI;
|
double d = 2*M_PI;
|
||||||
bool b = false;
|
bool b = false;
|
||||||
|
|
||||||
|
template <typename W, typename R, typename O>
|
||||||
|
void ioTest(const std::string &filename, const O &object, const std::string &name)
|
||||||
|
{
|
||||||
|
// writer needs to be destroyed so that writing physically happens
|
||||||
|
{
|
||||||
|
W writer(filename);
|
||||||
|
|
||||||
|
write(writer, "testobject", object);
|
||||||
|
}
|
||||||
|
|
||||||
|
R reader(filename);
|
||||||
|
O buf;
|
||||||
|
bool good;
|
||||||
|
|
||||||
|
read(reader, "testobject", buf);
|
||||||
|
good = (object == buf);
|
||||||
|
std::cout << name << " IO test: " << (good ? "success" : "failure");
|
||||||
|
std::cout << std::endl;
|
||||||
|
if (!good) exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc,char **argv)
|
int main(int argc,char **argv)
|
||||||
{
|
{
|
||||||
{
|
std::cout << "==== basic IO" << std::endl;
|
||||||
XmlWriter WR("bother.xml");
|
XmlWriter WR("bother.xml");
|
||||||
|
|
||||||
// test basic type writing
|
// test basic type writing
|
||||||
|
std::cout << "-- basic writing to 'bother.xml'..." << std::endl;
|
||||||
push(WR,"BasicTypes");
|
push(WR,"BasicTypes");
|
||||||
write(WR,std::string("i16"),i16);
|
write(WR,std::string("i16"),i16);
|
||||||
write(WR,"u16",u16);
|
write(WR,"u16",u16);
|
||||||
@ -92,66 +112,68 @@ int main(int argc,char **argv)
|
|||||||
|
|
||||||
// test serializable class writing
|
// test serializable class writing
|
||||||
myclass obj(1234); // non-trivial constructor
|
myclass obj(1234); // non-trivial constructor
|
||||||
|
std::vector<myclass> vec;
|
||||||
|
|
||||||
|
std::cout << "-- serialisable class writing to 'bother.xml'..." << std::endl;
|
||||||
write(WR,"obj",obj);
|
write(WR,"obj",obj);
|
||||||
WR.write("obj2", obj);
|
WR.write("obj2", obj);
|
||||||
std::cout << obj << std::endl;
|
|
||||||
|
|
||||||
std::vector<myclass> vec;
|
|
||||||
vec.push_back(myclass(1234));
|
vec.push_back(myclass(1234));
|
||||||
vec.push_back(myclass(5678));
|
vec.push_back(myclass(5678));
|
||||||
vec.push_back(myclass(3838));
|
vec.push_back(myclass(3838));
|
||||||
write(WR, "objvec", vec);
|
write(WR, "objvec", vec);
|
||||||
};
|
std::cout << "-- serialisable class writing to std::cout:" << std::endl;
|
||||||
|
std::cout << obj << std::endl;
|
||||||
|
std::cout << "-- serialisable class comparison:" << std::endl;
|
||||||
|
std::cout << "vec[0] == obj: " << ((vec[0] == obj) ? "true" : "false") << std::endl;
|
||||||
|
std::cout << "vec[1] == obj: " << ((vec[1] == obj) ? "true" : "false") << std::endl;
|
||||||
|
|
||||||
// read tests
|
// read tests
|
||||||
myclass copy1, copy2, copy3;
|
std::cout << "\n==== IO self-consistency tests" << std::endl;
|
||||||
std::vector<myclass> veccopy1, veccopy2, veccopy3;
|
|
||||||
//// XML
|
//// XML
|
||||||
{
|
ioTest<XmlWriter, XmlReader>("iotest.xml", obj, "XML (object) ");
|
||||||
XmlReader RD("bother.xml");
|
ioTest<XmlWriter, XmlReader>("iotest.xml", vec, "XML (vector of objects)");
|
||||||
read(RD,"obj",copy1);
|
|
||||||
read(RD,"objvec", veccopy1);
|
|
||||||
std::cout << "Loaded (XML) -----------------" << std::endl;
|
|
||||||
std::cout << copy1 << std::endl << veccopy1 << std::endl;
|
|
||||||
}
|
|
||||||
//// binary
|
//// binary
|
||||||
{
|
ioTest<BinaryWriter, BinaryReader>("iotest.bin", obj, "binary (object) ");
|
||||||
BinaryWriter BWR("bother.bin");
|
ioTest<BinaryWriter, BinaryReader>("iotest.bin", vec, "binary (vector of objects)");
|
||||||
write(BWR,"discard",copy1 );
|
|
||||||
write(BWR,"discard",veccopy1 );
|
|
||||||
}
|
|
||||||
{
|
|
||||||
BinaryReader BRD("bother.bin");
|
|
||||||
read (BRD,"discard",copy2 );
|
|
||||||
read (BRD,"discard",veccopy2 );
|
|
||||||
std::cout << "Loaded (bin) -----------------" << std::endl;
|
|
||||||
std::cout << copy2 << std::endl << veccopy2 << std::endl;
|
|
||||||
}
|
|
||||||
//// text
|
//// text
|
||||||
{
|
ioTest<TextWriter, TextReader>("iotest.dat", obj, "text (object) ");
|
||||||
TextWriter TWR("bother.txt");
|
ioTest<TextWriter, TextReader>("iotest.dat", vec, "text (vector of objects)");
|
||||||
write(TWR,"discard",copy1 );
|
//// HDF5
|
||||||
write(TWR,"discard",veccopy1 );
|
#ifdef HAVE_HDF5
|
||||||
}
|
ioTest<Hdf5Writer, Hdf5Reader>("iotest.h5", obj, "HDF5 (object) ");
|
||||||
{
|
ioTest<Hdf5Writer, Hdf5Reader>("iotest.h5", vec, "HDF5 (vector of objects)");
|
||||||
TextReader TRD("bother.txt");
|
#endif
|
||||||
read (TRD,"discard",copy3 );
|
|
||||||
read (TRD,"discard",veccopy3 );
|
|
||||||
std::cout << "Loaded (txt) -----------------" << std::endl;
|
|
||||||
std::cout << copy3 << std::endl << veccopy3 << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int> iv = strToVec<int>("1 2 2 4");
|
std::cout << "\n==== vector flattening/reconstruction" << std::endl;
|
||||||
std::vector<std::string> sv = strToVec<std::string>("bli bla blu");
|
typedef std::vector<std::vector<std::vector<double>>> vec3d;
|
||||||
|
|
||||||
for (auto &e: iv)
|
vec3d dv, buf;
|
||||||
|
double d = 0.;
|
||||||
|
|
||||||
|
dv.resize(4);
|
||||||
|
for (auto &v1: dv)
|
||||||
{
|
{
|
||||||
std::cout << e << " ";
|
v1.resize(3);
|
||||||
}
|
for (auto &v2: v1)
|
||||||
std::cout << std::endl;
|
|
||||||
for (auto &e: sv)
|
|
||||||
{
|
{
|
||||||
std::cout << e << " ";
|
v2.resize(5);
|
||||||
|
for (auto &x: v2)
|
||||||
|
{
|
||||||
|
x = d++;
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
}
|
||||||
|
}
|
||||||
|
std::cout << "original 3D vector:" << std::endl;
|
||||||
|
std::cout << dv << std::endl;
|
||||||
|
|
||||||
|
Flatten<vec3d> flatdv(dv);
|
||||||
|
|
||||||
|
std::cout << "\ndimensions:" << std::endl;
|
||||||
|
std::cout << flatdv.getDim() << std::endl;
|
||||||
|
std::cout << "\nflattened vector:" << std::endl;
|
||||||
|
std::cout << flatdv.getFlatVector() << std::endl;
|
||||||
|
|
||||||
|
Reconstruct<vec3d> rec(flatdv.getFlatVector(), flatdv.getDim());
|
||||||
|
std::cout << "\nreconstructed vector:" << std::endl;
|
||||||
|
std::cout << flatdv.getVector() << std::endl;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user