mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
b74940b3d4
@ -487,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
|||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
{
|
{
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
|
@ -61,9 +61,9 @@ Group & Hdf5Writer::getGroup(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Reader implementation ///////////////////////////////////////////////////////
|
// Reader implementation ///////////////////////////////////////////////////////
|
||||||
Hdf5Reader::Hdf5Reader(const std::string &fileName)
|
Hdf5Reader::Hdf5Reader(const std::string &fileName, const bool readOnly)
|
||||||
: fileName_(fileName)
|
: fileName_(fileName)
|
||||||
, file_(fileName.c_str(), H5F_ACC_RDWR)
|
, file_(fileName.c_str(), readOnly ? H5F_ACC_RDONLY : H5F_ACC_RDWR)
|
||||||
{
|
{
|
||||||
group_ = file_.openGroup("/");
|
group_ = file_.openGroup("/");
|
||||||
readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",
|
readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",
|
||||||
|
@ -54,7 +54,7 @@ namespace Grid
|
|||||||
class Hdf5Reader: public Reader<Hdf5Reader>
|
class Hdf5Reader: public Reader<Hdf5Reader>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Hdf5Reader(const std::string &fileName);
|
Hdf5Reader(const std::string &fileName, const bool readOnly = true);
|
||||||
virtual ~Hdf5Reader(void) = default;
|
virtual ~Hdf5Reader(void) = default;
|
||||||
bool push(const std::string &s);
|
bool push(const std::string &s);
|
||||||
void pop(void);
|
void pop(void);
|
||||||
|
@ -47,6 +47,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#else
|
#else
|
||||||
#define PARALLEL_FOR_LOOP
|
#define PARALLEL_FOR_LOOP
|
||||||
#define PARALLEL_FOR_LOOP_INTERN
|
#define PARALLEL_FOR_LOOP_INTERN
|
||||||
|
#define PARALLEL_FOR_LOOP_REDUCE(op, var)
|
||||||
#define PARALLEL_NESTED_LOOP2
|
#define PARALLEL_NESTED_LOOP2
|
||||||
#define PARALLEL_NESTED_LOOP5
|
#define PARALLEL_NESTED_LOOP5
|
||||||
#define PARALLEL_REGION
|
#define PARALLEL_REGION
|
||||||
@ -58,6 +59,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
|
#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
|
||||||
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
|
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
|
||||||
#define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for
|
#define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for
|
||||||
|
#define parallel_critical PARALLEL_CRITICAL
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -28,16 +28,31 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
}
|
}
|
||||||
|
#ifdef USE_IPP
|
||||||
|
#include "ipp.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
class GridChecksum
|
class GridChecksum
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static inline uint32_t crc32(void *data,size_t bytes)
|
static inline uint32_t crc32(const void *data, size_t bytes)
|
||||||
{
|
{
|
||||||
return ::crc32(0L,(unsigned char *)data,bytes);
|
return ::crc32(0L,(unsigned char *)data,bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_IPP
|
||||||
|
static inline uint32_t crc32c(const void* data, size_t bytes)
|
||||||
|
{
|
||||||
|
uint32_t crc32c = ~(uint32_t)0;
|
||||||
|
ippsCRC32C_8u(reinterpret_cast<const unsigned char *>(data), bytes, &crc32c);
|
||||||
|
ippsSwapBytes_32u_I(&crc32c, 1);
|
||||||
|
|
||||||
|
return ~crc32c;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline std::string sha256_string(const std::vector<T> &hash)
|
static inline std::string sha256_string(const std::vector<T> &hash)
|
||||||
{
|
{
|
||||||
|
@ -32,11 +32,19 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/Global.hpp>
|
#include <Hadrons/Global.hpp>
|
||||||
#include <Hadrons/TimerArray.hpp>
|
#include <Hadrons/TimerArray.hpp>
|
||||||
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
||||||
|
#ifdef USE_MKL
|
||||||
|
#include "mkl.h"
|
||||||
|
#include "mkl_cblas.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef HADRONS_A2AM_NAME
|
#ifndef HADRONS_A2AM_NAME
|
||||||
#define HADRONS_A2AM_NAME "a2aMatrix"
|
#define HADRONS_A2AM_NAME "a2aMatrix"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef HADRONS_A2AM_IO_TYPE
|
||||||
|
#define HADRONS_A2AM_IO_TYPE ComplexF
|
||||||
|
#endif
|
||||||
|
|
||||||
#define HADRONS_A2AM_PARALLEL_IO
|
#define HADRONS_A2AM_PARALLEL_IO
|
||||||
|
|
||||||
BEGIN_HADRONS_NAMESPACE
|
BEGIN_HADRONS_NAMESPACE
|
||||||
@ -51,6 +59,12 @@ BEGIN_HADRONS_NAMESPACE
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
|
using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
* Abstract class for A2A kernels *
|
* Abstract class for A2A kernels *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
@ -76,10 +90,15 @@ public:
|
|||||||
// constructors
|
// constructors
|
||||||
A2AMatrixIo(void) = default;
|
A2AMatrixIo(void) = default;
|
||||||
A2AMatrixIo(std::string filename, std::string dataname,
|
A2AMatrixIo(std::string filename, std::string dataname,
|
||||||
const unsigned int nt, const unsigned int ni,
|
const unsigned int nt, const unsigned int ni = 0,
|
||||||
const unsigned int nj);
|
const unsigned int nj = 0);
|
||||||
// destructor
|
// destructor
|
||||||
~A2AMatrixIo(void) = default;
|
~A2AMatrixIo(void) = default;
|
||||||
|
// access
|
||||||
|
unsigned int getNi(void) const;
|
||||||
|
unsigned int getNj(void) const;
|
||||||
|
unsigned int getNt(void) const;
|
||||||
|
size_t getSize(void) const;
|
||||||
// file allocation
|
// file allocation
|
||||||
template <typename MetadataType>
|
template <typename MetadataType>
|
||||||
void initFile(const MetadataType &d, const unsigned int chunkSize);
|
void initFile(const MetadataType &d, const unsigned int chunkSize);
|
||||||
@ -88,9 +107,11 @@ public:
|
|||||||
const unsigned int blockSizei, const unsigned int blockSizej);
|
const unsigned int blockSizei, const unsigned int blockSizej);
|
||||||
void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
|
void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
|
||||||
const unsigned int i, const unsigned int j);
|
const unsigned int i, const unsigned int j);
|
||||||
|
template <template <class> class Vec, typename VecT>
|
||||||
|
void load(Vec<VecT> &v, double *tRead = nullptr);
|
||||||
private:
|
private:
|
||||||
std::string filename_, dataname_;
|
std::string filename_{""}, dataname_{""};
|
||||||
unsigned int nt_, ni_, nj_;
|
unsigned int nt_{0}, ni_{0}, nj_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
@ -136,6 +157,226 @@ private:
|
|||||||
std::vector<IoHelper> nodeIo_;
|
std::vector<IoHelper> nodeIo_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
* A2A matrix contraction kernels *
|
||||||
|
******************************************************************************/
|
||||||
|
class A2AContraction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// accTrMul(acc, a, b): acc += tr(a*b)
|
||||||
|
template <typename C, typename MatLeft, typename MatRight>
|
||||||
|
static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
if ((MatLeft::Options == Eigen::RowMajor) and
|
||||||
|
(MatRight::Options == Eigen::ColMajor))
|
||||||
|
{
|
||||||
|
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
|
||||||
|
{
|
||||||
|
C tmp;
|
||||||
|
#ifdef USE_MKL
|
||||||
|
dotuRow(tmp, r, a, b);
|
||||||
|
#else
|
||||||
|
tmp = a.row(r).conjugate().dot(b.col(r));
|
||||||
|
#endif
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
acc += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
|
||||||
|
{
|
||||||
|
C tmp;
|
||||||
|
#ifdef USE_MKL
|
||||||
|
dotuCol(tmp, c, a, b);
|
||||||
|
#else
|
||||||
|
tmp = a.col(c).conjugate().dot(b.row(c));
|
||||||
|
#endif
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
acc += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline double accTrMulFlops(const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
double n = a.rows()*a.cols();
|
||||||
|
|
||||||
|
return 8.*n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// mul(res, a, b): res = a*b
|
||||||
|
#ifdef USE_MKL
|
||||||
|
template <template <class, int...> class Mat, int... Opts>
|
||||||
|
static inline void mul(Mat<ComplexD, Opts...> &res,
|
||||||
|
const Mat<ComplexD, Opts...> &a,
|
||||||
|
const Mat<ComplexD, Opts...> &b)
|
||||||
|
{
|
||||||
|
static const ComplexD one(1., 0.), zero(0., 0.);
|
||||||
|
|
||||||
|
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
|
||||||
|
{
|
||||||
|
res.resize(a.rows(), b.cols());
|
||||||
|
}
|
||||||
|
if (Mat<ComplexD, Opts...>::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
|
||||||
|
res.data(), res.cols());
|
||||||
|
}
|
||||||
|
else if (Mat<ComplexD, Opts...>::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
|
||||||
|
res.data(), res.rows());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <template <class, int...> class Mat, int... Opts>
|
||||||
|
static inline void mul(Mat<ComplexF, Opts...> &res,
|
||||||
|
const Mat<ComplexF, Opts...> &a,
|
||||||
|
const Mat<ComplexF, Opts...> &b)
|
||||||
|
{
|
||||||
|
static const ComplexF one(1., 0.), zero(0., 0.);
|
||||||
|
|
||||||
|
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
|
||||||
|
{
|
||||||
|
res.resize(a.rows(), b.cols());
|
||||||
|
}
|
||||||
|
if (Mat<ComplexF, Opts...>::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
|
||||||
|
res.data(), res.cols());
|
||||||
|
}
|
||||||
|
else if (Mat<ComplexF, Opts...>::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
|
||||||
|
res.data(), res.rows());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <typename Mat>
|
||||||
|
static inline void mul(Mat &res, const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
res = a*b;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
template <typename Mat>
|
||||||
|
static inline double mulFlops(const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
double nr = a.rows(), nc = a.cols();
|
||||||
|
|
||||||
|
return nr*nr*(6.*nc + 2.*(nc - 1.));
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
template <typename C, typename MatLeft, typename MatRight>
|
||||||
|
static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt,
|
||||||
|
unsigned int &bInc, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aRow*a.cols();
|
||||||
|
aInc = 1;
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aRow;
|
||||||
|
aInc = a.rows();
|
||||||
|
}
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aRow;
|
||||||
|
bInc = b.cols();
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aRow*b.rows();
|
||||||
|
bInc = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_MKL
|
||||||
|
template <typename C, typename MatLeft, typename MatRight>
|
||||||
|
static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt,
|
||||||
|
unsigned int &bInc, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aCol;
|
||||||
|
aInc = a.cols();
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aCol*a.rows();
|
||||||
|
aInc = 1;
|
||||||
|
}
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aCol*b.cols();
|
||||||
|
bInc = 1;
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aCol;
|
||||||
|
bInc = b.rows();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuRow(ComplexF &res, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexF *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
|
||||||
|
cblas_cdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuCol(ComplexF &res, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexF *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
|
||||||
|
cblas_cdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuRow(ComplexD &res, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexD *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
|
||||||
|
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuCol(ComplexD &res, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexD *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
|
||||||
|
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
* A2AMatrixIo template implementation *
|
* A2AMatrixIo template implementation *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
@ -148,6 +389,31 @@ A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname,
|
|||||||
, nt_(nt), ni_(ni), nj_(nj)
|
, nt_(nt), ni_(ni), nj_(nj)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
// access //////////////////////////////////////////////////////////////////////
|
||||||
|
template <typename T>
|
||||||
|
unsigned int A2AMatrixIo<T>::getNt(void) const
|
||||||
|
{
|
||||||
|
return nt_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int A2AMatrixIo<T>::getNi(void) const
|
||||||
|
{
|
||||||
|
return ni_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
unsigned int A2AMatrixIo<T>::getNj(void) const
|
||||||
|
{
|
||||||
|
return nj_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
size_t A2AMatrixIo<T>::getSize(void) const
|
||||||
|
{
|
||||||
|
return nt_*ni_*nj_*sizeof(T);
|
||||||
|
}
|
||||||
|
|
||||||
// file allocation /////////////////////////////////////////////////////////////
|
// file allocation /////////////////////////////////////////////////////////////
|
||||||
template <typename T>
|
template <typename T>
|
||||||
template <typename MetadataType>
|
template <typename MetadataType>
|
||||||
@ -171,7 +437,7 @@ void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSiz
|
|||||||
}
|
}
|
||||||
|
|
||||||
// create the dataset
|
// create the dataset
|
||||||
Hdf5Reader reader(filename_);
|
Hdf5Reader reader(filename_, false);
|
||||||
|
|
||||||
push(reader, dataname_);
|
push(reader, dataname_);
|
||||||
auto &group = reader.getGroup();
|
auto &group = reader.getGroup();
|
||||||
@ -191,7 +457,7 @@ void A2AMatrixIo<T>::saveBlock(const T *data,
|
|||||||
const unsigned int blockSizej)
|
const unsigned int blockSizej)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_HDF5
|
#ifdef HAVE_HDF5
|
||||||
Hdf5Reader reader(filename_);
|
Hdf5Reader reader(filename_, false);
|
||||||
std::vector<hsize_t> count = {nt_, blockSizei, blockSizej},
|
std::vector<hsize_t> count = {nt_, blockSizei, blockSizej},
|
||||||
offset = {0, static_cast<hsize_t>(i),
|
offset = {0, static_cast<hsize_t>(i),
|
||||||
static_cast<hsize_t>(j)},
|
static_cast<hsize_t>(j)},
|
||||||
@ -226,6 +492,82 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
|
|||||||
saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
|
saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
template <template <class> class Vec, typename VecT>
|
||||||
|
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_HDF5
|
||||||
|
Hdf5Reader reader(filename_);
|
||||||
|
std::vector<hsize_t> hdim;
|
||||||
|
H5NS::DataSet dataset;
|
||||||
|
H5NS::DataSpace dataspace;
|
||||||
|
H5NS::CompType datatype;
|
||||||
|
H5NS::DSetCreatPropList plist;
|
||||||
|
|
||||||
|
push(reader, dataname_);
|
||||||
|
auto &group = reader.getGroup();
|
||||||
|
dataset = group.openDataSet(HADRONS_A2AM_NAME);
|
||||||
|
datatype = dataset.getCompType();
|
||||||
|
dataspace = dataset.getSpace();
|
||||||
|
plist = dataset.getCreatePlist();
|
||||||
|
hdim.resize(dataspace.getSimpleExtentNdims());
|
||||||
|
dataspace.getSimpleExtentDims(hdim.data());
|
||||||
|
if ((nt_*ni_*nj_ != 0) and
|
||||||
|
((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
|
||||||
|
+ std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
|
||||||
|
+ std::to_string(hdim[2]) + ", expected "
|
||||||
|
+ std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
|
||||||
|
+ std::to_string(nj_));
|
||||||
|
}
|
||||||
|
else if (ni_*nj_ == 0)
|
||||||
|
{
|
||||||
|
if (hdim[0] != nt_)
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
|
||||||
|
+ std::to_string(hdim[0]) + ", expected "
|
||||||
|
+ std::to_string(nt_) + ")");
|
||||||
|
}
|
||||||
|
ni_ = hdim[1];
|
||||||
|
nj_ = hdim[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
A2AMatrix<T> buf(ni_, nj_);
|
||||||
|
std::vector<hsize_t> count = {1, static_cast<hsize_t>(ni_),
|
||||||
|
static_cast<hsize_t>(nj_)},
|
||||||
|
stride = {1, 1, 1},
|
||||||
|
block = {1, 1, 1},
|
||||||
|
memCount = {static_cast<hsize_t>(ni_),
|
||||||
|
static_cast<hsize_t>(nj_)};
|
||||||
|
H5NS::DataSpace memspace(memCount.size(), memCount.data());
|
||||||
|
|
||||||
|
std::cout << "Loading timeslice";
|
||||||
|
std::cout.flush();
|
||||||
|
*tRead = 0.;
|
||||||
|
for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
|
||||||
|
{
|
||||||
|
unsigned int t = tp1 - 1;
|
||||||
|
std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
|
||||||
|
|
||||||
|
if (t % 10 == 0)
|
||||||
|
{
|
||||||
|
std::cout << " " << t;
|
||||||
|
std::cout.flush();
|
||||||
|
}
|
||||||
|
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
|
||||||
|
stride.data(), block.data());
|
||||||
|
if (tRead) *tRead -= usecond();
|
||||||
|
dataset.read(buf.data(), datatype, memspace, dataspace);
|
||||||
|
if (tRead) *tRead += usecond();
|
||||||
|
v[t] = buf.template cast<VecT>();
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
#else
|
||||||
|
HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
* A2AMatrixBlockComputation template implementation *
|
* A2AMatrixBlockComputation template implementation *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
|
@ -29,6 +29,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#define Hadrons_DiskVector_hpp_
|
#define Hadrons_DiskVector_hpp_
|
||||||
|
|
||||||
#include <Hadrons/Global.hpp>
|
#include <Hadrons/Global.hpp>
|
||||||
|
#include <Hadrons/A2AMatrix.hpp>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <ftw.h>
|
#include <ftw.h>
|
||||||
@ -59,14 +60,18 @@ public:
|
|||||||
: master_(master), cmaster_(master), i_(i) {}
|
: master_(master), cmaster_(master), i_(i) {}
|
||||||
|
|
||||||
// operator=: somebody is trying to store a vector element
|
// operator=: somebody is trying to store a vector element
|
||||||
// write to disk and cache
|
// write to cache and tag as modified
|
||||||
T &operator=(const T &obj) const
|
T &operator=(const T &obj) const
|
||||||
{
|
{
|
||||||
|
auto &cache = *master_.cachePtr_;
|
||||||
|
auto &modified = *master_.modifiedPtr_;
|
||||||
|
auto &index = *master_.indexPtr_;
|
||||||
|
|
||||||
DV_DEBUG_MSG(&master_, "writing to " << i_);
|
DV_DEBUG_MSG(&master_, "writing to " << i_);
|
||||||
master_.cacheInsert(i_, obj);
|
master_.cacheInsert(i_, obj);
|
||||||
master_.save(master_.filename(i_), obj);
|
modified[index.at(i_)] = true;
|
||||||
|
|
||||||
return master_.cachePtr_->at(i_);
|
return cache[index.at(i_)];
|
||||||
}
|
}
|
||||||
|
|
||||||
// implicit cast to const object reference and redirection
|
// implicit cast to const object reference and redirection
|
||||||
@ -83,6 +88,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
DiskVectorBase(const std::string dirname, const unsigned int size = 0,
|
DiskVectorBase(const std::string dirname, const unsigned int size = 0,
|
||||||
const unsigned int cacheSize = 1, const bool clean = true);
|
const unsigned int cacheSize = 1, const bool clean = true);
|
||||||
|
DiskVectorBase(DiskVectorBase<T> &&v) = default;
|
||||||
virtual ~DiskVectorBase(void);
|
virtual ~DiskVectorBase(void);
|
||||||
const T & operator[](const unsigned int i) const;
|
const T & operator[](const unsigned int i) const;
|
||||||
RwAccessHelper operator[](const unsigned int i);
|
RwAccessHelper operator[](const unsigned int i);
|
||||||
@ -103,7 +109,10 @@ private:
|
|||||||
bool clean_;
|
bool clean_;
|
||||||
// using pointers to allow modifications when class is const
|
// using pointers to allow modifications when class is const
|
||||||
// semantic: const means data unmodified, but cache modification allowed
|
// semantic: const means data unmodified, but cache modification allowed
|
||||||
std::unique_ptr<std::map<unsigned int, T>> cachePtr_;
|
std::unique_ptr<std::vector<T>> cachePtr_;
|
||||||
|
std::unique_ptr<std::vector<bool>> modifiedPtr_;
|
||||||
|
std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_;
|
||||||
|
std::unique_ptr<std::stack<unsigned int>> freePtr_;
|
||||||
std::unique_ptr<std::deque<unsigned int>> loadsPtr_;
|
std::unique_ptr<std::deque<unsigned int>> loadsPtr_;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -135,7 +144,7 @@ private:
|
|||||||
* Specialisation for Eigen matrices *
|
* Specialisation for Eigen matrices *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using EigenDiskVectorMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
|
using EigenDiskVectorMat = A2AMatrix<T>;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>>
|
class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>>
|
||||||
@ -153,23 +162,30 @@ private:
|
|||||||
virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
|
virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
|
||||||
{
|
{
|
||||||
std::ifstream f(filename, std::ios::binary);
|
std::ifstream f(filename, std::ios::binary);
|
||||||
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
|
uint32_t crc, check;
|
||||||
Eigen::Index nRow, nCol;
|
Eigen::Index nRow, nCol;
|
||||||
size_t matSize;
|
size_t matSize;
|
||||||
double t;
|
double tRead, tHash;
|
||||||
|
|
||||||
f.read(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char));
|
f.read(reinterpret_cast<char *>(&crc), sizeof(crc));
|
||||||
f.read(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index));
|
f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
|
||||||
f.read(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
|
f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
|
||||||
obj.resize(nRow, nCol);
|
obj.resize(nRow, nCol);
|
||||||
matSize = nRow*nCol*sizeof(T);
|
matSize = nRow*nCol*sizeof(T);
|
||||||
t = -usecond();
|
tRead = -usecond();
|
||||||
f.read(reinterpret_cast<char *>(obj.data()), matSize);
|
f.read(reinterpret_cast<char *>(obj.data()), matSize);
|
||||||
t += usecond();
|
tRead += usecond();
|
||||||
DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s");
|
tHash = -usecond();
|
||||||
auto check = GridChecksum::sha256(obj.data(), matSize);
|
#ifdef USE_IPP
|
||||||
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check));
|
check = GridChecksum::crc32c(obj.data(), matSize);
|
||||||
if (hash != check)
|
#else
|
||||||
|
check = GridChecksum::crc32(obj.data(), matSize);
|
||||||
|
#endif
|
||||||
|
tHash += usecond();
|
||||||
|
DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s");
|
||||||
|
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec
|
||||||
|
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
|
||||||
|
if (crc != check)
|
||||||
{
|
{
|
||||||
HADRONS_ERROR(Io, "checksum failed")
|
HADRONS_ERROR(Io, "checksum failed")
|
||||||
}
|
}
|
||||||
@ -178,23 +194,30 @@ private:
|
|||||||
virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
|
virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
|
||||||
{
|
{
|
||||||
std::ofstream f(filename, std::ios::binary);
|
std::ofstream f(filename, std::ios::binary);
|
||||||
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
|
uint32_t crc;
|
||||||
Eigen::Index nRow, nCol;
|
Eigen::Index nRow, nCol;
|
||||||
size_t matSize;
|
size_t matSize;
|
||||||
double t;
|
double tWrite, tHash;
|
||||||
|
|
||||||
nRow = obj.rows();
|
nRow = obj.rows();
|
||||||
nCol = obj.cols();
|
nCol = obj.cols();
|
||||||
matSize = nRow*nCol*sizeof(T);
|
matSize = nRow*nCol*sizeof(T);
|
||||||
hash = GridChecksum::sha256(obj.data(), matSize);
|
tHash = -usecond();
|
||||||
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash));
|
#ifdef USE_IPP
|
||||||
f.write(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char));
|
crc = GridChecksum::crc32c(obj.data(), matSize);
|
||||||
f.write(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index));
|
#else
|
||||||
f.write(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
|
crc = GridChecksum::crc32(obj.data(), matSize);
|
||||||
t = -usecond();
|
#endif
|
||||||
|
tHash += usecond();
|
||||||
|
f.write(reinterpret_cast<char *>(&crc), sizeof(crc));
|
||||||
|
f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
|
||||||
|
f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
|
||||||
|
tWrite = -usecond();
|
||||||
f.write(reinterpret_cast<const char *>(obj.data()), matSize);
|
f.write(reinterpret_cast<const char *>(obj.data()), matSize);
|
||||||
t += usecond();
|
tWrite += usecond();
|
||||||
DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s");
|
DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s");
|
||||||
|
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec
|
||||||
|
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -207,7 +230,10 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
|
|||||||
const unsigned int cacheSize,
|
const unsigned int cacheSize,
|
||||||
const bool clean)
|
const bool clean)
|
||||||
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
|
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
|
||||||
, cachePtr_(new std::map<unsigned int, T>())
|
, cachePtr_(new std::vector<T>(size))
|
||||||
|
, modifiedPtr_(new std::vector<bool>(size, false))
|
||||||
|
, indexPtr_(new std::map<unsigned int, unsigned int>())
|
||||||
|
, freePtr_(new std::stack<unsigned int>)
|
||||||
, loadsPtr_(new std::deque<unsigned int>())
|
, loadsPtr_(new std::deque<unsigned int>())
|
||||||
{
|
{
|
||||||
struct stat s;
|
struct stat s;
|
||||||
@ -217,6 +243,10 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
|
|||||||
HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
|
HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
|
||||||
}
|
}
|
||||||
mkdir(dirname);
|
mkdir(dirname);
|
||||||
|
for (unsigned int i = 0; i < cacheSize_; ++i)
|
||||||
|
{
|
||||||
|
freePtr_->push(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -232,6 +262,8 @@ template <typename T>
|
|||||||
const T & DiskVectorBase<T>::operator[](const unsigned int i) const
|
const T & DiskVectorBase<T>::operator[](const unsigned int i) const
|
||||||
{
|
{
|
||||||
auto &cache = *cachePtr_;
|
auto &cache = *cachePtr_;
|
||||||
|
auto &index = *indexPtr_;
|
||||||
|
auto &freeInd = *freePtr_;
|
||||||
auto &loads = *loadsPtr_;
|
auto &loads = *loadsPtr_;
|
||||||
|
|
||||||
DV_DEBUG_MSG(this, "accessing " << i << " (RO)");
|
DV_DEBUG_MSG(this, "accessing " << i << " (RO)");
|
||||||
@ -241,7 +273,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
|
|||||||
HADRONS_ERROR(Size, "index out of range");
|
HADRONS_ERROR(Size, "index out of range");
|
||||||
}
|
}
|
||||||
const_cast<double &>(access_)++;
|
const_cast<double &>(access_)++;
|
||||||
if (cache.find(i) == cache.end())
|
if (index.find(i) == index.end())
|
||||||
{
|
{
|
||||||
// cache miss
|
// cache miss
|
||||||
DV_DEBUG_MSG(this, "cache miss");
|
DV_DEBUG_MSG(this, "cache miss");
|
||||||
@ -268,7 +300,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
|
|||||||
DV_DEBUG_MSG(this, "in cache: " << msg);
|
DV_DEBUG_MSG(this, "in cache: " << msg);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return cache.at(i);
|
return cache[index.at(i)];
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -307,12 +339,23 @@ template <typename T>
|
|||||||
void DiskVectorBase<T>::evict(void) const
|
void DiskVectorBase<T>::evict(void) const
|
||||||
{
|
{
|
||||||
auto &cache = *cachePtr_;
|
auto &cache = *cachePtr_;
|
||||||
|
auto &modified = *modifiedPtr_;
|
||||||
|
auto &index = *indexPtr_;
|
||||||
|
auto &freeInd = *freePtr_;
|
||||||
auto &loads = *loadsPtr_;
|
auto &loads = *loadsPtr_;
|
||||||
|
|
||||||
if (cache.size() >= cacheSize_)
|
if (index.size() >= cacheSize_)
|
||||||
{
|
{
|
||||||
DV_DEBUG_MSG(this, "evicting " << loads.front());
|
unsigned int i = loads.front();
|
||||||
cache.erase(loads.front());
|
|
||||||
|
DV_DEBUG_MSG(this, "evicting " << i);
|
||||||
|
if (modified[index.at(i)])
|
||||||
|
{
|
||||||
|
DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk");
|
||||||
|
save(filename(i), cache[index.at(i)]);
|
||||||
|
}
|
||||||
|
freeInd.push(index.at(i));
|
||||||
|
index.erase(i);
|
||||||
loads.pop_front();
|
loads.pop_front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -321,29 +364,43 @@ template <typename T>
|
|||||||
void DiskVectorBase<T>::fetch(const unsigned int i) const
|
void DiskVectorBase<T>::fetch(const unsigned int i) const
|
||||||
{
|
{
|
||||||
auto &cache = *cachePtr_;
|
auto &cache = *cachePtr_;
|
||||||
|
auto &modified = *modifiedPtr_;
|
||||||
|
auto &index = *indexPtr_;
|
||||||
|
auto &freeInd = *freePtr_;
|
||||||
auto &loads = *loadsPtr_;
|
auto &loads = *loadsPtr_;
|
||||||
|
|
||||||
struct stat s;
|
struct stat s;
|
||||||
|
|
||||||
DV_DEBUG_MSG(this, "loading " << i << " from disk");
|
DV_DEBUG_MSG(this, "loading " << i << " from disk");
|
||||||
|
|
||||||
evict();
|
evict();
|
||||||
|
|
||||||
if(stat(filename(i).c_str(), &s) != 0)
|
if(stat(filename(i).c_str(), &s) != 0)
|
||||||
{
|
{
|
||||||
HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised");
|
HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised");
|
||||||
}
|
}
|
||||||
load(cache[i], filename(i));
|
index[i] = freeInd.top();
|
||||||
|
freeInd.pop();
|
||||||
|
load(cache[index.at(i)], filename(i));
|
||||||
loads.push_back(i);
|
loads.push_back(i);
|
||||||
|
modified[index.at(i)] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
|
void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
|
||||||
{
|
{
|
||||||
auto &cache = *cachePtr_;
|
auto &cache = *cachePtr_;
|
||||||
|
auto &modified = *modifiedPtr_;
|
||||||
|
auto &index = *indexPtr_;
|
||||||
|
auto &freeInd = *freePtr_;
|
||||||
auto &loads = *loadsPtr_;
|
auto &loads = *loadsPtr_;
|
||||||
|
|
||||||
evict();
|
evict();
|
||||||
cache[i] = obj;
|
index[i] = freeInd.top();
|
||||||
|
freeInd.pop();
|
||||||
|
cache[index.at(i)] = obj;
|
||||||
loads.push_back(i);
|
loads.push_back(i);
|
||||||
|
modified[index.at(i)] = false;
|
||||||
|
|
||||||
#ifdef DV_DEBUG
|
#ifdef DV_DEBUG
|
||||||
std::string msg;
|
std::string msg;
|
||||||
|
@ -166,7 +166,13 @@ std::string Hadrons::dirname(const std::string &s)
|
|||||||
|
|
||||||
void Hadrons::makeFileDir(const std::string filename, GridBase *g)
|
void Hadrons::makeFileDir(const std::string filename, GridBase *g)
|
||||||
{
|
{
|
||||||
if (g->IsBoss())
|
bool doIt = true;
|
||||||
|
|
||||||
|
if (g)
|
||||||
|
{
|
||||||
|
doIt = g->IsBoss();
|
||||||
|
}
|
||||||
|
if (doIt)
|
||||||
{
|
{
|
||||||
std::string dir = dirname(filename);
|
std::string dir = dirname(filename);
|
||||||
int status = mkdir(dir);
|
int status = mkdir(dir);
|
||||||
|
@ -32,6 +32,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
|
#include <regex>
|
||||||
#include <Grid/Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <cxxabi.h>
|
#include <cxxabi.h>
|
||||||
|
|
||||||
@ -217,15 +218,15 @@ typedef XmlReader ResultReader;
|
|||||||
typedef XmlWriter ResultWriter;
|
typedef XmlWriter ResultWriter;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define RESULT_FILE_NAME(name) \
|
#define RESULT_FILE_NAME(name, traj) \
|
||||||
name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt
|
name + "." + std::to_string(traj) + "." + resultFileExt
|
||||||
|
|
||||||
// recursive mkdir
|
// recursive mkdir
|
||||||
#define MAX_PATH_LENGTH 512u
|
#define MAX_PATH_LENGTH 512u
|
||||||
int mkdir(const std::string dirName);
|
int mkdir(const std::string dirName);
|
||||||
std::string basename(const std::string &s);
|
std::string basename(const std::string &s);
|
||||||
std::string dirname(const std::string &s);
|
std::string dirname(const std::string &s);
|
||||||
void makeFileDir(const std::string filename, GridBase *g);
|
void makeFileDir(const std::string filename, GridBase *g = nullptr);
|
||||||
|
|
||||||
// default Schur convention
|
// default Schur convention
|
||||||
#ifndef HADRONS_DEFAULT_SCHUR
|
#ifndef HADRONS_DEFAULT_SCHUR
|
||||||
@ -248,6 +249,20 @@ void makeFileDir(const std::string filename, GridBase *g);
|
|||||||
// pretty print time profile
|
// pretty print time profile
|
||||||
void printTimeProfile(const std::map<std::string, GridTime> &timing, GridTime total);
|
void printTimeProfile(const std::map<std::string, GridTime> &timing, GridTime total);
|
||||||
|
|
||||||
|
// token replacement utility
|
||||||
|
template <typename T>
|
||||||
|
void tokenReplace(std::string &str, const std::string token,
|
||||||
|
const T &x, const std::string mark = "@")
|
||||||
|
{
|
||||||
|
std::string fullToken = mark + token + mark;
|
||||||
|
|
||||||
|
auto pos = str.find(fullToken);
|
||||||
|
if (pos != std::string::npos)
|
||||||
|
{
|
||||||
|
str.replace(pos, fullToken.size(), std::to_string(x));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
END_HADRONS_NAMESPACE
|
END_HADRONS_NAMESPACE
|
||||||
|
|
||||||
#include <Hadrons/Exceptions.hpp>
|
#include <Hadrons/Exceptions.hpp>
|
||||||
|
@ -5,17 +5,17 @@ lib_LIBRARIES = libHadrons.a
|
|||||||
include modules.inc
|
include modules.inc
|
||||||
|
|
||||||
libHadrons_a_SOURCES = \
|
libHadrons_a_SOURCES = \
|
||||||
$(modules_cc) \
|
|
||||||
Application.cc \
|
Application.cc \
|
||||||
Environment.cc \
|
Environment.cc \
|
||||||
Exceptions.cc \
|
Exceptions.cc \
|
||||||
Global.cc \
|
Global.cc \
|
||||||
Module.cc \
|
Module.cc \
|
||||||
TimerArray.cc \
|
TimerArray.cc \
|
||||||
VirtualMachine.cc
|
VirtualMachine.cc \
|
||||||
|
$(modules_cc)
|
||||||
|
|
||||||
libHadrons_adir = $(includedir)/Hadrons
|
libHadrons_adir = $(includedir)/Hadrons
|
||||||
nobase_libHadrons_a_HEADERS = \
|
nobase_libHadrons_a_HEADERS = \
|
||||||
$(modules_hpp) \
|
|
||||||
A2AVectors.hpp \
|
A2AVectors.hpp \
|
||||||
A2AMatrix.hpp \
|
A2AMatrix.hpp \
|
||||||
Application.hpp \
|
Application.hpp \
|
||||||
@ -33,4 +33,5 @@ nobase_libHadrons_a_HEADERS = \
|
|||||||
ModuleFactory.hpp \
|
ModuleFactory.hpp \
|
||||||
Solver.hpp \
|
Solver.hpp \
|
||||||
TimerArray.hpp \
|
TimerArray.hpp \
|
||||||
VirtualMachine.hpp
|
VirtualMachine.hpp \
|
||||||
|
$(modules_hpp)
|
||||||
|
@ -144,7 +144,7 @@ if (env().getGrid()->IsBoss() and !ioStem.empty())\
|
|||||||
{\
|
{\
|
||||||
makeFileDir(ioStem, env().getGrid());\
|
makeFileDir(ioStem, env().getGrid());\
|
||||||
{\
|
{\
|
||||||
ResultWriter _writer(RESULT_FILE_NAME(ioStem));\
|
ResultWriter _writer(RESULT_FILE_NAME(ioStem, vm().getTrajectory()));\
|
||||||
write(_writer, name, result);\
|
write(_writer, name, result);\
|
||||||
}\
|
}\
|
||||||
}
|
}
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
#include <Hadrons/Modules/MSolver/Guesser.hpp>
|
#include <Hadrons/Modules/MSolver/Guesser.hpp>
|
||||||
#include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
|
#include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
|
||||||
#include <Hadrons/Modules/MSolver/A2AVectors.hpp>
|
#include <Hadrons/Modules/MSolver/A2AVectors.hpp>
|
||||||
#include <Hadrons/Modules/MSolver/A2AAslashVector.hpp>
|
#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
|
||||||
#include <Hadrons/Modules/MGauge/UnitEm.hpp>
|
#include <Hadrons/Modules/MGauge/UnitEm.hpp>
|
||||||
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
|
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
|
||||||
#include <Hadrons/Modules/MGauge/Unit.hpp>
|
#include <Hadrons/Modules/MGauge/Unit.hpp>
|
||||||
|
@ -33,10 +33,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/ModuleFactory.hpp>
|
#include <Hadrons/ModuleFactory.hpp>
|
||||||
#include <Hadrons/A2AMatrix.hpp>
|
#include <Hadrons/A2AMatrix.hpp>
|
||||||
|
|
||||||
#ifndef ASF_IO_TYPE
|
|
||||||
#define ASF_IO_TYPE ComplexF
|
|
||||||
#endif
|
|
||||||
|
|
||||||
BEGIN_HADRONS_NAMESPACE
|
BEGIN_HADRONS_NAMESPACE
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
@ -113,7 +109,7 @@ public:
|
|||||||
typedef A2AMatrixBlockComputation<Complex,
|
typedef A2AMatrixBlockComputation<Complex,
|
||||||
FermionField,
|
FermionField,
|
||||||
A2AAslashFieldMetadata,
|
A2AAslashFieldMetadata,
|
||||||
ASF_IO_TYPE> Computation;
|
HADRONS_A2AM_IO_TYPE> Computation;
|
||||||
typedef AslashFieldKernel<Complex, FImpl> Kernel;
|
typedef AslashFieldKernel<Complex, FImpl> Kernel;
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
@ -196,7 +192,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
|
|||||||
LOG(Message) << " " << name << std::endl;
|
LOG(Message) << " " << name << std::endl;
|
||||||
}
|
}
|
||||||
LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j
|
LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j
|
||||||
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(ASF_IO_TYPE))
|
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE))
|
||||||
<< "/EM field)" << std::endl;
|
<< "/EM field)" << std::endl;
|
||||||
|
|
||||||
// preparing "B" complexified fields
|
// preparing "B" complexified fields
|
||||||
|
@ -35,10 +35,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/ModuleFactory.hpp>
|
#include <Hadrons/ModuleFactory.hpp>
|
||||||
#include <Hadrons/A2AMatrix.hpp>
|
#include <Hadrons/A2AMatrix.hpp>
|
||||||
|
|
||||||
#ifndef MF_IO_TYPE
|
|
||||||
#define MF_IO_TYPE ComplexF
|
|
||||||
#endif
|
|
||||||
|
|
||||||
BEGIN_HADRONS_NAMESPACE
|
BEGIN_HADRONS_NAMESPACE
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
@ -118,7 +114,7 @@ public:
|
|||||||
typedef A2AMatrixBlockComputation<Complex,
|
typedef A2AMatrixBlockComputation<Complex,
|
||||||
FermionField,
|
FermionField,
|
||||||
A2AMesonFieldMetadata,
|
A2AMesonFieldMetadata,
|
||||||
MF_IO_TYPE> Computation;
|
HADRONS_A2AM_IO_TYPE> Computation;
|
||||||
typedef MesonFieldKernel<Complex, FImpl> Kernel;
|
typedef MesonFieldKernel<Complex, FImpl> Kernel;
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
@ -248,7 +244,7 @@ void TA2AMesonField<FImpl>::execute(void)
|
|||||||
LOG(Message) << " " << g << std::endl;
|
LOG(Message) << " " << g << std::endl;
|
||||||
}
|
}
|
||||||
LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j
|
LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j
|
||||||
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(MF_IO_TYPE))
|
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE))
|
||||||
<< "/momentum/bilinear)" << std::endl;
|
<< "/momentum/bilinear)" << std::endl;
|
||||||
|
|
||||||
auto &ph = envGet(std::vector<ComplexField>, momphName_);
|
auto &ph = envGet(std::vector<ComplexField>, momphName_);
|
||||||
|
@ -146,7 +146,7 @@ void TChargedProp::execute(void)
|
|||||||
std::vector<int> siteCoor;
|
std::vector<int> siteCoor;
|
||||||
|
|
||||||
LOG(Message) << "Saving momentum-projected propagator to '"
|
LOG(Message) << "Saving momentum-projected propagator to '"
|
||||||
<< RESULT_FILE_NAME(par().output) << "'..."
|
<< RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
result.projection.resize(par().outputMom.size());
|
result.projection.resize(par().outputMom.size());
|
||||||
result.lattice_size = env().getGrid()->_fdimensions;
|
result.lattice_size = env().getGrid()->_fdimensions;
|
||||||
|
@ -462,7 +462,7 @@ void TScalarVP::execute(void)
|
|||||||
if (!par().output.empty())
|
if (!par().output.empty())
|
||||||
{
|
{
|
||||||
LOG(Message) << "Saving momentum-projected HVP to '"
|
LOG(Message) << "Saving momentum-projected HVP to '"
|
||||||
<< RESULT_FILE_NAME(par().output) << "'..."
|
<< RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
saveResult(par().output, "HVP", outputData);
|
saveResult(par().output, "HVP", outputData);
|
||||||
}
|
}
|
||||||
|
@ -239,7 +239,7 @@ void TVPCounterTerms::execute(void)
|
|||||||
if (!par().output.empty())
|
if (!par().output.empty())
|
||||||
{
|
{
|
||||||
LOG(Message) << "Saving momentum-projected correlators to '"
|
LOG(Message) << "Saving momentum-projected correlators to '"
|
||||||
<< RESULT_FILE_NAME(par().output) << "'..."
|
<< RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
saveResult(par().output, "scalar_loops", outputData);
|
saveResult(par().output, "scalar_loops", outputData);
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: Hadrons/Modules/MSolver/A2AAslashVector.cc
|
Source file: Hadrons/Modules/MSolver/A2AAslashVectors.cc
|
||||||
|
|
||||||
Copyright (C) 2015-2018
|
Copyright (C) 2015-2018
|
||||||
|
|
||||||
@ -25,11 +25,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Hadrons/Modules/MSolver/A2AAslashVector.hpp>
|
#include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace Hadrons;
|
using namespace Hadrons;
|
||||||
using namespace MSolver;
|
using namespace MSolver;
|
||||||
|
|
||||||
template class Grid::Hadrons::MSolver::TA2AAslashVector<FIMPL>;
|
template class Grid::Hadrons::MSolver::TA2AAslashVectors<FIMPL>;
|
||||||
template class Grid::Hadrons::MSolver::TA2AAslashVector<ZFIMPL>;
|
template class Grid::Hadrons::MSolver::TA2AAslashVectors<ZFIMPL>;
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: Hadrons/Modules/MSolver/A2AAslashVector.hpp
|
Source file: Hadrons/Modules/MSolver/A2AAslashVectors.hpp
|
||||||
|
|
||||||
Copyright (C) 2015-2018
|
Copyright (C) 2015-2018
|
||||||
|
|
||||||
@ -25,13 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef Hadrons_MSolver_A2AAslashVector_hpp_
|
#ifndef Hadrons_MSolver_A2AAslashVectors_hpp_
|
||||||
#define Hadrons_MSolver_A2AAslashVector_hpp_
|
#define Hadrons_MSolver_A2AAslashVectors_hpp_
|
||||||
|
|
||||||
#include <Hadrons/Global.hpp>
|
#include <Hadrons/Global.hpp>
|
||||||
#include <Hadrons/Module.hpp>
|
#include <Hadrons/Module.hpp>
|
||||||
#include <Hadrons/ModuleFactory.hpp>
|
#include <Hadrons/ModuleFactory.hpp>
|
||||||
#include <Hadrons/Solver.hpp>
|
#include <Hadrons/Solver.hpp>
|
||||||
|
#include <Hadrons/A2AVectors.hpp>
|
||||||
|
|
||||||
BEGIN_HADRONS_NAMESPACE
|
BEGIN_HADRONS_NAMESPACE
|
||||||
|
|
||||||
@ -54,18 +55,19 @@ BEGIN_MODULE_NAMESPACE(MSolver)
|
|||||||
*
|
*
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
|
class A2AAslashVectorsPar: Serializable
|
||||||
class A2AAslashVectorPar: Serializable
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashVectorPar,
|
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashVectorsPar,
|
||||||
std::string, vector,
|
std::string, vector,
|
||||||
std::string, emField,
|
std::string, emField,
|
||||||
std::string, solver);
|
std::string, solver,
|
||||||
|
std::string, output,
|
||||||
|
bool, multiFile);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
class TA2AAslashVector : public Module<A2AAslashVectorPar>
|
class TA2AAslashVectors : public Module<A2AAslashVectorsPar>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
FERM_TYPE_ALIASES(FImpl,);
|
FERM_TYPE_ALIASES(FImpl,);
|
||||||
@ -74,9 +76,9 @@ public:
|
|||||||
typedef PhotonR::GaugeField EmField;
|
typedef PhotonR::GaugeField EmField;
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
TA2AAslashVector(const std::string name);
|
TA2AAslashVectors(const std::string name);
|
||||||
// destructor
|
// destructor
|
||||||
virtual ~TA2AAslashVector(void) {};
|
virtual ~TA2AAslashVectors(void) {};
|
||||||
// dependency relation
|
// dependency relation
|
||||||
virtual std::vector<std::string> getInput(void);
|
virtual std::vector<std::string> getInput(void);
|
||||||
virtual std::vector<std::string> getOutput(void);
|
virtual std::vector<std::string> getOutput(void);
|
||||||
@ -88,21 +90,21 @@ private:
|
|||||||
unsigned int Ls_;
|
unsigned int Ls_;
|
||||||
};
|
};
|
||||||
|
|
||||||
MODULE_REGISTER_TMP(A2AAslashVector,TA2AAslashVector<FIMPL>, MSolver);
|
MODULE_REGISTER_TMP(A2AAslashVectors, TA2AAslashVectors<FIMPL>, MSolver);
|
||||||
MODULE_REGISTER_TMP(ZA2AAslashVector,TA2AAslashVector<ZFIMPL>, MSolver);
|
MODULE_REGISTER_TMP(ZA2AAslashVectors, TA2AAslashVectors<ZFIMPL>, MSolver);
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
* TA2AAslashVector implementation *
|
* TA2AAslashVectors implementation *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
// constructor /////////////////////////////////////////////////////////////////
|
// constructor /////////////////////////////////////////////////////////////////
|
||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
TA2AAslashVector<FImpl>::TA2AAslashVector(const std::string name)
|
TA2AAslashVectors<FImpl>::TA2AAslashVectors(const std::string name)
|
||||||
: Module<A2AAslashVectorPar>(name)
|
: Module<A2AAslashVectorsPar>(name)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// dependencies/products ///////////////////////////////////////////////////////
|
// dependencies/products ///////////////////////////////////////////////////////
|
||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
std::vector<std::string> TA2AAslashVector<FImpl>::getInput(void)
|
std::vector<std::string> TA2AAslashVectors<FImpl>::getInput(void)
|
||||||
{
|
{
|
||||||
std::vector<std::string> in = {par().vector, par().emField, par().solver};
|
std::vector<std::string> in = {par().vector, par().emField, par().solver};
|
||||||
|
|
||||||
@ -110,7 +112,7 @@ std::vector<std::string> TA2AAslashVector<FImpl>::getInput(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
std::vector<std::string> TA2AAslashVector<FImpl>::getOutput(void)
|
std::vector<std::string> TA2AAslashVectors<FImpl>::getOutput(void)
|
||||||
{
|
{
|
||||||
std::vector<std::string> out = {getName()};
|
std::vector<std::string> out = {getName()};
|
||||||
|
|
||||||
@ -119,7 +121,7 @@ std::vector<std::string> TA2AAslashVector<FImpl>::getOutput(void)
|
|||||||
|
|
||||||
// setup ///////////////////////////////////////////////////////////////////////
|
// setup ///////////////////////////////////////////////////////////////////////
|
||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
void TA2AAslashVector<FImpl>::setup(void)
|
void TA2AAslashVectors<FImpl>::setup(void)
|
||||||
{
|
{
|
||||||
Ls_ = env().getObjectLs(par().solver);
|
Ls_ = env().getObjectLs(par().solver);
|
||||||
auto &vvector = envGet(std::vector<FermionField>, par().vector);
|
auto &vvector = envGet(std::vector<FermionField>, par().vector);
|
||||||
@ -134,7 +136,7 @@ void TA2AAslashVector<FImpl>::setup(void)
|
|||||||
|
|
||||||
// execution ///////////////////////////////////////////////////////////////////
|
// execution ///////////////////////////////////////////////////////////////////
|
||||||
template <typename FImpl>
|
template <typename FImpl>
|
||||||
void TA2AAslashVector<FImpl>::execute(void)
|
void TA2AAslashVectors<FImpl>::execute(void)
|
||||||
{
|
{
|
||||||
auto &solver = envGet(Solver, par().solver);
|
auto &solver = envGet(Solver, par().solver);
|
||||||
auto &stoch_photon = envGet(EmField, par().emField);
|
auto &stoch_photon = envGet(EmField, par().emField);
|
||||||
@ -148,13 +150,9 @@ void TA2AAslashVector<FImpl>::execute(void)
|
|||||||
|
|
||||||
Complex ci(0.0,1.0);
|
Complex ci(0.0,1.0);
|
||||||
|
|
||||||
|
|
||||||
startTimer("Seq Aslash");
|
startTimer("Seq Aslash");
|
||||||
|
LOG(Message) << "Calculate Sequential propagator on Aslash * v with the A2A vector "
|
||||||
LOG(Message) << "Calculate Sequential propagator on Aslash * v with the A2A vector " << par().vector
|
<< par().vector << " and the photon field " << par().emField << std::endl;
|
||||||
<< " and the photon field " << par().emField << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
for(unsigned int i=0; i<Nmodes; i++)
|
for(unsigned int i=0; i<Nmodes; i++)
|
||||||
{
|
{
|
||||||
v4dtmp = zero;
|
v4dtmp = zero;
|
||||||
@ -166,6 +164,7 @@ void TA2AAslashVector<FImpl>::execute(void)
|
|||||||
}
|
}
|
||||||
stopTimer("Multiply Aslash");
|
stopTimer("Multiply Aslash");
|
||||||
|
|
||||||
|
startTimer("Inversion");
|
||||||
if (Ls_ == 1)
|
if (Ls_ == 1)
|
||||||
{
|
{
|
||||||
solver(Aslashv[i], v4dtmp);
|
solver(Aslashv[i], v4dtmp);
|
||||||
@ -177,13 +176,19 @@ void TA2AAslashVector<FImpl>::execute(void)
|
|||||||
mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp);
|
mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp);
|
||||||
Aslashv[i] = v4dtmp;
|
Aslashv[i] = v4dtmp;
|
||||||
}
|
}
|
||||||
|
stopTimer("Inversion");
|
||||||
}
|
}
|
||||||
|
|
||||||
stopTimer("Seq Aslash");
|
stopTimer("Seq Aslash");
|
||||||
|
if (!par().output.empty())
|
||||||
|
{
|
||||||
|
startTimer("I/O");
|
||||||
|
A2AVectorsIo::write(par().output, Aslashv, par().multiFile, vm().getTrajectory());
|
||||||
|
stopTimer("I/O");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
END_MODULE_NAMESPACE
|
END_MODULE_NAMESPACE
|
||||||
|
|
||||||
END_HADRONS_NAMESPACE
|
END_HADRONS_NAMESPACE
|
||||||
|
|
||||||
#endif // Hadrons_MSolver_A2AAslashVector_hpp_
|
#endif // Hadrons_MSolver_A2AAslashVectors_hpp_
|
454
Hadrons/Utilities/Contractor.cc
Normal file
454
Hadrons/Utilities/Contractor.cc
Normal file
@ -0,0 +1,454 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: Hadrons/Utilities/Contractor.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015-2018
|
||||||
|
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Hadrons/Global.hpp>
|
||||||
|
#include <Hadrons/A2AMatrix.hpp>
|
||||||
|
#include <Hadrons/DiskVector.hpp>
|
||||||
|
#include <Hadrons/TimerArray.hpp>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace QCD;
|
||||||
|
using namespace Hadrons;
|
||||||
|
|
||||||
|
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
|
||||||
|
|
||||||
|
namespace Contractor
|
||||||
|
{
|
||||||
|
class TrajRange: Serializable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
|
||||||
|
unsigned int, start,
|
||||||
|
unsigned int, end,
|
||||||
|
unsigned int, step);
|
||||||
|
};
|
||||||
|
|
||||||
|
class GlobalPar: Serializable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
|
||||||
|
TrajRange, trajCounter,
|
||||||
|
unsigned int, nt,
|
||||||
|
std::string, diskVectorDir,
|
||||||
|
std::string, output);
|
||||||
|
};
|
||||||
|
|
||||||
|
class A2AMatrixPar: Serializable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
|
||||||
|
std::string, file,
|
||||||
|
std::string, dataset,
|
||||||
|
unsigned int, cacheSize,
|
||||||
|
std::string, name);
|
||||||
|
};
|
||||||
|
|
||||||
|
class ProductPar: Serializable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
|
||||||
|
std::string, terms,
|
||||||
|
std::vector<std::string>, times,
|
||||||
|
std::string, translations,
|
||||||
|
bool, translationAverage);
|
||||||
|
};
|
||||||
|
|
||||||
|
class CorrelatorResult: Serializable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
|
||||||
|
std::vector<Contractor::A2AMatrixPar>, a2aMatrix,
|
||||||
|
ProductPar, contraction,
|
||||||
|
std::vector<unsigned int>, times,
|
||||||
|
std::vector<ComplexD>, correlator);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ContractorPar
|
||||||
|
{
|
||||||
|
Contractor::GlobalPar global;
|
||||||
|
std::vector<Contractor::A2AMatrixPar> a2aMatrix;
|
||||||
|
std::vector<Contractor::ProductPar> product;
|
||||||
|
};
|
||||||
|
|
||||||
|
void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
|
||||||
|
const std::vector<std::set<unsigned int>> ×,
|
||||||
|
std::vector<unsigned int> ¤t,
|
||||||
|
const unsigned int depth)
|
||||||
|
{
|
||||||
|
if (depth > 0)
|
||||||
|
{
|
||||||
|
for (auto t: times[times.size() - depth])
|
||||||
|
{
|
||||||
|
current[times.size() - depth] = t;
|
||||||
|
makeTimeSeq(timeSeq, times, current, depth - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
timeSeq.push_back(current);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
|
||||||
|
const std::vector<std::set<unsigned int>> ×)
|
||||||
|
{
|
||||||
|
std::vector<unsigned int> current(times.size());
|
||||||
|
|
||||||
|
makeTimeSeq(timeSeq, times, current, times.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void saveCorrelator(const Contractor::CorrelatorResult &result, const std::string dir,
|
||||||
|
const unsigned int dt, const unsigned int traj)
|
||||||
|
{
|
||||||
|
std::string fileStem = "", filename;
|
||||||
|
std::vector<std::string> terms = strToVec<std::string>(result.contraction.terms);
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < terms.size() - 1; i++)
|
||||||
|
{
|
||||||
|
fileStem += terms[i] + "_" + std::to_string(result.times[i]) + "_";
|
||||||
|
}
|
||||||
|
fileStem += terms.back();
|
||||||
|
if (!result.contraction.translationAverage)
|
||||||
|
{
|
||||||
|
fileStem += "_dt_" + std::to_string(dt);
|
||||||
|
}
|
||||||
|
filename = dir + "/" + RESULT_FILE_NAME(fileStem, traj);
|
||||||
|
std::cout << "Saving correlator to '" << filename << "'" << std::endl;
|
||||||
|
makeFileDir(dir);
|
||||||
|
ResultWriter writer(filename);
|
||||||
|
write(writer, fileStem, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
|
||||||
|
{
|
||||||
|
std::regex rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))");
|
||||||
|
std::smatch sm;
|
||||||
|
std::vector<std::string> rstr = strToVec<std::string>(str);
|
||||||
|
std::set<unsigned int> tSet;
|
||||||
|
|
||||||
|
for (auto &s: rstr)
|
||||||
|
{
|
||||||
|
std::regex_match(s, sm, rex);
|
||||||
|
if (sm[1].matched)
|
||||||
|
{
|
||||||
|
unsigned int t;
|
||||||
|
|
||||||
|
t = std::stoi(sm[1].str());
|
||||||
|
if (t >= nt)
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Range, "time out of range (from expression '" + str + "')");
|
||||||
|
}
|
||||||
|
tSet.insert(t);
|
||||||
|
}
|
||||||
|
else if (sm[2].matched)
|
||||||
|
{
|
||||||
|
unsigned int ta, tb;
|
||||||
|
|
||||||
|
ta = std::stoi(sm[3].str());
|
||||||
|
tb = std::stoi(sm[4].str());
|
||||||
|
if ((ta >= nt) or (tb >= nt))
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Range, "time out of range (from expression '" + str + "')");
|
||||||
|
}
|
||||||
|
for (unsigned int ti = ta; ti <= tb; ++ti)
|
||||||
|
{
|
||||||
|
tSet.insert(ti);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Sec
|
||||||
|
{
|
||||||
|
Sec(const double usec)
|
||||||
|
{
|
||||||
|
seconds = usec/1.0e6;
|
||||||
|
}
|
||||||
|
|
||||||
|
double seconds;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream & operator<< (std::ostream& s, const Sec &&sec)
|
||||||
|
{
|
||||||
|
s << std::setw(10) << sec.seconds << " sec";
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Flops
|
||||||
|
{
|
||||||
|
Flops(const double flops, const double fusec)
|
||||||
|
{
|
||||||
|
gFlopsPerSec = flops/fusec/1.0e3;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gFlopsPerSec;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream & operator<< (std::ostream& s, const Flops &&f)
|
||||||
|
{
|
||||||
|
s << std::setw(10) << f.gFlopsPerSec << " GFlop/s";
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Bytes
|
||||||
|
{
|
||||||
|
Bytes(const double bytes, const double busec)
|
||||||
|
{
|
||||||
|
gBytesPerSec = bytes/busec*1.0e6/1024/1024/1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gBytesPerSec;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream & operator<< (std::ostream& s, const Bytes &&b)
|
||||||
|
{
|
||||||
|
s << std::setw(10) << b.gBytesPerSec << " GB/s";
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
// parse command line
|
||||||
|
std::string parFilename;
|
||||||
|
|
||||||
|
if (argc != 2)
|
||||||
|
{
|
||||||
|
std::cerr << "usage: " << argv[0] << " <parameter file>";
|
||||||
|
std::cerr << std::endl;
|
||||||
|
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
parFilename = argv[1];
|
||||||
|
|
||||||
|
// parse parameter file
|
||||||
|
ContractorPar par;
|
||||||
|
unsigned int nMat, nCont;
|
||||||
|
XmlReader reader(parFilename);
|
||||||
|
|
||||||
|
read(reader, "global", par.global);
|
||||||
|
read(reader, "a2aMatrix", par.a2aMatrix);
|
||||||
|
read(reader, "product", par.product);
|
||||||
|
nMat = par.a2aMatrix.size();
|
||||||
|
nCont = par.product.size();
|
||||||
|
|
||||||
|
// create diskvectors
|
||||||
|
std::map<std::string, EigenDiskVector<ComplexD>> a2aMat;
|
||||||
|
unsigned int cacheSize;
|
||||||
|
|
||||||
|
for (auto &p: par.a2aMatrix)
|
||||||
|
{
|
||||||
|
std::string dirName = par.global.diskVectorDir + "/" + p.name;
|
||||||
|
|
||||||
|
a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
|
||||||
|
}
|
||||||
|
|
||||||
|
// trajectory loop
|
||||||
|
for (unsigned int traj = par.global.trajCounter.start;
|
||||||
|
traj < par.global.trajCounter.end; traj += par.global.trajCounter.step)
|
||||||
|
{
|
||||||
|
std::cout << ":::::::: Trajectory " << traj << std::endl;
|
||||||
|
|
||||||
|
// load data
|
||||||
|
for (auto &p: par.a2aMatrix)
|
||||||
|
{
|
||||||
|
std::string filename = p.file;
|
||||||
|
double t, size;
|
||||||
|
|
||||||
|
tokenReplace(filename, "traj", traj);
|
||||||
|
std::cout << "======== Loading '" << filename << "'" << std::endl;
|
||||||
|
|
||||||
|
A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
|
||||||
|
|
||||||
|
a2aIo.load(a2aMat.at(p.name), &t);
|
||||||
|
std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6
|
||||||
|
<< " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// contract
|
||||||
|
EigenDiskVector<ComplexD>::Matrix buf;
|
||||||
|
|
||||||
|
for (auto &p: par.product)
|
||||||
|
{
|
||||||
|
std::vector<std::string> term = strToVec<std::string>(p.terms);
|
||||||
|
std::vector<std::set<unsigned int>> times;
|
||||||
|
std::vector<std::vector<unsigned int>> timeSeq;
|
||||||
|
std::set<unsigned int> translations;
|
||||||
|
std::vector<A2AMatrixTr<ComplexD>> lastTerm(par.global.nt);
|
||||||
|
A2AMatrix<ComplexD> prod, buf, tmp;
|
||||||
|
TimerArray tAr;
|
||||||
|
double fusec, busec, flops, bytes, tusec;
|
||||||
|
Contractor::CorrelatorResult result;
|
||||||
|
|
||||||
|
tAr.startTimer("Total");
|
||||||
|
std::cout << "======== Contraction tr(";
|
||||||
|
for (unsigned int g = 0; g < term.size(); ++g)
|
||||||
|
{
|
||||||
|
std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
if (term.size() != p.times.size() + 1)
|
||||||
|
{
|
||||||
|
HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size())
|
||||||
|
+ ") different from number of times ("
|
||||||
|
+ std::to_string(p.times.size() + 1) + ")");
|
||||||
|
}
|
||||||
|
for (auto &s: p.times)
|
||||||
|
{
|
||||||
|
times.push_back(parseTimeRange(s, par.global.nt));
|
||||||
|
}
|
||||||
|
for (auto &m: par.a2aMatrix)
|
||||||
|
{
|
||||||
|
if (std::find(result.a2aMatrix.begin(), result.a2aMatrix.end(), m) == result.a2aMatrix.end())
|
||||||
|
{
|
||||||
|
result.a2aMatrix.push_back(m);
|
||||||
|
tokenReplace(result.a2aMatrix.back().file, "traj", traj);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.contraction = p;
|
||||||
|
result.correlator.resize(par.global.nt, 0.);
|
||||||
|
|
||||||
|
translations = parseTimeRange(p.translations, par.global.nt);
|
||||||
|
makeTimeSeq(timeSeq, times);
|
||||||
|
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
|
||||||
|
<< timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
std::cout << "* Caching transposed last term" << std::endl;
|
||||||
|
for (unsigned int t = 0; t < par.global.nt; ++t)
|
||||||
|
{
|
||||||
|
tAr.startTimer("Disk vector overhead");
|
||||||
|
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
|
||||||
|
tAr.stopTimer("Disk vector overhead");
|
||||||
|
|
||||||
|
tAr.startTimer("Transpose caching");
|
||||||
|
lastTerm[t].resize(ref.rows(), ref.cols());
|
||||||
|
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
|
||||||
|
for (unsigned int i = 0; i < ref.rows(); ++i)
|
||||||
|
{
|
||||||
|
lastTerm[t](i, j) = ref(i, j);
|
||||||
|
}
|
||||||
|
tAr.stopTimer("Transpose caching");
|
||||||
|
}
|
||||||
|
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
|
||||||
|
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
|
||||||
|
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
|
||||||
|
for (unsigned int i = 0; i < timeSeq.size(); ++i)
|
||||||
|
{
|
||||||
|
unsigned int dti = 0;
|
||||||
|
auto &t = timeSeq[i];
|
||||||
|
|
||||||
|
result.times = t;
|
||||||
|
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||||
|
{
|
||||||
|
result.correlator[tLast] = 0.;
|
||||||
|
}
|
||||||
|
for (auto &dt: translations)
|
||||||
|
{
|
||||||
|
std::cout << "* Step " << i*translations.size() + dti + 1
|
||||||
|
<< "/" << timeSeq.size()*translations.size()
|
||||||
|
<< " -- positions= " << t << ", dt= " << dt << std::endl;
|
||||||
|
if (term.size() > 2)
|
||||||
|
{
|
||||||
|
std::cout << std::setw(8) << "products";
|
||||||
|
}
|
||||||
|
flops = 0.;
|
||||||
|
bytes = 0.;
|
||||||
|
fusec = tAr.getDTimer("A*B algebra");
|
||||||
|
busec = tAr.getDTimer("A*B total");
|
||||||
|
tAr.startTimer("Linear algebra");
|
||||||
|
tAr.startTimer("Disk vector overhead");
|
||||||
|
prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
|
||||||
|
tAr.stopTimer("Disk vector overhead");
|
||||||
|
for (unsigned int j = 1; j < term.size() - 1; ++j)
|
||||||
|
{
|
||||||
|
tAr.startTimer("Disk vector overhead");
|
||||||
|
const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
|
||||||
|
tAr.stopTimer("Disk vector overhead");
|
||||||
|
|
||||||
|
tAr.startTimer("A*B total");
|
||||||
|
tAr.startTimer("A*B algebra");
|
||||||
|
A2AContraction::mul(tmp, prod, ref);
|
||||||
|
tAr.stopTimer("A*B algebra");
|
||||||
|
flops += A2AContraction::mulFlops(prod, ref);
|
||||||
|
prod = tmp;
|
||||||
|
tAr.stopTimer("A*B total");
|
||||||
|
bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
|
||||||
|
}
|
||||||
|
if (term.size() > 2)
|
||||||
|
{
|
||||||
|
std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
|
||||||
|
<< Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " "
|
||||||
|
<< Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << std::setw(8) << "traces";
|
||||||
|
flops = 0.;
|
||||||
|
bytes = 0.;
|
||||||
|
fusec = tAr.getDTimer("tr(A*B)");
|
||||||
|
busec = tAr.getDTimer("tr(A*B)");
|
||||||
|
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||||
|
{
|
||||||
|
tAr.startTimer("tr(A*B)");
|
||||||
|
A2AContraction::accTrMul(result.correlator[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
|
||||||
|
tAr.stopTimer("tr(A*B)");
|
||||||
|
flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
|
||||||
|
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
|
||||||
|
}
|
||||||
|
tAr.stopTimer("Linear algebra");
|
||||||
|
std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
|
||||||
|
<< Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " "
|
||||||
|
<< Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
|
||||||
|
if (!p.translationAverage)
|
||||||
|
{
|
||||||
|
saveCorrelator(result, par.global.output, dt, traj);
|
||||||
|
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||||
|
{
|
||||||
|
result.correlator[tLast] = 0.;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dti++;
|
||||||
|
}
|
||||||
|
if (p.translationAverage)
|
||||||
|
{
|
||||||
|
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
|
||||||
|
{
|
||||||
|
result.correlator[tLast] /= translations.size();
|
||||||
|
}
|
||||||
|
saveCorrelator(result, par.global.output, 0, traj);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tAr.stopTimer("Total");
|
||||||
|
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
12
Hadrons/Utilities/Contractor.hpp
Normal file
12
Hadrons/Utilities/Contractor.hpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef Hadrons_Contractor_hpp_
|
||||||
|
#define Hadrons_Contractor_hpp_
|
||||||
|
|
||||||
|
#include <Hadrons/Global.hpp>
|
||||||
|
|
||||||
|
BEGIN_HADRONS_NAMESPACE
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
END_HADRONS_NAMESPACE
|
||||||
|
|
||||||
|
#endif // Hadrons_Contractor_hpp_
|
434
Hadrons/Utilities/ContractorBenchmark.cc
Normal file
434
Hadrons/Utilities/ContractorBenchmark.cc
Normal file
@ -0,0 +1,434 @@
|
|||||||
|
#include <Hadrons/Global.hpp>
|
||||||
|
#include <Hadrons/A2AMatrix.hpp>
|
||||||
|
#ifdef USE_MKL
|
||||||
|
#include "mkl.h"
|
||||||
|
#include "mkl_cblas.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Hadrons;
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
#define GET_RANK(rank, nMpi) \
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &(nMpi));\
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &(rank))
|
||||||
|
#define BARRIER() MPI_Barrier(MPI_COMM_WORLD)
|
||||||
|
#define INIT() MPI_Init(NULL, NULL)
|
||||||
|
#define FINALIZE() MPI_Finalize()
|
||||||
|
#else
|
||||||
|
#define GET_RANK(rank, nMpi) (nMpi) = 1; (rank) = 0
|
||||||
|
#define BARRIER()
|
||||||
|
#define INIT()
|
||||||
|
#define FINALIZE()
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename Function, typename MatLeft, typename MatRight>
|
||||||
|
inline void trBenchmark(const std::string name, const MatLeft &left,
|
||||||
|
const MatRight &right, const ComplexD ref, Function fn)
|
||||||
|
{
|
||||||
|
double t, flops, bytes, n = left[0].rows()*left[0].cols();
|
||||||
|
unsigned int nMat = left.size();
|
||||||
|
int nMpi, rank;
|
||||||
|
ComplexD buf;
|
||||||
|
|
||||||
|
t = 0.;
|
||||||
|
GET_RANK(rank, nMpi);
|
||||||
|
t = -usecond();
|
||||||
|
BARRIER();
|
||||||
|
for (unsigned int i = rank*nMat/nMpi; i < (rank+1)*nMat/nMpi; ++i)
|
||||||
|
{
|
||||||
|
fn(buf, left[i], right[i]);
|
||||||
|
}
|
||||||
|
BARRIER();
|
||||||
|
t += usecond();
|
||||||
|
flops = nMat*(6.*n + 2.*(n - 1.));
|
||||||
|
bytes = nMat*(2.*n*sizeof(ComplexD));
|
||||||
|
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << std::setw(34) << name << ": diff= "
|
||||||
|
<< std::setw(12) << std::norm(buf-ref)
|
||||||
|
<< std::setw(10) << t/1.0e6 << " sec "
|
||||||
|
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
|
||||||
|
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
::sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Function, typename MatV, typename Mat>
|
||||||
|
inline void mulBenchmark(const std::string name, const MatV &left,
|
||||||
|
const MatV &right, const Mat &ref, Function fn)
|
||||||
|
{
|
||||||
|
double t, flops, bytes;
|
||||||
|
double nr = left[0].rows(), nc = left[0].cols(), n = nr*nc;
|
||||||
|
unsigned int nMat = left.size();
|
||||||
|
int nMpi, rank;
|
||||||
|
Mat buf(left[0].rows(), left[0].rows());
|
||||||
|
|
||||||
|
t = 0.;
|
||||||
|
GET_RANK(rank, nMpi);
|
||||||
|
t = -usecond();
|
||||||
|
BARRIER();
|
||||||
|
for (unsigned int i = rank*nMat/nMpi; i < (rank+1)*nMat/nMpi; ++i)
|
||||||
|
{
|
||||||
|
fn(buf, left[i], right[i]);
|
||||||
|
}
|
||||||
|
BARRIER();
|
||||||
|
t += usecond();
|
||||||
|
flops = nMat*(nr*nr*(6.*nc + 2.*(nc - 1.)));
|
||||||
|
bytes = nMat*(2*nc*nr*sizeof(ComplexD));
|
||||||
|
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << std::setw(34) << name << ": diff= "
|
||||||
|
<< std::setw(12) << (buf-ref).squaredNorm()
|
||||||
|
<< std::setw(10) << t/1.0e6 << " sec "
|
||||||
|
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
|
||||||
|
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
::sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_MKL
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void zdotuRow(ComplexD &res, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexD *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aRow*a.cols();
|
||||||
|
aInc = 1;
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aRow;
|
||||||
|
aInc = a.rows();
|
||||||
|
}
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aRow;
|
||||||
|
bInc = b.cols();
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aRow*b.rows();
|
||||||
|
bInc = 1;
|
||||||
|
}
|
||||||
|
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void zdotuCol(ComplexD &res, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexD *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aCol;
|
||||||
|
aInc = a.cols();
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aCol*a.rows();
|
||||||
|
aInc = 1;
|
||||||
|
}
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aCol*b.cols();
|
||||||
|
bInc = 1;
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aCol;
|
||||||
|
bInc = b.rows();
|
||||||
|
}
|
||||||
|
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
|
||||||
|
{
|
||||||
|
std::vector<MatLeft> left;
|
||||||
|
std::vector<MatRight> right;
|
||||||
|
MatRight buf;
|
||||||
|
ComplexD ref;
|
||||||
|
int rank, nMpi;
|
||||||
|
|
||||||
|
left.resize(nMat, MatLeft::Random(ni, nj));
|
||||||
|
right.resize(nMat, MatRight::Random(nj, ni));
|
||||||
|
GET_RANK(rank, nMpi);
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << "==== tr(A*B) benchmarks" << std::endl;
|
||||||
|
std::cout << "A matrices use ";
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
std::cout << "row-major ordering" << std::endl;
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
std::cout << "col-major ordering" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "B matrices use ";
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
std::cout << "row-major ordering" << std::endl;
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
std::cout << "col-major ordering" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
BARRIER();
|
||||||
|
ref = (left.back()*right.back()).trace();
|
||||||
|
trBenchmark("Hadrons A2AContraction::accTrMul", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = 0.;
|
||||||
|
A2AContraction::accTrMul(res, a, b);
|
||||||
|
});
|
||||||
|
trBenchmark("Naive loop rows first", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
auto nr = a.rows(), nc = a.cols();
|
||||||
|
|
||||||
|
res = 0.;
|
||||||
|
parallel_for (unsigned int i = 0; i < nr; ++i)
|
||||||
|
{
|
||||||
|
ComplexD tmp = 0.;
|
||||||
|
|
||||||
|
for (unsigned int j = 0; j < nc; ++j)
|
||||||
|
{
|
||||||
|
tmp += a(i, j)*b(j, i);
|
||||||
|
}
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
res += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
trBenchmark("Naive loop cols first", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
auto nr = a.rows(), nc = a.cols();
|
||||||
|
|
||||||
|
res = 0.;
|
||||||
|
parallel_for (unsigned int j = 0; j < nc; ++j)
|
||||||
|
{
|
||||||
|
ComplexD tmp = 0.;
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < nr; ++i)
|
||||||
|
{
|
||||||
|
tmp += a(i, j)*b(j, i);
|
||||||
|
}
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
res += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
trBenchmark("Eigen tr(A*B)", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = (a*b).trace();
|
||||||
|
});
|
||||||
|
trBenchmark("Eigen row-wise dot", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = 0.;
|
||||||
|
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
|
||||||
|
{
|
||||||
|
ComplexD tmp;
|
||||||
|
|
||||||
|
tmp = a.row(r).conjugate().dot(b.col(r));
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
res += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
trBenchmark("Eigen col-wise dot", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = 0.;
|
||||||
|
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
|
||||||
|
{
|
||||||
|
ComplexD tmp;
|
||||||
|
|
||||||
|
tmp = a.col(c).conjugate().dot(b.row(c));
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
res += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
trBenchmark("Eigen Hadamard", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = a.cwiseProduct(b.transpose()).sum();
|
||||||
|
});
|
||||||
|
#ifdef USE_MKL
|
||||||
|
trBenchmark("MKL row-wise zdotu", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = 0.;
|
||||||
|
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
|
||||||
|
{
|
||||||
|
ComplexD tmp;
|
||||||
|
|
||||||
|
zdotuRow(tmp, r, a, b);
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
res += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
trBenchmark("MKL col-wise zdotu", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = 0.;
|
||||||
|
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
|
||||||
|
{
|
||||||
|
ComplexD tmp;
|
||||||
|
|
||||||
|
zdotuCol(tmp, c, a, b);
|
||||||
|
parallel_critical
|
||||||
|
{
|
||||||
|
res += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#endif
|
||||||
|
BARRIER();
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Mat>
|
||||||
|
void fullMulBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
|
||||||
|
{
|
||||||
|
std::vector<Mat> left, right;
|
||||||
|
Mat ref;
|
||||||
|
int rank, nMpi;
|
||||||
|
|
||||||
|
left.resize(nMat, Mat::Random(ni, nj));
|
||||||
|
right.resize(nMat, Mat::Random(nj, ni));
|
||||||
|
GET_RANK(rank, nMpi);
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << "==== A*B benchmarks" << std::endl;
|
||||||
|
std::cout << "all matrices use ";
|
||||||
|
if (Mat::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
std::cout << "row-major ordering" << std::endl;
|
||||||
|
}
|
||||||
|
else if (Mat::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
std::cout << "col-major ordering" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
BARRIER();
|
||||||
|
ref = left.back()*right.back();
|
||||||
|
mulBenchmark("Hadrons A2AContraction::mul", left, right, ref,
|
||||||
|
[](Mat &res, const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
A2AContraction::mul(res, a, b);
|
||||||
|
});
|
||||||
|
mulBenchmark("Eigen A*B", left, right, ref,
|
||||||
|
[](Mat &res, const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
res = a*b;
|
||||||
|
});
|
||||||
|
#ifdef USE_MKL
|
||||||
|
mulBenchmark("MKL A*B", left, right, ref,
|
||||||
|
[](Mat &res, const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
const ComplexD one(1., 0.), zero(0., 0.);
|
||||||
|
if (Mat::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
|
||||||
|
res.data(), res.cols());
|
||||||
|
}
|
||||||
|
else if (Mat::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
|
||||||
|
res.data(), res.rows());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
#endif
|
||||||
|
BARRIER();
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
// parse command line
|
||||||
|
Eigen::Index ni, nj, nMat;
|
||||||
|
int nMpi, rank;
|
||||||
|
|
||||||
|
if (argc != 4)
|
||||||
|
{
|
||||||
|
std::cerr << "usage: " << argv[0] << " <Ni> <Nj> <#matrices>";
|
||||||
|
std::cerr << std::endl;
|
||||||
|
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
ni = std::stoi(argv[1]);
|
||||||
|
nj = std::stoi(argv[2]);
|
||||||
|
nMat = std::stoi(argv[3]);
|
||||||
|
|
||||||
|
INIT();
|
||||||
|
GET_RANK(rank, nMpi);
|
||||||
|
if (rank == 0)
|
||||||
|
{
|
||||||
|
std::cout << "\n*** ALL-TO-ALL MATRIX CONTRACTION BENCHMARK ***\n" << std::endl;
|
||||||
|
std::cout << nMat << " couples of " << ni << "x" << nj << " matrices\n" << std::endl;
|
||||||
|
|
||||||
|
std::cout << nMpi << " MPI processes" << std::endl;
|
||||||
|
#ifdef GRID_OMP
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp single
|
||||||
|
std::cout << omp_get_num_threads() << " threads\n" << std::endl;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
std::cout << "Single-threaded\n" << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef EIGEN_USE_MKL_ALL
|
||||||
|
std::cout << "Eigen uses the MKL" << std::endl;
|
||||||
|
#endif
|
||||||
|
std::cout << "Eigen uses " << Eigen::nbThreads() << " threads" << std::endl;
|
||||||
|
#ifdef USE_MKL
|
||||||
|
std::cout << "MKL uses " << mkl_get_max_threads() << " threads" << std::endl;
|
||||||
|
#endif
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
|
||||||
|
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
|
||||||
|
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
|
||||||
|
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
|
||||||
|
fullMulBenchmark<A2AMatrix<ComplexD>>(ni, nj, nMat);
|
||||||
|
fullMulBenchmark<A2AMatrixTr<ComplexD>>(ni, nj, nMat);
|
||||||
|
FINALIZE();
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32
|
bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32 HadronsContractor HadronsContractorBenchmark
|
||||||
|
|
||||||
HadronsXmlRun_SOURCES = HadronsXmlRun.cc
|
HadronsXmlRun_SOURCES = HadronsXmlRun.cc
|
||||||
HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
||||||
@ -6,3 +6,9 @@ HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
|||||||
HadronsFermionEP64To32_SOURCES = EigenPackCast.cc
|
HadronsFermionEP64To32_SOURCES = EigenPackCast.cc
|
||||||
HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
|
HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
|
||||||
HadronsFermionEP64To32_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
HadronsFermionEP64To32_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
||||||
|
|
||||||
|
HadronsContractor_SOURCES = Contractor.cc
|
||||||
|
HadronsContractor_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
||||||
|
|
||||||
|
HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc
|
||||||
|
HadronsContractorBenchmark_LDADD = ../libHadrons.a ../../Grid/libGrid.a
|
||||||
|
@ -20,7 +20,7 @@ modules_cc =\
|
|||||||
Modules/MSink/Point.cc \
|
Modules/MSink/Point.cc \
|
||||||
Modules/MSink/Smear.cc \
|
Modules/MSink/Smear.cc \
|
||||||
Modules/MSolver/A2AVectors.cc \
|
Modules/MSolver/A2AVectors.cc \
|
||||||
Modules/MSolver/A2AAslashVector.cc \
|
Modules/MSolver/A2AAslashVectors.cc \
|
||||||
Modules/MSolver/RBPrecCG.cc \
|
Modules/MSolver/RBPrecCG.cc \
|
||||||
Modules/MSolver/MixedPrecisionRBPrecCG.cc \
|
Modules/MSolver/MixedPrecisionRBPrecCG.cc \
|
||||||
Modules/MSolver/LocalCoherenceLanczos.cc \
|
Modules/MSolver/LocalCoherenceLanczos.cc \
|
||||||
@ -97,7 +97,7 @@ modules_hpp =\
|
|||||||
Modules/MSolver/Guesser.hpp \
|
Modules/MSolver/Guesser.hpp \
|
||||||
Modules/MSolver/RBPrecCG.hpp \
|
Modules/MSolver/RBPrecCG.hpp \
|
||||||
Modules/MSolver/A2AVectors.hpp \
|
Modules/MSolver/A2AVectors.hpp \
|
||||||
Modules/MSolver/A2AAslashVector.hpp \
|
Modules/MSolver/A2AAslashVectors.hpp \
|
||||||
Modules/MGauge/UnitEm.hpp \
|
Modules/MGauge/UnitEm.hpp \
|
||||||
Modules/MGauge/StoutSmearing.hpp \
|
Modules/MGauge/StoutSmearing.hpp \
|
||||||
Modules/MGauge/Unit.hpp \
|
Modules/MGauge/Unit.hpp \
|
||||||
|
24
configure.ac
24
configure.ac
@ -123,10 +123,13 @@ case ${ac_SFW_FP16} in
|
|||||||
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
|
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
############### MKL
|
############### Intel libraries
|
||||||
AC_ARG_ENABLE([mkl],
|
AC_ARG_ENABLE([mkl],
|
||||||
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
|
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
|
||||||
[ac_MKL=${enable_mkl}], [ac_MKL=no])
|
[ac_MKL=${enable_mkl}], [ac_MKL=no])
|
||||||
|
AC_ARG_ENABLE([ipp],
|
||||||
|
[AC_HELP_STRING([--enable-ipp=yes|no|prefix], [enable Intel IPP for fast CRC32C])],
|
||||||
|
[ac_IPP=${enable_mkl}], [ac_IPP=no])
|
||||||
|
|
||||||
case ${ac_MKL} in
|
case ${ac_MKL} in
|
||||||
no)
|
no)
|
||||||
@ -139,6 +142,17 @@ case ${ac_MKL} in
|
|||||||
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
case ${ac_IPP} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_IPP], [1], [Define to 1 if you use the Intel IPP]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_IPP/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_IPP/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_IPP], [1], [Define to 1 if you use the Intel IPP]);;
|
||||||
|
esac
|
||||||
|
|
||||||
############### HDF5
|
############### HDF5
|
||||||
AC_ARG_WITH([hdf5],
|
AC_ARG_WITH([hdf5],
|
||||||
[AS_HELP_STRING([--with-hdf5=prefix],
|
[AS_HELP_STRING([--with-hdf5=prefix],
|
||||||
@ -170,7 +184,13 @@ AC_CHECK_FUNCS([gettimeofday])
|
|||||||
|
|
||||||
if test "${ac_MKL}x" != "nox"; then
|
if test "${ac_MKL}x" != "nox"; then
|
||||||
AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
|
AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
|
||||||
[AC_MSG_ERROR("MKL enabled but library not found")])
|
[AC_MSG_ERROR("Intel MKL enabled but library not found")])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "${ac_IPP}x" != "nox"; then
|
||||||
|
AC_SEARCH_LIBS([ippsCRC32C_8u], [ippdc],
|
||||||
|
[LIBS="${LIBS} -lippdc -lippvm -lipps -lippcore"],
|
||||||
|
[AC_MSG_ERROR("Intel IPP enabled but library not found")])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
||||||
|
Loading…
Reference in New Issue
Block a user