1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 15:55:37 +00:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
fionnoh 2018-11-23 14:08:29 +00:00
commit b74940b3d4
25 changed files with 1506 additions and 145 deletions

View File

@ -487,7 +487,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
template<class vobj> template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;

View File

@ -61,9 +61,9 @@ Group & Hdf5Writer::getGroup(void)
} }
// Reader implementation /////////////////////////////////////////////////////// // Reader implementation ///////////////////////////////////////////////////////
Hdf5Reader::Hdf5Reader(const std::string &fileName) Hdf5Reader::Hdf5Reader(const std::string &fileName, const bool readOnly)
: fileName_(fileName) : fileName_(fileName)
, file_(fileName.c_str(), H5F_ACC_RDWR) , file_(fileName.c_str(), readOnly ? H5F_ACC_RDONLY : H5F_ACC_RDWR)
{ {
group_ = file_.openGroup("/"); group_ = file_.openGroup("/");
readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold",

View File

@ -54,7 +54,7 @@ namespace Grid
class Hdf5Reader: public Reader<Hdf5Reader> class Hdf5Reader: public Reader<Hdf5Reader>
{ {
public: public:
Hdf5Reader(const std::string &fileName); Hdf5Reader(const std::string &fileName, const bool readOnly = true);
virtual ~Hdf5Reader(void) = default; virtual ~Hdf5Reader(void) = default;
bool push(const std::string &s); bool push(const std::string &s);
void pop(void); void pop(void);

View File

@ -47,6 +47,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#else #else
#define PARALLEL_FOR_LOOP #define PARALLEL_FOR_LOOP
#define PARALLEL_FOR_LOOP_INTERN #define PARALLEL_FOR_LOOP_INTERN
#define PARALLEL_FOR_LOOP_REDUCE(op, var)
#define PARALLEL_NESTED_LOOP2 #define PARALLEL_NESTED_LOOP2
#define PARALLEL_NESTED_LOOP5 #define PARALLEL_NESTED_LOOP5
#define PARALLEL_REGION #define PARALLEL_REGION
@ -58,6 +59,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for #define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
#define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for #define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for
#define parallel_critical PARALLEL_CRITICAL
namespace Grid { namespace Grid {

View File

@ -28,16 +28,31 @@
extern "C" { extern "C" {
#include <openssl/sha.h> #include <openssl/sha.h>
} }
#ifdef USE_IPP
#include "ipp.h"
#endif
#pragma once #pragma once
class GridChecksum class GridChecksum
{ {
public: public:
static inline uint32_t crc32(void *data,size_t bytes) static inline uint32_t crc32(const void *data, size_t bytes)
{ {
return ::crc32(0L,(unsigned char *)data,bytes); return ::crc32(0L,(unsigned char *)data,bytes);
} }
#ifdef USE_IPP
static inline uint32_t crc32c(const void* data, size_t bytes)
{
uint32_t crc32c = ~(uint32_t)0;
ippsCRC32C_8u(reinterpret_cast<const unsigned char *>(data), bytes, &crc32c);
ippsSwapBytes_32u_I(&crc32c, 1);
return ~crc32c;
}
#endif
template <typename T> template <typename T>
static inline std::string sha256_string(const std::vector<T> &hash) static inline std::string sha256_string(const std::vector<T> &hash)
{ {

View File

@ -32,11 +32,19 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/Global.hpp> #include <Hadrons/Global.hpp>
#include <Hadrons/TimerArray.hpp> #include <Hadrons/TimerArray.hpp>
#include <Grid/Eigen/unsupported/CXX11/Tensor> #include <Grid/Eigen/unsupported/CXX11/Tensor>
#ifdef USE_MKL
#include "mkl.h"
#include "mkl_cblas.h"
#endif
#ifndef HADRONS_A2AM_NAME #ifndef HADRONS_A2AM_NAME
#define HADRONS_A2AM_NAME "a2aMatrix" #define HADRONS_A2AM_NAME "a2aMatrix"
#endif #endif
#ifndef HADRONS_A2AM_IO_TYPE
#define HADRONS_A2AM_IO_TYPE ComplexF
#endif
#define HADRONS_A2AM_PARALLEL_IO #define HADRONS_A2AM_PARALLEL_IO
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
@ -51,6 +59,12 @@ BEGIN_HADRONS_NAMESPACE
template <typename T> template <typename T>
using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>; using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
template <typename T>
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
template <typename T>
using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
/****************************************************************************** /******************************************************************************
* Abstract class for A2A kernels * * Abstract class for A2A kernels *
******************************************************************************/ ******************************************************************************/
@ -76,10 +90,15 @@ public:
// constructors // constructors
A2AMatrixIo(void) = default; A2AMatrixIo(void) = default;
A2AMatrixIo(std::string filename, std::string dataname, A2AMatrixIo(std::string filename, std::string dataname,
const unsigned int nt, const unsigned int ni, const unsigned int nt, const unsigned int ni = 0,
const unsigned int nj); const unsigned int nj = 0);
// destructor // destructor
~A2AMatrixIo(void) = default; ~A2AMatrixIo(void) = default;
// access
unsigned int getNi(void) const;
unsigned int getNj(void) const;
unsigned int getNt(void) const;
size_t getSize(void) const;
// file allocation // file allocation
template <typename MetadataType> template <typename MetadataType>
void initFile(const MetadataType &d, const unsigned int chunkSize); void initFile(const MetadataType &d, const unsigned int chunkSize);
@ -88,9 +107,11 @@ public:
const unsigned int blockSizei, const unsigned int blockSizej); const unsigned int blockSizei, const unsigned int blockSizej);
void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str, void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
const unsigned int i, const unsigned int j); const unsigned int i, const unsigned int j);
template <template <class> class Vec, typename VecT>
void load(Vec<VecT> &v, double *tRead = nullptr);
private: private:
std::string filename_, dataname_; std::string filename_{""}, dataname_{""};
unsigned int nt_, ni_, nj_; unsigned int nt_{0}, ni_{0}, nj_{0};
}; };
/****************************************************************************** /******************************************************************************
@ -136,6 +157,226 @@ private:
std::vector<IoHelper> nodeIo_; std::vector<IoHelper> nodeIo_;
}; };
/******************************************************************************
* A2A matrix contraction kernels *
******************************************************************************/
class A2AContraction
{
public:
// accTrMul(acc, a, b): acc += tr(a*b)
template <typename C, typename MatLeft, typename MatRight>
static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b)
{
if ((MatLeft::Options == Eigen::RowMajor) and
(MatRight::Options == Eigen::ColMajor))
{
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
C tmp;
#ifdef USE_MKL
dotuRow(tmp, r, a, b);
#else
tmp = a.row(r).conjugate().dot(b.col(r));
#endif
parallel_critical
{
acc += tmp;
}
}
}
else
{
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
C tmp;
#ifdef USE_MKL
dotuCol(tmp, c, a, b);
#else
tmp = a.col(c).conjugate().dot(b.row(c));
#endif
parallel_critical
{
acc += tmp;
}
}
}
}
template <typename MatLeft, typename MatRight>
static inline double accTrMulFlops(const MatLeft &a, const MatRight &b)
{
double n = a.rows()*a.cols();
return 8.*n;
}
// mul(res, a, b): res = a*b
#ifdef USE_MKL
template <template <class, int...> class Mat, int... Opts>
static inline void mul(Mat<ComplexD, Opts...> &res,
const Mat<ComplexD, Opts...> &a,
const Mat<ComplexD, Opts...> &b)
{
static const ComplexD one(1., 0.), zero(0., 0.);
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
{
res.resize(a.rows(), b.cols());
}
if (Mat<ComplexD, Opts...>::Options == Eigen::RowMajor)
{
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat<ComplexD, Opts...>::Options == Eigen::ColMajor)
{
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
}
template <template <class, int...> class Mat, int... Opts>
static inline void mul(Mat<ComplexF, Opts...> &res,
const Mat<ComplexF, Opts...> &a,
const Mat<ComplexF, Opts...> &b)
{
static const ComplexF one(1., 0.), zero(0., 0.);
if ((res.rows() != a.rows()) or (res.cols() != b.cols()))
{
res.resize(a.rows(), b.cols());
}
if (Mat<ComplexF, Opts...>::Options == Eigen::RowMajor)
{
cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat<ComplexF, Opts...>::Options == Eigen::ColMajor)
{
cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
}
#else
template <typename Mat>
static inline void mul(Mat &res, const Mat &a, const Mat &b)
{
res = a*b;
}
#endif
template <typename Mat>
static inline double mulFlops(const Mat &a, const Mat &b)
{
double nr = a.rows(), nc = a.cols();
return nr*nr*(6.*nc + 2.*(nc - 1.));
}
private:
template <typename C, typename MatLeft, typename MatRight>
static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt,
unsigned int &bInc, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aRow*a.cols();
aInc = 1;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aRow;
aInc = a.rows();
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aRow;
bInc = b.cols();
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aRow*b.rows();
bInc = 1;
}
}
#ifdef USE_MKL
template <typename C, typename MatLeft, typename MatRight>
static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt,
unsigned int &bInc, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aCol;
aInc = a.cols();
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aCol*a.rows();
aInc = 1;
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aCol*b.cols();
bInc = 1;
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aCol;
bInc = b.rows();
}
}
template <typename MatLeft, typename MatRight>
static inline void dotuRow(ComplexF &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexF *aPt, *bPt;
unsigned int aInc, bInc;
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
cblas_cdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void dotuCol(ComplexF &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexF *aPt, *bPt;
unsigned int aInc, bInc;
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
cblas_cdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void dotuRow(ComplexD &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void dotuCol(ComplexD &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
#endif
};
/****************************************************************************** /******************************************************************************
* A2AMatrixIo template implementation * * A2AMatrixIo template implementation *
******************************************************************************/ ******************************************************************************/
@ -148,6 +389,31 @@ A2AMatrixIo<T>::A2AMatrixIo(std::string filename, std::string dataname,
, nt_(nt), ni_(ni), nj_(nj) , nt_(nt), ni_(ni), nj_(nj)
{} {}
// access //////////////////////////////////////////////////////////////////////
template <typename T>
unsigned int A2AMatrixIo<T>::getNt(void) const
{
return nt_;
}
template <typename T>
unsigned int A2AMatrixIo<T>::getNi(void) const
{
return ni_;
}
template <typename T>
unsigned int A2AMatrixIo<T>::getNj(void) const
{
return nj_;
}
template <typename T>
size_t A2AMatrixIo<T>::getSize(void) const
{
return nt_*ni_*nj_*sizeof(T);
}
// file allocation ///////////////////////////////////////////////////////////// // file allocation /////////////////////////////////////////////////////////////
template <typename T> template <typename T>
template <typename MetadataType> template <typename MetadataType>
@ -171,7 +437,7 @@ void A2AMatrixIo<T>::initFile(const MetadataType &d, const unsigned int chunkSiz
} }
// create the dataset // create the dataset
Hdf5Reader reader(filename_); Hdf5Reader reader(filename_, false);
push(reader, dataname_); push(reader, dataname_);
auto &group = reader.getGroup(); auto &group = reader.getGroup();
@ -191,7 +457,7 @@ void A2AMatrixIo<T>::saveBlock(const T *data,
const unsigned int blockSizej) const unsigned int blockSizej)
{ {
#ifdef HAVE_HDF5 #ifdef HAVE_HDF5
Hdf5Reader reader(filename_); Hdf5Reader reader(filename_, false);
std::vector<hsize_t> count = {nt_, blockSizei, blockSizej}, std::vector<hsize_t> count = {nt_, blockSizei, blockSizej},
offset = {0, static_cast<hsize_t>(i), offset = {0, static_cast<hsize_t>(i),
static_cast<hsize_t>(j)}, static_cast<hsize_t>(j)},
@ -226,6 +492,82 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
saveBlock(m.data() + offset, i, j, blockSizei, blockSizej); saveBlock(m.data() + offset, i, j, blockSizei, blockSizej);
} }
template <typename T>
template <template <class> class Vec, typename VecT>
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
{
#ifdef HAVE_HDF5
Hdf5Reader reader(filename_);
std::vector<hsize_t> hdim;
H5NS::DataSet dataset;
H5NS::DataSpace dataspace;
H5NS::CompType datatype;
H5NS::DSetCreatPropList plist;
push(reader, dataname_);
auto &group = reader.getGroup();
dataset = group.openDataSet(HADRONS_A2AM_NAME);
datatype = dataset.getCompType();
dataspace = dataset.getSpace();
plist = dataset.getCreatePlist();
hdim.resize(dataspace.getSimpleExtentNdims());
dataspace.getSimpleExtentDims(hdim.data());
if ((nt_*ni_*nj_ != 0) and
((hdim[0] != nt_) or (hdim[1] != ni_) or (hdim[2] != nj_)))
{
HADRONS_ERROR(Size, "all-to-all matrix size mismatch (got "
+ std::to_string(hdim[0]) + "x" + std::to_string(hdim[1]) + "x"
+ std::to_string(hdim[2]) + ", expected "
+ std::to_string(nt_) + "x" + std::to_string(ni_) + "x"
+ std::to_string(nj_));
}
else if (ni_*nj_ == 0)
{
if (hdim[0] != nt_)
{
HADRONS_ERROR(Size, "all-to-all time size mismatch (got "
+ std::to_string(hdim[0]) + ", expected "
+ std::to_string(nt_) + ")");
}
ni_ = hdim[1];
nj_ = hdim[2];
}
A2AMatrix<T> buf(ni_, nj_);
std::vector<hsize_t> count = {1, static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)},
stride = {1, 1, 1},
block = {1, 1, 1},
memCount = {static_cast<hsize_t>(ni_),
static_cast<hsize_t>(nj_)};
H5NS::DataSpace memspace(memCount.size(), memCount.data());
std::cout << "Loading timeslice";
std::cout.flush();
*tRead = 0.;
for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
{
unsigned int t = tp1 - 1;
std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
if (t % 10 == 0)
{
std::cout << " " << t;
std::cout.flush();
}
dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
stride.data(), block.data());
if (tRead) *tRead -= usecond();
dataset.read(buf.data(), datatype, memspace, dataspace);
if (tRead) *tRead += usecond();
v[t] = buf.template cast<VecT>();
}
std::cout << std::endl;
#else
HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
#endif
}
/****************************************************************************** /******************************************************************************
* A2AMatrixBlockComputation template implementation * * A2AMatrixBlockComputation template implementation *
******************************************************************************/ ******************************************************************************/

View File

@ -29,6 +29,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#define Hadrons_DiskVector_hpp_ #define Hadrons_DiskVector_hpp_
#include <Hadrons/Global.hpp> #include <Hadrons/Global.hpp>
#include <Hadrons/A2AMatrix.hpp>
#include <deque> #include <deque>
#include <sys/stat.h> #include <sys/stat.h>
#include <ftw.h> #include <ftw.h>
@ -59,14 +60,18 @@ public:
: master_(master), cmaster_(master), i_(i) {} : master_(master), cmaster_(master), i_(i) {}
// operator=: somebody is trying to store a vector element // operator=: somebody is trying to store a vector element
// write to disk and cache // write to cache and tag as modified
T &operator=(const T &obj) const T &operator=(const T &obj) const
{ {
auto &cache = *master_.cachePtr_;
auto &modified = *master_.modifiedPtr_;
auto &index = *master_.indexPtr_;
DV_DEBUG_MSG(&master_, "writing to " << i_); DV_DEBUG_MSG(&master_, "writing to " << i_);
master_.cacheInsert(i_, obj); master_.cacheInsert(i_, obj);
master_.save(master_.filename(i_), obj); modified[index.at(i_)] = true;
return master_.cachePtr_->at(i_); return cache[index.at(i_)];
} }
// implicit cast to const object reference and redirection // implicit cast to const object reference and redirection
@ -83,6 +88,7 @@ public:
public: public:
DiskVectorBase(const std::string dirname, const unsigned int size = 0, DiskVectorBase(const std::string dirname, const unsigned int size = 0,
const unsigned int cacheSize = 1, const bool clean = true); const unsigned int cacheSize = 1, const bool clean = true);
DiskVectorBase(DiskVectorBase<T> &&v) = default;
virtual ~DiskVectorBase(void); virtual ~DiskVectorBase(void);
const T & operator[](const unsigned int i) const; const T & operator[](const unsigned int i) const;
RwAccessHelper operator[](const unsigned int i); RwAccessHelper operator[](const unsigned int i);
@ -103,7 +109,10 @@ private:
bool clean_; bool clean_;
// using pointers to allow modifications when class is const // using pointers to allow modifications when class is const
// semantic: const means data unmodified, but cache modification allowed // semantic: const means data unmodified, but cache modification allowed
std::unique_ptr<std::map<unsigned int, T>> cachePtr_; std::unique_ptr<std::vector<T>> cachePtr_;
std::unique_ptr<std::vector<bool>> modifiedPtr_;
std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_;
std::unique_ptr<std::stack<unsigned int>> freePtr_;
std::unique_ptr<std::deque<unsigned int>> loadsPtr_; std::unique_ptr<std::deque<unsigned int>> loadsPtr_;
}; };
@ -135,7 +144,7 @@ private:
* Specialisation for Eigen matrices * * Specialisation for Eigen matrices *
******************************************************************************/ ******************************************************************************/
template <typename T> template <typename T>
using EigenDiskVectorMat = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>; using EigenDiskVectorMat = A2AMatrix<T>;
template <typename T> template <typename T>
class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>> class EigenDiskVector: public DiskVectorBase<EigenDiskVectorMat<T>>
@ -153,23 +162,30 @@ private:
virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
{ {
std::ifstream f(filename, std::ios::binary); std::ifstream f(filename, std::ios::binary);
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); uint32_t crc, check;
Eigen::Index nRow, nCol; Eigen::Index nRow, nCol;
size_t matSize; size_t matSize;
double t; double tRead, tHash;
f.read(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char)); f.read(reinterpret_cast<char *>(&crc), sizeof(crc));
f.read(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index)); f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.read(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index)); f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
obj.resize(nRow, nCol); obj.resize(nRow, nCol);
matSize = nRow*nCol*sizeof(T); matSize = nRow*nCol*sizeof(T);
t = -usecond(); tRead = -usecond();
f.read(reinterpret_cast<char *>(obj.data()), matSize); f.read(reinterpret_cast<char *>(obj.data()), matSize);
t += usecond(); tRead += usecond();
DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s"); tHash = -usecond();
auto check = GridChecksum::sha256(obj.data(), matSize); #ifdef USE_IPP
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check)); check = GridChecksum::crc32c(obj.data(), matSize);
if (hash != check) #else
check = GridChecksum::crc32(obj.data(), matSize);
#endif
tHash += usecond();
DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
if (crc != check)
{ {
HADRONS_ERROR(Io, "checksum failed") HADRONS_ERROR(Io, "checksum failed")
} }
@ -178,23 +194,30 @@ private:
virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
{ {
std::ofstream f(filename, std::ios::binary); std::ofstream f(filename, std::ios::binary);
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); uint32_t crc;
Eigen::Index nRow, nCol; Eigen::Index nRow, nCol;
size_t matSize; size_t matSize;
double t; double tWrite, tHash;
nRow = obj.rows(); nRow = obj.rows();
nCol = obj.cols(); nCol = obj.cols();
matSize = nRow*nCol*sizeof(T); matSize = nRow*nCol*sizeof(T);
hash = GridChecksum::sha256(obj.data(), matSize); tHash = -usecond();
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash)); #ifdef USE_IPP
f.write(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char)); crc = GridChecksum::crc32c(obj.data(), matSize);
f.write(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index)); #else
f.write(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index)); crc = GridChecksum::crc32(obj.data(), matSize);
t = -usecond(); #endif
tHash += usecond();
f.write(reinterpret_cast<char *>(&crc), sizeof(crc));
f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
tWrite = -usecond();
f.write(reinterpret_cast<const char *>(obj.data()), matSize); f.write(reinterpret_cast<const char *>(obj.data()), matSize);
t += usecond(); tWrite += usecond();
DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s"); DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec
<< " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s");
} }
}; };
@ -207,7 +230,10 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
const unsigned int cacheSize, const unsigned int cacheSize,
const bool clean) const bool clean)
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean) : dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
, cachePtr_(new std::map<unsigned int, T>()) , cachePtr_(new std::vector<T>(size))
, modifiedPtr_(new std::vector<bool>(size, false))
, indexPtr_(new std::map<unsigned int, unsigned int>())
, freePtr_(new std::stack<unsigned int>)
, loadsPtr_(new std::deque<unsigned int>()) , loadsPtr_(new std::deque<unsigned int>())
{ {
struct stat s; struct stat s;
@ -217,6 +243,10 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
HADRONS_ERROR(Io, "directory '" + dirname + "' already exists") HADRONS_ERROR(Io, "directory '" + dirname + "' already exists")
} }
mkdir(dirname); mkdir(dirname);
for (unsigned int i = 0; i < cacheSize_; ++i)
{
freePtr_->push(i);
}
} }
template <typename T> template <typename T>
@ -232,6 +262,8 @@ template <typename T>
const T & DiskVectorBase<T>::operator[](const unsigned int i) const const T & DiskVectorBase<T>::operator[](const unsigned int i) const
{ {
auto &cache = *cachePtr_; auto &cache = *cachePtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_; auto &loads = *loadsPtr_;
DV_DEBUG_MSG(this, "accessing " << i << " (RO)"); DV_DEBUG_MSG(this, "accessing " << i << " (RO)");
@ -241,7 +273,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
HADRONS_ERROR(Size, "index out of range"); HADRONS_ERROR(Size, "index out of range");
} }
const_cast<double &>(access_)++; const_cast<double &>(access_)++;
if (cache.find(i) == cache.end()) if (index.find(i) == index.end())
{ {
// cache miss // cache miss
DV_DEBUG_MSG(this, "cache miss"); DV_DEBUG_MSG(this, "cache miss");
@ -268,7 +300,7 @@ const T & DiskVectorBase<T>::operator[](const unsigned int i) const
DV_DEBUG_MSG(this, "in cache: " << msg); DV_DEBUG_MSG(this, "in cache: " << msg);
#endif #endif
return cache.at(i); return cache[index.at(i)];
} }
template <typename T> template <typename T>
@ -307,12 +339,23 @@ template <typename T>
void DiskVectorBase<T>::evict(void) const void DiskVectorBase<T>::evict(void) const
{ {
auto &cache = *cachePtr_; auto &cache = *cachePtr_;
auto &modified = *modifiedPtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_; auto &loads = *loadsPtr_;
if (cache.size() >= cacheSize_) if (index.size() >= cacheSize_)
{ {
DV_DEBUG_MSG(this, "evicting " << loads.front()); unsigned int i = loads.front();
cache.erase(loads.front());
DV_DEBUG_MSG(this, "evicting " << i);
if (modified[index.at(i)])
{
DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk");
save(filename(i), cache[index.at(i)]);
}
freeInd.push(index.at(i));
index.erase(i);
loads.pop_front(); loads.pop_front();
} }
} }
@ -321,29 +364,43 @@ template <typename T>
void DiskVectorBase<T>::fetch(const unsigned int i) const void DiskVectorBase<T>::fetch(const unsigned int i) const
{ {
auto &cache = *cachePtr_; auto &cache = *cachePtr_;
auto &modified = *modifiedPtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_; auto &loads = *loadsPtr_;
struct stat s; struct stat s;
DV_DEBUG_MSG(this, "loading " << i << " from disk"); DV_DEBUG_MSG(this, "loading " << i << " from disk");
evict(); evict();
if(stat(filename(i).c_str(), &s) != 0) if(stat(filename(i).c_str(), &s) != 0)
{ {
HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised"); HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised");
} }
load(cache[i], filename(i)); index[i] = freeInd.top();
freeInd.pop();
load(cache[index.at(i)], filename(i));
loads.push_back(i); loads.push_back(i);
modified[index.at(i)] = false;
} }
template <typename T> template <typename T>
void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
{ {
auto &cache = *cachePtr_; auto &cache = *cachePtr_;
auto &modified = *modifiedPtr_;
auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_; auto &loads = *loadsPtr_;
evict(); evict();
cache[i] = obj; index[i] = freeInd.top();
freeInd.pop();
cache[index.at(i)] = obj;
loads.push_back(i); loads.push_back(i);
modified[index.at(i)] = false;
#ifdef DV_DEBUG #ifdef DV_DEBUG
std::string msg; std::string msg;

View File

@ -166,7 +166,13 @@ std::string Hadrons::dirname(const std::string &s)
void Hadrons::makeFileDir(const std::string filename, GridBase *g) void Hadrons::makeFileDir(const std::string filename, GridBase *g)
{ {
if (g->IsBoss()) bool doIt = true;
if (g)
{
doIt = g->IsBoss();
}
if (doIt)
{ {
std::string dir = dirname(filename); std::string dir = dirname(filename);
int status = mkdir(dir); int status = mkdir(dir);

View File

@ -32,6 +32,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <set> #include <set>
#include <stack> #include <stack>
#include <regex>
#include <Grid/Grid.h> #include <Grid/Grid.h>
#include <cxxabi.h> #include <cxxabi.h>
@ -217,15 +218,15 @@ typedef XmlReader ResultReader;
typedef XmlWriter ResultWriter; typedef XmlWriter ResultWriter;
#endif #endif
#define RESULT_FILE_NAME(name) \ #define RESULT_FILE_NAME(name, traj) \
name + "." + std::to_string(vm().getTrajectory()) + "." + resultFileExt name + "." + std::to_string(traj) + "." + resultFileExt
// recursive mkdir // recursive mkdir
#define MAX_PATH_LENGTH 512u #define MAX_PATH_LENGTH 512u
int mkdir(const std::string dirName); int mkdir(const std::string dirName);
std::string basename(const std::string &s); std::string basename(const std::string &s);
std::string dirname(const std::string &s); std::string dirname(const std::string &s);
void makeFileDir(const std::string filename, GridBase *g); void makeFileDir(const std::string filename, GridBase *g = nullptr);
// default Schur convention // default Schur convention
#ifndef HADRONS_DEFAULT_SCHUR #ifndef HADRONS_DEFAULT_SCHUR
@ -248,6 +249,20 @@ void makeFileDir(const std::string filename, GridBase *g);
// pretty print time profile // pretty print time profile
void printTimeProfile(const std::map<std::string, GridTime> &timing, GridTime total); void printTimeProfile(const std::map<std::string, GridTime> &timing, GridTime total);
// token replacement utility
template <typename T>
void tokenReplace(std::string &str, const std::string token,
const T &x, const std::string mark = "@")
{
std::string fullToken = mark + token + mark;
auto pos = str.find(fullToken);
if (pos != std::string::npos)
{
str.replace(pos, fullToken.size(), std::to_string(x));
}
}
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#include <Hadrons/Exceptions.hpp> #include <Hadrons/Exceptions.hpp>

View File

@ -5,17 +5,17 @@ lib_LIBRARIES = libHadrons.a
include modules.inc include modules.inc
libHadrons_a_SOURCES = \ libHadrons_a_SOURCES = \
$(modules_cc) \
Application.cc \ Application.cc \
Environment.cc \ Environment.cc \
Exceptions.cc \ Exceptions.cc \
Global.cc \ Global.cc \
Module.cc \ Module.cc \
TimerArray.cc \ TimerArray.cc \
VirtualMachine.cc VirtualMachine.cc \
$(modules_cc)
libHadrons_adir = $(includedir)/Hadrons libHadrons_adir = $(includedir)/Hadrons
nobase_libHadrons_a_HEADERS = \ nobase_libHadrons_a_HEADERS = \
$(modules_hpp) \
A2AVectors.hpp \ A2AVectors.hpp \
A2AMatrix.hpp \ A2AMatrix.hpp \
Application.hpp \ Application.hpp \
@ -33,4 +33,5 @@ nobase_libHadrons_a_HEADERS = \
ModuleFactory.hpp \ ModuleFactory.hpp \
Solver.hpp \ Solver.hpp \
TimerArray.hpp \ TimerArray.hpp \
VirtualMachine.hpp VirtualMachine.hpp \
$(modules_hpp)

View File

@ -144,7 +144,7 @@ if (env().getGrid()->IsBoss() and !ioStem.empty())\
{\ {\
makeFileDir(ioStem, env().getGrid());\ makeFileDir(ioStem, env().getGrid());\
{\ {\
ResultWriter _writer(RESULT_FILE_NAME(ioStem));\ ResultWriter _writer(RESULT_FILE_NAME(ioStem, vm().getTrajectory()));\
write(_writer, name, result);\ write(_writer, name, result);\
}\ }\
} }

View File

@ -24,7 +24,7 @@
#include <Hadrons/Modules/MSolver/Guesser.hpp> #include <Hadrons/Modules/MSolver/Guesser.hpp>
#include <Hadrons/Modules/MSolver/RBPrecCG.hpp> #include <Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Hadrons/Modules/MSolver/A2AVectors.hpp> #include <Hadrons/Modules/MSolver/A2AVectors.hpp>
#include <Hadrons/Modules/MSolver/A2AAslashVector.hpp> #include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
#include <Hadrons/Modules/MGauge/UnitEm.hpp> #include <Hadrons/Modules/MGauge/UnitEm.hpp>
#include <Hadrons/Modules/MGauge/StoutSmearing.hpp> #include <Hadrons/Modules/MGauge/StoutSmearing.hpp>
#include <Hadrons/Modules/MGauge/Unit.hpp> #include <Hadrons/Modules/MGauge/Unit.hpp>

View File

@ -33,10 +33,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/ModuleFactory.hpp> #include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AMatrix.hpp> #include <Hadrons/A2AMatrix.hpp>
#ifndef ASF_IO_TYPE
#define ASF_IO_TYPE ComplexF
#endif
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
/****************************************************************************** /******************************************************************************
@ -113,7 +109,7 @@ public:
typedef A2AMatrixBlockComputation<Complex, typedef A2AMatrixBlockComputation<Complex,
FermionField, FermionField,
A2AAslashFieldMetadata, A2AAslashFieldMetadata,
ASF_IO_TYPE> Computation; HADRONS_A2AM_IO_TYPE> Computation;
typedef AslashFieldKernel<Complex, FImpl> Kernel; typedef AslashFieldKernel<Complex, FImpl> Kernel;
public: public:
// constructor // constructor
@ -196,7 +192,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
LOG(Message) << " " << name << std::endl; LOG(Message) << " " << name << std::endl;
} }
LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(ASF_IO_TYPE)) << " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE))
<< "/EM field)" << std::endl; << "/EM field)" << std::endl;
// preparing "B" complexified fields // preparing "B" complexified fields

View File

@ -35,10 +35,6 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Hadrons/ModuleFactory.hpp> #include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/A2AMatrix.hpp> #include <Hadrons/A2AMatrix.hpp>
#ifndef MF_IO_TYPE
#define MF_IO_TYPE ComplexF
#endif
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
/****************************************************************************** /******************************************************************************
@ -118,7 +114,7 @@ public:
typedef A2AMatrixBlockComputation<Complex, typedef A2AMatrixBlockComputation<Complex,
FermionField, FermionField,
A2AMesonFieldMetadata, A2AMesonFieldMetadata,
MF_IO_TYPE> Computation; HADRONS_A2AM_IO_TYPE> Computation;
typedef MesonFieldKernel<Complex, FImpl> Kernel; typedef MesonFieldKernel<Complex, FImpl> Kernel;
public: public:
// constructor // constructor
@ -248,7 +244,7 @@ void TA2AMesonField<FImpl>::execute(void)
LOG(Message) << " " << g << std::endl; LOG(Message) << " " << g << std::endl;
} }
LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j LOG(Message) << "Meson field size: " << nt << "*" << N_i << "*" << N_j
<< " (filesize " << sizeString(nt*N_i*N_j*sizeof(MF_IO_TYPE)) << " (filesize " << sizeString(nt*N_i*N_j*sizeof(HADRONS_A2AM_IO_TYPE))
<< "/momentum/bilinear)" << std::endl; << "/momentum/bilinear)" << std::endl;
auto &ph = envGet(std::vector<ComplexField>, momphName_); auto &ph = envGet(std::vector<ComplexField>, momphName_);

View File

@ -146,7 +146,7 @@ void TChargedProp::execute(void)
std::vector<int> siteCoor; std::vector<int> siteCoor;
LOG(Message) << "Saving momentum-projected propagator to '" LOG(Message) << "Saving momentum-projected propagator to '"
<< RESULT_FILE_NAME(par().output) << "'..." << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
<< std::endl; << std::endl;
result.projection.resize(par().outputMom.size()); result.projection.resize(par().outputMom.size());
result.lattice_size = env().getGrid()->_fdimensions; result.lattice_size = env().getGrid()->_fdimensions;

View File

@ -462,7 +462,7 @@ void TScalarVP::execute(void)
if (!par().output.empty()) if (!par().output.empty())
{ {
LOG(Message) << "Saving momentum-projected HVP to '" LOG(Message) << "Saving momentum-projected HVP to '"
<< RESULT_FILE_NAME(par().output) << "'..." << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
<< std::endl; << std::endl;
saveResult(par().output, "HVP", outputData); saveResult(par().output, "HVP", outputData);
} }

View File

@ -239,7 +239,7 @@ void TVPCounterTerms::execute(void)
if (!par().output.empty()) if (!par().output.empty())
{ {
LOG(Message) << "Saving momentum-projected correlators to '" LOG(Message) << "Saving momentum-projected correlators to '"
<< RESULT_FILE_NAME(par().output) << "'..." << RESULT_FILE_NAME(par().output, vm().getTrajectory()) << "'..."
<< std::endl; << std::endl;
saveResult(par().output, "scalar_loops", outputData); saveResult(par().output, "scalar_loops", outputData);
} }

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MSolver/A2AAslashVector.cc Source file: Hadrons/Modules/MSolver/A2AAslashVectors.cc
Copyright (C) 2015-2018 Copyright (C) 2015-2018
@ -25,11 +25,11 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Hadrons/Modules/MSolver/A2AAslashVector.hpp> #include <Hadrons/Modules/MSolver/A2AAslashVectors.hpp>
using namespace Grid; using namespace Grid;
using namespace Hadrons; using namespace Hadrons;
using namespace MSolver; using namespace MSolver;
template class Grid::Hadrons::MSolver::TA2AAslashVector<FIMPL>; template class Grid::Hadrons::MSolver::TA2AAslashVectors<FIMPL>;
template class Grid::Hadrons::MSolver::TA2AAslashVector<ZFIMPL>; template class Grid::Hadrons::MSolver::TA2AAslashVectors<ZFIMPL>;

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Modules/MSolver/A2AAslashVector.hpp Source file: Hadrons/Modules/MSolver/A2AAslashVectors.hpp
Copyright (C) 2015-2018 Copyright (C) 2015-2018
@ -25,13 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_MSolver_A2AAslashVector_hpp_ #ifndef Hadrons_MSolver_A2AAslashVectors_hpp_
#define Hadrons_MSolver_A2AAslashVector_hpp_ #define Hadrons_MSolver_A2AAslashVectors_hpp_
#include <Hadrons/Global.hpp> #include <Hadrons/Global.hpp>
#include <Hadrons/Module.hpp> #include <Hadrons/Module.hpp>
#include <Hadrons/ModuleFactory.hpp> #include <Hadrons/ModuleFactory.hpp>
#include <Hadrons/Solver.hpp> #include <Hadrons/Solver.hpp>
#include <Hadrons/A2AVectors.hpp>
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
@ -54,18 +55,19 @@ BEGIN_MODULE_NAMESPACE(MSolver)
* *
*****************************************************************************/ *****************************************************************************/
class A2AAslashVectorsPar: Serializable
class A2AAslashVectorPar: Serializable
{ {
public: public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashVectorPar, GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashVectorsPar,
std::string, vector, std::string, vector,
std::string, emField, std::string, emField,
std::string, solver); std::string, solver,
std::string, output,
bool, multiFile);
}; };
template <typename FImpl> template <typename FImpl>
class TA2AAslashVector : public Module<A2AAslashVectorPar> class TA2AAslashVectors : public Module<A2AAslashVectorsPar>
{ {
public: public:
FERM_TYPE_ALIASES(FImpl,); FERM_TYPE_ALIASES(FImpl,);
@ -74,9 +76,9 @@ public:
typedef PhotonR::GaugeField EmField; typedef PhotonR::GaugeField EmField;
public: public:
// constructor // constructor
TA2AAslashVector(const std::string name); TA2AAslashVectors(const std::string name);
// destructor // destructor
virtual ~TA2AAslashVector(void) {}; virtual ~TA2AAslashVectors(void) {};
// dependency relation // dependency relation
virtual std::vector<std::string> getInput(void); virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void); virtual std::vector<std::string> getOutput(void);
@ -88,21 +90,21 @@ private:
unsigned int Ls_; unsigned int Ls_;
}; };
MODULE_REGISTER_TMP(A2AAslashVector,TA2AAslashVector<FIMPL>, MSolver); MODULE_REGISTER_TMP(A2AAslashVectors, TA2AAslashVectors<FIMPL>, MSolver);
MODULE_REGISTER_TMP(ZA2AAslashVector,TA2AAslashVector<ZFIMPL>, MSolver); MODULE_REGISTER_TMP(ZA2AAslashVectors, TA2AAslashVectors<ZFIMPL>, MSolver);
/****************************************************************************** /******************************************************************************
* TA2AAslashVector implementation * * TA2AAslashVectors implementation *
******************************************************************************/ ******************************************************************************/
// constructor ///////////////////////////////////////////////////////////////// // constructor /////////////////////////////////////////////////////////////////
template <typename FImpl> template <typename FImpl>
TA2AAslashVector<FImpl>::TA2AAslashVector(const std::string name) TA2AAslashVectors<FImpl>::TA2AAslashVectors(const std::string name)
: Module<A2AAslashVectorPar>(name) : Module<A2AAslashVectorsPar>(name)
{} {}
// dependencies/products /////////////////////////////////////////////////////// // dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl> template <typename FImpl>
std::vector<std::string> TA2AAslashVector<FImpl>::getInput(void) std::vector<std::string> TA2AAslashVectors<FImpl>::getInput(void)
{ {
std::vector<std::string> in = {par().vector, par().emField, par().solver}; std::vector<std::string> in = {par().vector, par().emField, par().solver};
@ -110,7 +112,7 @@ std::vector<std::string> TA2AAslashVector<FImpl>::getInput(void)
} }
template <typename FImpl> template <typename FImpl>
std::vector<std::string> TA2AAslashVector<FImpl>::getOutput(void) std::vector<std::string> TA2AAslashVectors<FImpl>::getOutput(void)
{ {
std::vector<std::string> out = {getName()}; std::vector<std::string> out = {getName()};
@ -119,7 +121,7 @@ std::vector<std::string> TA2AAslashVector<FImpl>::getOutput(void)
// setup /////////////////////////////////////////////////////////////////////// // setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl> template <typename FImpl>
void TA2AAslashVector<FImpl>::setup(void) void TA2AAslashVectors<FImpl>::setup(void)
{ {
Ls_ = env().getObjectLs(par().solver); Ls_ = env().getObjectLs(par().solver);
auto &vvector = envGet(std::vector<FermionField>, par().vector); auto &vvector = envGet(std::vector<FermionField>, par().vector);
@ -134,7 +136,7 @@ void TA2AAslashVector<FImpl>::setup(void)
// execution /////////////////////////////////////////////////////////////////// // execution ///////////////////////////////////////////////////////////////////
template <typename FImpl> template <typename FImpl>
void TA2AAslashVector<FImpl>::execute(void) void TA2AAslashVectors<FImpl>::execute(void)
{ {
auto &solver = envGet(Solver, par().solver); auto &solver = envGet(Solver, par().solver);
auto &stoch_photon = envGet(EmField, par().emField); auto &stoch_photon = envGet(EmField, par().emField);
@ -148,13 +150,9 @@ void TA2AAslashVector<FImpl>::execute(void)
Complex ci(0.0,1.0); Complex ci(0.0,1.0);
startTimer("Seq Aslash"); startTimer("Seq Aslash");
LOG(Message) << "Calculate Sequential propagator on Aslash * v with the A2A vector "
LOG(Message) << "Calculate Sequential propagator on Aslash * v with the A2A vector " << par().vector << par().vector << " and the photon field " << par().emField << std::endl;
<< " and the photon field " << par().emField << std::endl;
for(unsigned int i=0; i<Nmodes; i++) for(unsigned int i=0; i<Nmodes; i++)
{ {
v4dtmp = zero; v4dtmp = zero;
@ -166,6 +164,7 @@ void TA2AAslashVector<FImpl>::execute(void)
} }
stopTimer("Multiply Aslash"); stopTimer("Multiply Aslash");
startTimer("Inversion");
if (Ls_ == 1) if (Ls_ == 1)
{ {
solver(Aslashv[i], v4dtmp); solver(Aslashv[i], v4dtmp);
@ -177,13 +176,19 @@ void TA2AAslashVector<FImpl>::execute(void)
mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp); mat.ExportPhysicalFermionSolution(v5dtmp_sol, v4dtmp);
Aslashv[i] = v4dtmp; Aslashv[i] = v4dtmp;
} }
stopTimer("Inversion");
} }
stopTimer("Seq Aslash"); stopTimer("Seq Aslash");
if (!par().output.empty())
{
startTimer("I/O");
A2AVectorsIo::write(par().output, Aslashv, par().multiFile, vm().getTrajectory());
stopTimer("I/O");
}
} }
END_MODULE_NAMESPACE END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_MSolver_A2AAslashVector_hpp_ #endif // Hadrons_MSolver_A2AAslashVectors_hpp_

View File

@ -0,0 +1,454 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/Utilities/Contractor.cc
Copyright (C) 2015-2018
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Hadrons/Global.hpp>
#include <Hadrons/A2AMatrix.hpp>
#include <Hadrons/DiskVector.hpp>
#include <Hadrons/TimerArray.hpp>
using namespace Grid;
using namespace QCD;
using namespace Hadrons;
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
namespace Contractor
{
class TrajRange: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
unsigned int, start,
unsigned int, end,
unsigned int, step);
};
class GlobalPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
TrajRange, trajCounter,
unsigned int, nt,
std::string, diskVectorDir,
std::string, output);
};
class A2AMatrixPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
std::string, file,
std::string, dataset,
unsigned int, cacheSize,
std::string, name);
};
class ProductPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
std::string, terms,
std::vector<std::string>, times,
std::string, translations,
bool, translationAverage);
};
class CorrelatorResult: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
std::vector<Contractor::A2AMatrixPar>, a2aMatrix,
ProductPar, contraction,
std::vector<unsigned int>, times,
std::vector<ComplexD>, correlator);
};
}
struct ContractorPar
{
Contractor::GlobalPar global;
std::vector<Contractor::A2AMatrixPar> a2aMatrix;
std::vector<Contractor::ProductPar> product;
};
void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
const std::vector<std::set<unsigned int>> &times,
std::vector<unsigned int> &current,
const unsigned int depth)
{
if (depth > 0)
{
for (auto t: times[times.size() - depth])
{
current[times.size() - depth] = t;
makeTimeSeq(timeSeq, times, current, depth - 1);
}
}
else
{
timeSeq.push_back(current);
}
}
void makeTimeSeq(std::vector<std::vector<unsigned int>> &timeSeq,
const std::vector<std::set<unsigned int>> &times)
{
std::vector<unsigned int> current(times.size());
makeTimeSeq(timeSeq, times, current, times.size());
}
void saveCorrelator(const Contractor::CorrelatorResult &result, const std::string dir,
const unsigned int dt, const unsigned int traj)
{
std::string fileStem = "", filename;
std::vector<std::string> terms = strToVec<std::string>(result.contraction.terms);
for (unsigned int i = 0; i < terms.size() - 1; i++)
{
fileStem += terms[i] + "_" + std::to_string(result.times[i]) + "_";
}
fileStem += terms.back();
if (!result.contraction.translationAverage)
{
fileStem += "_dt_" + std::to_string(dt);
}
filename = dir + "/" + RESULT_FILE_NAME(fileStem, traj);
std::cout << "Saving correlator to '" << filename << "'" << std::endl;
makeFileDir(dir);
ResultWriter writer(filename);
write(writer, fileStem, result);
}
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
{
std::regex rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))");
std::smatch sm;
std::vector<std::string> rstr = strToVec<std::string>(str);
std::set<unsigned int> tSet;
for (auto &s: rstr)
{
std::regex_match(s, sm, rex);
if (sm[1].matched)
{
unsigned int t;
t = std::stoi(sm[1].str());
if (t >= nt)
{
HADRONS_ERROR(Range, "time out of range (from expression '" + str + "')");
}
tSet.insert(t);
}
else if (sm[2].matched)
{
unsigned int ta, tb;
ta = std::stoi(sm[3].str());
tb = std::stoi(sm[4].str());
if ((ta >= nt) or (tb >= nt))
{
HADRONS_ERROR(Range, "time out of range (from expression '" + str + "')");
}
for (unsigned int ti = ta; ti <= tb; ++ti)
{
tSet.insert(ti);
}
}
}
return tSet;
}
struct Sec
{
Sec(const double usec)
{
seconds = usec/1.0e6;
}
double seconds;
};
inline std::ostream & operator<< (std::ostream& s, const Sec &&sec)
{
s << std::setw(10) << sec.seconds << " sec";
return s;
}
struct Flops
{
Flops(const double flops, const double fusec)
{
gFlopsPerSec = flops/fusec/1.0e3;
}
double gFlopsPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Flops &&f)
{
s << std::setw(10) << f.gFlopsPerSec << " GFlop/s";
return s;
}
struct Bytes
{
Bytes(const double bytes, const double busec)
{
gBytesPerSec = bytes/busec*1.0e6/1024/1024/1024;
}
double gBytesPerSec;
};
inline std::ostream & operator<< (std::ostream& s, const Bytes &&b)
{
s << std::setw(10) << b.gBytesPerSec << " GB/s";
return s;
}
int main(int argc, char* argv[])
{
// parse command line
std::string parFilename;
if (argc != 2)
{
std::cerr << "usage: " << argv[0] << " <parameter file>";
std::cerr << std::endl;
return EXIT_FAILURE;
}
parFilename = argv[1];
// parse parameter file
ContractorPar par;
unsigned int nMat, nCont;
XmlReader reader(parFilename);
read(reader, "global", par.global);
read(reader, "a2aMatrix", par.a2aMatrix);
read(reader, "product", par.product);
nMat = par.a2aMatrix.size();
nCont = par.product.size();
// create diskvectors
std::map<std::string, EigenDiskVector<ComplexD>> a2aMat;
unsigned int cacheSize;
for (auto &p: par.a2aMatrix)
{
std::string dirName = par.global.diskVectorDir + "/" + p.name;
a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
}
// trajectory loop
for (unsigned int traj = par.global.trajCounter.start;
traj < par.global.trajCounter.end; traj += par.global.trajCounter.step)
{
std::cout << ":::::::: Trajectory " << traj << std::endl;
// load data
for (auto &p: par.a2aMatrix)
{
std::string filename = p.file;
double t, size;
tokenReplace(filename, "traj", traj);
std::cout << "======== Loading '" << filename << "'" << std::endl;
A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
a2aIo.load(a2aMat.at(p.name), &t);
std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6
<< " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
}
// contract
EigenDiskVector<ComplexD>::Matrix buf;
for (auto &p: par.product)
{
std::vector<std::string> term = strToVec<std::string>(p.terms);
std::vector<std::set<unsigned int>> times;
std::vector<std::vector<unsigned int>> timeSeq;
std::set<unsigned int> translations;
std::vector<A2AMatrixTr<ComplexD>> lastTerm(par.global.nt);
A2AMatrix<ComplexD> prod, buf, tmp;
TimerArray tAr;
double fusec, busec, flops, bytes, tusec;
Contractor::CorrelatorResult result;
tAr.startTimer("Total");
std::cout << "======== Contraction tr(";
for (unsigned int g = 0; g < term.size(); ++g)
{
std::cout << term[g] << ((g == term.size() - 1) ? ')' : '*');
}
std::cout << std::endl;
if (term.size() != p.times.size() + 1)
{
HADRONS_ERROR(Size, "number of terms (" + std::to_string(term.size())
+ ") different from number of times ("
+ std::to_string(p.times.size() + 1) + ")");
}
for (auto &s: p.times)
{
times.push_back(parseTimeRange(s, par.global.nt));
}
for (auto &m: par.a2aMatrix)
{
if (std::find(result.a2aMatrix.begin(), result.a2aMatrix.end(), m) == result.a2aMatrix.end())
{
result.a2aMatrix.push_back(m);
tokenReplace(result.a2aMatrix.back().file, "traj", traj);
}
}
result.contraction = p;
result.correlator.resize(par.global.nt, 0.);
translations = parseTimeRange(p.translations, par.global.nt);
makeTimeSeq(timeSeq, times);
std::cout << timeSeq.size()*translations.size()*(term.size() - 2) << " A*B, "
<< timeSeq.size()*translations.size()*par.global.nt << " tr(A*B)"
<< std::endl;
std::cout << "* Caching transposed last term" << std::endl;
for (unsigned int t = 0; t < par.global.nt; ++t)
{
tAr.startTimer("Disk vector overhead");
const A2AMatrix<ComplexD> &ref = a2aMat.at(term.back())[t];
tAr.stopTimer("Disk vector overhead");
tAr.startTimer("Transpose caching");
lastTerm[t].resize(ref.rows(), ref.cols());
parallel_for (unsigned int j = 0; j < ref.cols(); ++j)
for (unsigned int i = 0; i < ref.rows(); ++i)
{
lastTerm[t](i, j) = ref(i, j);
}
tAr.stopTimer("Transpose caching");
}
bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "
<< Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
for (unsigned int i = 0; i < timeSeq.size(); ++i)
{
unsigned int dti = 0;
auto &t = timeSeq[i];
result.times = t;
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
result.correlator[tLast] = 0.;
}
for (auto &dt: translations)
{
std::cout << "* Step " << i*translations.size() + dti + 1
<< "/" << timeSeq.size()*translations.size()
<< " -- positions= " << t << ", dt= " << dt << std::endl;
if (term.size() > 2)
{
std::cout << std::setw(8) << "products";
}
flops = 0.;
bytes = 0.;
fusec = tAr.getDTimer("A*B algebra");
busec = tAr.getDTimer("A*B total");
tAr.startTimer("Linear algebra");
tAr.startTimer("Disk vector overhead");
prod = a2aMat.at(term[0])[TIME_MOD(t[0] + dt)];
tAr.stopTimer("Disk vector overhead");
for (unsigned int j = 1; j < term.size() - 1; ++j)
{
tAr.startTimer("Disk vector overhead");
const A2AMatrix<ComplexD> &ref = a2aMat.at(term[j])[TIME_MOD(t[j] + dt)];
tAr.stopTimer("Disk vector overhead");
tAr.startTimer("A*B total");
tAr.startTimer("A*B algebra");
A2AContraction::mul(tmp, prod, ref);
tAr.stopTimer("A*B algebra");
flops += A2AContraction::mulFlops(prod, ref);
prod = tmp;
tAr.stopTimer("A*B total");
bytes += 3.*tmp.rows()*tmp.cols()*sizeof(ComplexD);
}
if (term.size() > 2)
{
std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
<< Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " "
<< Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
}
std::cout << std::setw(8) << "traces";
flops = 0.;
bytes = 0.;
fusec = tAr.getDTimer("tr(A*B)");
busec = tAr.getDTimer("tr(A*B)");
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
tAr.startTimer("tr(A*B)");
A2AContraction::accTrMul(result.correlator[TIME_MOD(tLast - dt)], prod, lastTerm[tLast]);
tAr.stopTimer("tr(A*B)");
flops += A2AContraction::accTrMulFlops(prod, lastTerm[tLast]);
bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
}
tAr.stopTimer("Linear algebra");
std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
<< Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " "
<< Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
if (!p.translationAverage)
{
saveCorrelator(result, par.global.output, dt, traj);
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
result.correlator[tLast] = 0.;
}
}
dti++;
}
if (p.translationAverage)
{
for (unsigned int tLast = 0; tLast < par.global.nt; ++tLast)
{
result.correlator[tLast] /= translations.size();
}
saveCorrelator(result, par.global.output, 0, traj);
}
}
tAr.stopTimer("Total");
printTimeProfile(tAr.getTimings(), tAr.getTimer("Total"));
}
}
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,12 @@
#ifndef Hadrons_Contractor_hpp_
#define Hadrons_Contractor_hpp_
#include <Hadrons/Global.hpp>
BEGIN_HADRONS_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Contractor_hpp_

View File

@ -0,0 +1,434 @@
#include <Hadrons/Global.hpp>
#include <Hadrons/A2AMatrix.hpp>
#ifdef USE_MKL
#include "mkl.h"
#include "mkl_cblas.h"
#endif
using namespace Grid;
using namespace Hadrons;
#ifdef GRID_COMMS_MPI3
#define GET_RANK(rank, nMpi) \
MPI_Comm_size(MPI_COMM_WORLD, &(nMpi));\
MPI_Comm_rank(MPI_COMM_WORLD, &(rank))
#define BARRIER() MPI_Barrier(MPI_COMM_WORLD)
#define INIT() MPI_Init(NULL, NULL)
#define FINALIZE() MPI_Finalize()
#else
#define GET_RANK(rank, nMpi) (nMpi) = 1; (rank) = 0
#define BARRIER()
#define INIT()
#define FINALIZE()
#endif
template <typename Function, typename MatLeft, typename MatRight>
inline void trBenchmark(const std::string name, const MatLeft &left,
const MatRight &right, const ComplexD ref, Function fn)
{
double t, flops, bytes, n = left[0].rows()*left[0].cols();
unsigned int nMat = left.size();
int nMpi, rank;
ComplexD buf;
t = 0.;
GET_RANK(rank, nMpi);
t = -usecond();
BARRIER();
for (unsigned int i = rank*nMat/nMpi; i < (rank+1)*nMat/nMpi; ++i)
{
fn(buf, left[i], right[i]);
}
BARRIER();
t += usecond();
flops = nMat*(6.*n + 2.*(n - 1.));
bytes = nMat*(2.*n*sizeof(ComplexD));
if (rank == 0)
{
std::cout << std::setw(34) << name << ": diff= "
<< std::setw(12) << std::norm(buf-ref)
<< std::setw(10) << t/1.0e6 << " sec "
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
<< std::endl;
}
::sleep(1);
}
template <typename Function, typename MatV, typename Mat>
inline void mulBenchmark(const std::string name, const MatV &left,
const MatV &right, const Mat &ref, Function fn)
{
double t, flops, bytes;
double nr = left[0].rows(), nc = left[0].cols(), n = nr*nc;
unsigned int nMat = left.size();
int nMpi, rank;
Mat buf(left[0].rows(), left[0].rows());
t = 0.;
GET_RANK(rank, nMpi);
t = -usecond();
BARRIER();
for (unsigned int i = rank*nMat/nMpi; i < (rank+1)*nMat/nMpi; ++i)
{
fn(buf, left[i], right[i]);
}
BARRIER();
t += usecond();
flops = nMat*(nr*nr*(6.*nc + 2.*(nc - 1.)));
bytes = nMat*(2*nc*nr*sizeof(ComplexD));
if (rank == 0)
{
std::cout << std::setw(34) << name << ": diff= "
<< std::setw(12) << (buf-ref).squaredNorm()
<< std::setw(10) << t/1.0e6 << " sec "
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
<< std::endl;
}
::sleep(1);
}
#ifdef USE_MKL
template <typename MatLeft, typename MatRight>
static inline void zdotuRow(ComplexD &res, const unsigned int aRow,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aRow*a.cols();
aInc = 1;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aRow;
aInc = a.rows();
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aRow;
bInc = b.cols();
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aRow*b.rows();
bInc = 1;
}
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
}
template <typename MatLeft, typename MatRight>
static inline void zdotuCol(ComplexD &res, const unsigned int aCol,
const MatLeft &a, const MatRight &b)
{
const ComplexD *aPt, *bPt;
unsigned int aInc, bInc;
if (MatLeft::Options == Eigen::RowMajor)
{
aPt = a.data() + aCol;
aInc = a.cols();
}
else if (MatLeft::Options == Eigen::ColMajor)
{
aPt = a.data() + aCol*a.rows();
aInc = 1;
}
if (MatRight::Options == Eigen::RowMajor)
{
bPt = b.data() + aCol*b.cols();
bInc = 1;
}
else if (MatRight::Options == Eigen::ColMajor)
{
bPt = b.data() + aCol;
bInc = b.rows();
}
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
}
#endif
template <typename MatLeft, typename MatRight>
void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
{
std::vector<MatLeft> left;
std::vector<MatRight> right;
MatRight buf;
ComplexD ref;
int rank, nMpi;
left.resize(nMat, MatLeft::Random(ni, nj));
right.resize(nMat, MatRight::Random(nj, ni));
GET_RANK(rank, nMpi);
if (rank == 0)
{
std::cout << "==== tr(A*B) benchmarks" << std::endl;
std::cout << "A matrices use ";
if (MatLeft::Options == Eigen::RowMajor)
{
std::cout << "row-major ordering" << std::endl;
}
else if (MatLeft::Options == Eigen::ColMajor)
{
std::cout << "col-major ordering" << std::endl;
}
std::cout << "B matrices use ";
if (MatRight::Options == Eigen::RowMajor)
{
std::cout << "row-major ordering" << std::endl;
}
else if (MatRight::Options == Eigen::ColMajor)
{
std::cout << "col-major ordering" << std::endl;
}
std::cout << std::endl;
}
BARRIER();
ref = (left.back()*right.back()).trace();
trBenchmark("Hadrons A2AContraction::accTrMul", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
A2AContraction::accTrMul(res, a, b);
});
trBenchmark("Naive loop rows first", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
auto nr = a.rows(), nc = a.cols();
res = 0.;
parallel_for (unsigned int i = 0; i < nr; ++i)
{
ComplexD tmp = 0.;
for (unsigned int j = 0; j < nc; ++j)
{
tmp += a(i, j)*b(j, i);
}
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Naive loop cols first", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
auto nr = a.rows(), nc = a.cols();
res = 0.;
parallel_for (unsigned int j = 0; j < nc; ++j)
{
ComplexD tmp = 0.;
for (unsigned int i = 0; i < nr; ++i)
{
tmp += a(i, j)*b(j, i);
}
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Eigen tr(A*B)", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = (a*b).trace();
});
trBenchmark("Eigen row-wise dot", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
ComplexD tmp;
tmp = a.row(r).conjugate().dot(b.col(r));
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Eigen col-wise dot", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
ComplexD tmp;
tmp = a.col(c).conjugate().dot(b.row(c));
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("Eigen Hadamard", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = a.cwiseProduct(b.transpose()).sum();
});
#ifdef USE_MKL
trBenchmark("MKL row-wise zdotu", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int r = 0; r < a.rows(); ++r)
{
ComplexD tmp;
zdotuRow(tmp, r, a, b);
parallel_critical
{
res += tmp;
}
}
});
trBenchmark("MKL col-wise zdotu", left, right, ref,
[](ComplexD &res, const MatLeft &a, const MatRight &b)
{
res = 0.;
parallel_for (unsigned int c = 0; c < a.cols(); ++c)
{
ComplexD tmp;
zdotuCol(tmp, c, a, b);
parallel_critical
{
res += tmp;
}
}
});
#endif
BARRIER();
if (rank == 0)
{
std::cout << std::endl;
}
}
template <typename Mat>
void fullMulBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
{
std::vector<Mat> left, right;
Mat ref;
int rank, nMpi;
left.resize(nMat, Mat::Random(ni, nj));
right.resize(nMat, Mat::Random(nj, ni));
GET_RANK(rank, nMpi);
if (rank == 0)
{
std::cout << "==== A*B benchmarks" << std::endl;
std::cout << "all matrices use ";
if (Mat::Options == Eigen::RowMajor)
{
std::cout << "row-major ordering" << std::endl;
}
else if (Mat::Options == Eigen::ColMajor)
{
std::cout << "col-major ordering" << std::endl;
}
std::cout << std::endl;
}
BARRIER();
ref = left.back()*right.back();
mulBenchmark("Hadrons A2AContraction::mul", left, right, ref,
[](Mat &res, const Mat &a, const Mat &b)
{
A2AContraction::mul(res, a, b);
});
mulBenchmark("Eigen A*B", left, right, ref,
[](Mat &res, const Mat &a, const Mat &b)
{
res = a*b;
});
#ifdef USE_MKL
mulBenchmark("MKL A*B", left, right, ref,
[](Mat &res, const Mat &a, const Mat &b)
{
const ComplexD one(1., 0.), zero(0., 0.);
if (Mat::Options == Eigen::RowMajor)
{
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
res.data(), res.cols());
}
else if (Mat::Options == Eigen::ColMajor)
{
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
res.data(), res.rows());
}
});
#endif
BARRIER();
if (rank == 0)
{
std::cout << std::endl;
}
}
int main(int argc, char *argv[])
{
// parse command line
Eigen::Index ni, nj, nMat;
int nMpi, rank;
if (argc != 4)
{
std::cerr << "usage: " << argv[0] << " <Ni> <Nj> <#matrices>";
std::cerr << std::endl;
return EXIT_FAILURE;
}
ni = std::stoi(argv[1]);
nj = std::stoi(argv[2]);
nMat = std::stoi(argv[3]);
INIT();
GET_RANK(rank, nMpi);
if (rank == 0)
{
std::cout << "\n*** ALL-TO-ALL MATRIX CONTRACTION BENCHMARK ***\n" << std::endl;
std::cout << nMat << " couples of " << ni << "x" << nj << " matrices\n" << std::endl;
std::cout << nMpi << " MPI processes" << std::endl;
#ifdef GRID_OMP
#pragma omp parallel
{
#pragma omp single
std::cout << omp_get_num_threads() << " threads\n" << std::endl;
}
#else
std::cout << "Single-threaded\n" << std::endl;
#endif
#ifdef EIGEN_USE_MKL_ALL
std::cout << "Eigen uses the MKL" << std::endl;
#endif
std::cout << "Eigen uses " << Eigen::nbThreads() << " threads" << std::endl;
#ifdef USE_MKL
std::cout << "MKL uses " << mkl_get_max_threads() << " threads" << std::endl;
#endif
std::cout << std::endl;
}
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
fullMulBenchmark<A2AMatrix<ComplexD>>(ni, nj, nMat);
fullMulBenchmark<A2AMatrixTr<ComplexD>>(ni, nj, nMat);
FINALIZE();
return EXIT_SUCCESS;
}

View File

@ -1,4 +1,4 @@
bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32 bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32 HadronsContractor HadronsContractorBenchmark
HadronsXmlRun_SOURCES = HadronsXmlRun.cc HadronsXmlRun_SOURCES = HadronsXmlRun.cc
HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a
@ -6,3 +6,9 @@ HadronsXmlRun_LDADD = ../libHadrons.a ../../Grid/libGrid.a
HadronsFermionEP64To32_SOURCES = EigenPackCast.cc HadronsFermionEP64To32_SOURCES = EigenPackCast.cc
HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
HadronsFermionEP64To32_LDADD = ../libHadrons.a ../../Grid/libGrid.a HadronsFermionEP64To32_LDADD = ../libHadrons.a ../../Grid/libGrid.a
HadronsContractor_SOURCES = Contractor.cc
HadronsContractor_LDADD = ../libHadrons.a ../../Grid/libGrid.a
HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc
HadronsContractorBenchmark_LDADD = ../libHadrons.a ../../Grid/libGrid.a

View File

@ -20,7 +20,7 @@ modules_cc =\
Modules/MSink/Point.cc \ Modules/MSink/Point.cc \
Modules/MSink/Smear.cc \ Modules/MSink/Smear.cc \
Modules/MSolver/A2AVectors.cc \ Modules/MSolver/A2AVectors.cc \
Modules/MSolver/A2AAslashVector.cc \ Modules/MSolver/A2AAslashVectors.cc \
Modules/MSolver/RBPrecCG.cc \ Modules/MSolver/RBPrecCG.cc \
Modules/MSolver/MixedPrecisionRBPrecCG.cc \ Modules/MSolver/MixedPrecisionRBPrecCG.cc \
Modules/MSolver/LocalCoherenceLanczos.cc \ Modules/MSolver/LocalCoherenceLanczos.cc \
@ -97,7 +97,7 @@ modules_hpp =\
Modules/MSolver/Guesser.hpp \ Modules/MSolver/Guesser.hpp \
Modules/MSolver/RBPrecCG.hpp \ Modules/MSolver/RBPrecCG.hpp \
Modules/MSolver/A2AVectors.hpp \ Modules/MSolver/A2AVectors.hpp \
Modules/MSolver/A2AAslashVector.hpp \ Modules/MSolver/A2AAslashVectors.hpp \
Modules/MGauge/UnitEm.hpp \ Modules/MGauge/UnitEm.hpp \
Modules/MGauge/StoutSmearing.hpp \ Modules/MGauge/StoutSmearing.hpp \
Modules/MGauge/Unit.hpp \ Modules/MGauge/Unit.hpp \

View File

@ -123,10 +123,13 @@ case ${ac_SFW_FP16} in
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);; AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
esac esac
############### MKL ############### Intel libraries
AC_ARG_ENABLE([mkl], AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
[ac_MKL=${enable_mkl}], [ac_MKL=no]) [ac_MKL=${enable_mkl}], [ac_MKL=no])
AC_ARG_ENABLE([ipp],
[AC_HELP_STRING([--enable-ipp=yes|no|prefix], [enable Intel IPP for fast CRC32C])],
[ac_IPP=${enable_mkl}], [ac_IPP=no])
case ${ac_MKL} in case ${ac_MKL} in
no) no)
@ -139,6 +142,17 @@ case ${ac_MKL} in
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);; AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
esac esac
case ${ac_IPP} in
no)
;;
yes)
AC_DEFINE([USE_IPP], [1], [Define to 1 if you use the Intel IPP]);;
*)
AM_CXXFLAGS="-I$ac_IPP/include $AM_CXXFLAGS"
AM_LDFLAGS="-L$ac_IPP/lib $AM_LDFLAGS"
AC_DEFINE([USE_IPP], [1], [Define to 1 if you use the Intel IPP]);;
esac
############### HDF5 ############### HDF5
AC_ARG_WITH([hdf5], AC_ARG_WITH([hdf5],
[AS_HELP_STRING([--with-hdf5=prefix], [AS_HELP_STRING([--with-hdf5=prefix],
@ -170,7 +184,13 @@ AC_CHECK_FUNCS([gettimeofday])
if test "${ac_MKL}x" != "nox"; then if test "${ac_MKL}x" != "nox"; then
AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [], AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
[AC_MSG_ERROR("MKL enabled but library not found")]) [AC_MSG_ERROR("Intel MKL enabled but library not found")])
fi
if test "${ac_IPP}x" != "nox"; then
AC_SEARCH_LIBS([ippsCRC32C_8u], [ippdc],
[LIBS="${LIBS} -lippdc -lippvm -lipps -lippcore"],
[AC_MSG_ERROR("Intel IPP enabled but library not found")])
fi fi
AC_SEARCH_LIBS([__gmpf_init], [gmp], AC_SEARCH_LIBS([__gmpf_init], [gmp],