mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Hadrons: fast A2A matrix contraction kernels
This commit is contained in:
parent
9734e3ee58
commit
88d9922e4f
@ -39,12 +39,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
|
|
||||||
// complex reductions
|
// complex reductions
|
||||||
#pragma omp declare reduction(ComplexPlus: Grid::Complex: omp_out += omp_in)
|
#pragma omp declare reduction(ComplexPlus:Grid::ComplexD, Grid::vComplexD, Grid::ComplexF, Grid::vComplexF: omp_out += omp_in)
|
||||||
#pragma omp declare reduction(GridVComplexPlus: Grid::vComplex: omp_out += omp_in)
|
|
||||||
#pragma omp declare reduction(ComplexDPlus: Grid::ComplexD: omp_out += omp_in)
|
|
||||||
#pragma omp declare reduction(GridVComplexDPlus: Grid::vComplexD: omp_out += omp_in)
|
|
||||||
#pragma omp declare reduction(ComplexFPlus: Grid::ComplexF: omp_out += omp_in)
|
|
||||||
#pragma omp declare reduction(GridVComplexFPlus: Grid::vComplexF: omp_out += omp_in)
|
|
||||||
|
|
||||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
||||||
|
@ -32,6 +32,10 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
#include <Hadrons/Global.hpp>
|
#include <Hadrons/Global.hpp>
|
||||||
#include <Hadrons/TimerArray.hpp>
|
#include <Hadrons/TimerArray.hpp>
|
||||||
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
#include <Grid/Eigen/unsupported/CXX11/Tensor>
|
||||||
|
#ifdef USE_MKL
|
||||||
|
#include "mkl.h"
|
||||||
|
#include "mkl_cblas.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef HADRONS_A2AM_NAME
|
#ifndef HADRONS_A2AM_NAME
|
||||||
#define HADRONS_A2AM_NAME "a2aMatrix"
|
#define HADRONS_A2AM_NAME "a2aMatrix"
|
||||||
@ -58,6 +62,9 @@ using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
|
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
* Abstract class for A2A kernels *
|
* Abstract class for A2A kernels *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
@ -150,6 +157,198 @@ private:
|
|||||||
std::vector<IoHelper> nodeIo_;
|
std::vector<IoHelper> nodeIo_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
* A2A matrix contraction kernels *
|
||||||
|
******************************************************************************/
|
||||||
|
class A2AContraction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// accTrMul(acc, a, b): acc += tr(a*b)
|
||||||
|
template <typename C, typename MatLeft, typename MatRight>
|
||||||
|
static inline void accTrMul(C &acc, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
if ((MatLeft::Options == Eigen::RowMajor) and
|
||||||
|
(MatRight::Options == Eigen::ColMajor))
|
||||||
|
{
|
||||||
|
parallel_for_reduce(ComplexPlus, acc) (unsigned int r = 0; r < a.rows(); ++r)
|
||||||
|
{
|
||||||
|
#ifdef USE_MKL
|
||||||
|
ComplexD tmp;
|
||||||
|
dotuRow(tmp, r, a, b);
|
||||||
|
acc += tmp;
|
||||||
|
#else
|
||||||
|
acc += a.row(r).conjugate().dot(b.col(r));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parallel_for_reduce(ComplexPlus, acc) (unsigned int c = 0; c < a.cols(); ++c)
|
||||||
|
{
|
||||||
|
#ifdef USE_MKL
|
||||||
|
ComplexD tmp;
|
||||||
|
dotuCol(tmp, c, a, b);
|
||||||
|
acc += tmp;
|
||||||
|
#else
|
||||||
|
acc += a.col(c).conjugate().dot(b.row(c));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mul(res, a, b): res = a*b
|
||||||
|
#ifdef USE_MKL
|
||||||
|
template <template <class, int...> class Mat, int... Opts>
|
||||||
|
static inline void mul(Mat<ComplexD, Opts...> &res,
|
||||||
|
const Mat<ComplexD, Opts...> &a,
|
||||||
|
const Mat<ComplexD, Opts...> &b)
|
||||||
|
{
|
||||||
|
static const ComplexD one(1., 0.), zero(0., 0.);
|
||||||
|
|
||||||
|
if (Mat<ComplexD, Opts...>::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
|
||||||
|
res.data(), res.cols());
|
||||||
|
}
|
||||||
|
else if (Mat<ComplexD, Opts...>::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
|
||||||
|
res.data(), res.rows());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <template <class, int...> class Mat, int... Opts>
|
||||||
|
static inline void mul(Mat<ComplexF, Opts...> &res,
|
||||||
|
const Mat<ComplexF, Opts...> &a,
|
||||||
|
const Mat<ComplexF, Opts...> &b)
|
||||||
|
{
|
||||||
|
static const ComplexF one(1., 0.), zero(0., 0.);
|
||||||
|
|
||||||
|
if (Mat<ComplexF, Opts...>::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.cols(), b.data(), b.cols(), &zero,
|
||||||
|
res.data(), res.cols());
|
||||||
|
}
|
||||||
|
else if (Mat<ComplexF, Opts...>::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, a.rows(), b.cols(),
|
||||||
|
a.cols(), &one, a.data(), a.rows(), b.data(), b.rows(), &zero,
|
||||||
|
res.data(), res.rows());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <typename Mat>
|
||||||
|
static inline void mul(Mat &res, const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
res = a*b;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
private:
|
||||||
|
template <typename C, typename MatLeft, typename MatRight>
|
||||||
|
static inline void makeDotRowPt(C * &aPt, unsigned int &aInc, C * &bPt,
|
||||||
|
unsigned int &bInc, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aRow*a.cols();
|
||||||
|
aInc = 1;
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aRow;
|
||||||
|
aInc = a.rows();
|
||||||
|
}
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aRow;
|
||||||
|
bInc = b.cols();
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aRow*b.rows();
|
||||||
|
bInc = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_MKL
|
||||||
|
template <typename C, typename MatLeft, typename MatRight>
|
||||||
|
static inline void makeDotColPt(C * &aPt, unsigned int &aInc, C * &bPt,
|
||||||
|
unsigned int &bInc, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
if (MatLeft::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aCol;
|
||||||
|
aInc = a.cols();
|
||||||
|
}
|
||||||
|
else if (MatLeft::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
aPt = a.data() + aCol*a.rows();
|
||||||
|
aInc = 1;
|
||||||
|
}
|
||||||
|
if (MatRight::Options == Eigen::RowMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aCol*b.cols();
|
||||||
|
bInc = 1;
|
||||||
|
}
|
||||||
|
else if (MatRight::Options == Eigen::ColMajor)
|
||||||
|
{
|
||||||
|
bPt = b.data() + aCol;
|
||||||
|
bInc = b.rows();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuRow(ComplexF &res, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexF *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
|
||||||
|
cblas_cdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuCol(ComplexF &res, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexF *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
|
||||||
|
cblas_cdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuRow(ComplexD &res, const unsigned int aRow,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexD *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotRowPt(aPt, aInc, bPt, bInc, aRow, a, b);
|
||||||
|
cblas_zdotu_sub(a.cols(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename MatLeft, typename MatRight>
|
||||||
|
static inline void dotuCol(ComplexD &res, const unsigned int aCol,
|
||||||
|
const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
const ComplexD *aPt, *bPt;
|
||||||
|
unsigned int aInc, bInc;
|
||||||
|
|
||||||
|
makeDotColPt(aPt, aInc, bPt, bInc, aCol, a, b);
|
||||||
|
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
* A2AMatrixIo template implementation *
|
* A2AMatrixIo template implementation *
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
|
@ -1,16 +1,12 @@
|
|||||||
#include <Hadrons/Global.hpp>
|
#include <Hadrons/Global.hpp>
|
||||||
|
#include <Hadrons/A2AMatrix.hpp>
|
||||||
#ifdef USE_MKL
|
#ifdef USE_MKL
|
||||||
#include "mkl.h"
|
#include "mkl.h"
|
||||||
#include "mkl_cblas.h"
|
#include "mkl_cblas.h"
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX2
|
|
||||||
#include <immintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
using namespace Hadrons;
|
||||||
typedef Eigen::Matrix<ComplexD, -1, -1, Eigen::RowMajor> RMCMat;
|
|
||||||
typedef Eigen::Matrix<ComplexD, -1, -1, Eigen::ColMajor> CMCMat;
|
|
||||||
|
|
||||||
#ifdef GRID_COMMS_MPI3
|
#ifdef GRID_COMMS_MPI3
|
||||||
#define GET_RANK(rank, nMpi) \
|
#define GET_RANK(rank, nMpi) \
|
||||||
@ -50,7 +46,7 @@ inline void trBenchmark(const std::string name, const MatLeft &left,
|
|||||||
|
|
||||||
if (rank == 0)
|
if (rank == 0)
|
||||||
{
|
{
|
||||||
std::cout << std::setw(30) << name << ": diff= "
|
std::cout << std::setw(34) << name << ": diff= "
|
||||||
<< std::setw(12) << std::norm(buf-ref)
|
<< std::setw(12) << std::norm(buf-ref)
|
||||||
<< std::setw(10) << t/1.0e6 << " sec "
|
<< std::setw(10) << t/1.0e6 << " sec "
|
||||||
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
|
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
|
||||||
@ -85,16 +81,17 @@ inline void mulBenchmark(const std::string name, const MatV &left,
|
|||||||
|
|
||||||
if (rank == 0)
|
if (rank == 0)
|
||||||
{
|
{
|
||||||
std::cout << std::setw(30) << name << ": diff= "
|
std::cout << std::setw(34) << name << ": diff= "
|
||||||
<< std::setw(12) << (buf-ref).squaredNorm()
|
<< std::setw(12) << (buf-ref).squaredNorm()
|
||||||
<< std::setw(10) << t/1.0e6 << " sec "
|
<< std::setw(10) << t/1.0e6 << " sec "
|
||||||
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
|
<< std::setw(10) << flops/t/1.0e3 << " GFlop/s "
|
||||||
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
|
<< std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s "
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
::sleep(1);
|
::sleep(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_MKL
|
||||||
template <typename MatLeft, typename MatRight>
|
template <typename MatLeft, typename MatRight>
|
||||||
static inline void zdotuRow(ComplexD &res, const unsigned int aRow,
|
static inline void zdotuRow(ComplexD &res, const unsigned int aRow,
|
||||||
const MatLeft &a, const MatRight &b)
|
const MatLeft &a, const MatRight &b)
|
||||||
@ -154,6 +151,7 @@ static inline void zdotuCol(ComplexD &res, const unsigned int aCol,
|
|||||||
}
|
}
|
||||||
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
cblas_zdotu_sub(a.rows(), aPt, aInc, bPt, bInc, &res);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <typename MatLeft, typename MatRight>
|
template <typename MatLeft, typename MatRight>
|
||||||
void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
|
void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigned int nMat)
|
||||||
@ -192,12 +190,18 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
}
|
}
|
||||||
BARRIER();
|
BARRIER();
|
||||||
ref = (left.back()*right.back()).trace();
|
ref = (left.back()*right.back()).trace();
|
||||||
|
trBenchmark("Hadrons A2AContraction::accTrMul", left, right, ref,
|
||||||
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
|
{
|
||||||
|
res = 0.;
|
||||||
|
A2AContraction::accTrMul(res, a, b);
|
||||||
|
});
|
||||||
trBenchmark("Naive loop rows first", left, right, ref,
|
trBenchmark("Naive loop rows first", left, right, ref,
|
||||||
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
[](ComplexD &res, const MatLeft &a, const MatRight &b)
|
||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
auto nr = a.rows(), nc = a.cols();
|
auto nr = a.rows(), nc = a.cols();
|
||||||
parallel_for_reduce(ComplexDPlus, res) (unsigned int i = 0; i < nr; ++i)
|
parallel_for_reduce(ComplexPlus, res) (unsigned int i = 0; i < nr; ++i)
|
||||||
for (unsigned int j = 0; j < nc; ++j)
|
for (unsigned int j = 0; j < nc; ++j)
|
||||||
{
|
{
|
||||||
res += a(i, j)*b(j, i);
|
res += a(i, j)*b(j, i);
|
||||||
@ -208,7 +212,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
auto nr = a.rows(), nc = a.cols();
|
auto nr = a.rows(), nc = a.cols();
|
||||||
parallel_for_reduce(ComplexDPlus, res) (unsigned int j = 0; j < nc; ++j)
|
parallel_for_reduce(ComplexPlus, res) (unsigned int j = 0; j < nc; ++j)
|
||||||
for (unsigned int i = 0; i < nr; ++i)
|
for (unsigned int i = 0; i < nr; ++i)
|
||||||
{
|
{
|
||||||
res += a(i, j)*b(j, i);
|
res += a(i, j)*b(j, i);
|
||||||
@ -224,7 +228,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
|
|
||||||
parallel_for_reduce(ComplexDPlus, res) (unsigned int r = 0; r < a.rows(); ++r)
|
parallel_for_reduce(ComplexPlus, res) (unsigned int r = 0; r < a.rows(); ++r)
|
||||||
{
|
{
|
||||||
res += a.row(r).conjugate().dot(b.col(r));
|
res += a.row(r).conjugate().dot(b.col(r));
|
||||||
}
|
}
|
||||||
@ -234,7 +238,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
|
|
||||||
parallel_for_reduce(ComplexDPlus, res) (unsigned int c = 0; c < a.cols(); ++c)
|
parallel_for_reduce(ComplexPlus, res) (unsigned int c = 0; c < a.cols(); ++c)
|
||||||
{
|
{
|
||||||
res += a.col(c).conjugate().dot(b.row(c));
|
res += a.col(c).conjugate().dot(b.row(c));
|
||||||
}
|
}
|
||||||
@ -250,7 +254,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
|
|
||||||
parallel_for_reduce(ComplexDPlus, res) (unsigned int r = 0; r < a.rows(); ++r)
|
parallel_for_reduce(ComplexPlus, res) (unsigned int r = 0; r < a.rows(); ++r)
|
||||||
{
|
{
|
||||||
ComplexD tmp;
|
ComplexD tmp;
|
||||||
|
|
||||||
@ -263,7 +267,7 @@ void fullTrBenchmark(const unsigned int ni, const unsigned int nj, const unsigne
|
|||||||
{
|
{
|
||||||
res = 0.;
|
res = 0.;
|
||||||
|
|
||||||
parallel_for_reduce(ComplexDPlus, res) (unsigned int c = 0; c < a.cols(); ++c)
|
parallel_for_reduce(ComplexPlus, res) (unsigned int c = 0; c < a.cols(); ++c)
|
||||||
{
|
{
|
||||||
ComplexD tmp;
|
ComplexD tmp;
|
||||||
|
|
||||||
@ -305,6 +309,11 @@ void fullMulBenchmark(const unsigned int ni, const unsigned int nj, const unsign
|
|||||||
}
|
}
|
||||||
BARRIER();
|
BARRIER();
|
||||||
ref = left.back()*right.back();
|
ref = left.back()*right.back();
|
||||||
|
mulBenchmark("Hadrons A2AContraction::mul", left, right, ref,
|
||||||
|
[](Mat &res, const Mat &a, const Mat &b)
|
||||||
|
{
|
||||||
|
A2AContraction::mul(res, a, b);
|
||||||
|
});
|
||||||
mulBenchmark("Eigen A*B", left, right, ref,
|
mulBenchmark("Eigen A*B", left, right, ref,
|
||||||
[](Mat &res, const Mat &a, const Mat &b)
|
[](Mat &res, const Mat &a, const Mat &b)
|
||||||
{
|
{
|
||||||
@ -381,12 +390,12 @@ int main(int argc, char *argv[])
|
|||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
fullTrBenchmark<RMCMat, RMCMat>(ni, nj, nMat);
|
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
|
||||||
fullTrBenchmark<RMCMat, CMCMat>(ni, nj, nMat);
|
fullTrBenchmark<A2AMatrix<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
|
||||||
fullTrBenchmark<CMCMat, RMCMat>(ni, nj, nMat);
|
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrix<ComplexD>>(ni, nj, nMat);
|
||||||
fullTrBenchmark<CMCMat, CMCMat>(ni, nj, nMat);
|
fullTrBenchmark<A2AMatrixTr<ComplexD>, A2AMatrixTr<ComplexD>>(ni, nj, nMat);
|
||||||
fullMulBenchmark<RMCMat>(ni, nj, nMat);
|
fullMulBenchmark<A2AMatrix<ComplexD>>(ni, nj, nMat);
|
||||||
fullMulBenchmark<CMCMat>(ni, nj, nMat);
|
fullMulBenchmark<A2AMatrixTr<ComplexD>>(ni, nj, nMat);
|
||||||
FINALIZE();
|
FINALIZE();
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user