1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-22 17:52:02 +01:00

Merge branch 'develop' into feature/distil

* develop: (27 commits)
  Update README.md
  result layout standardised, iterator size more elegant
  updated syntac in Test_hadrons_spectrum
  chroma-regression test now prints difference correctly
  baryon input strings are now pairs of pairs of gammas - still ugly!!
  second update to pull request
  Changing back interface for Gamma3pt
  Removing old debug code
  Changes to A2Autils
  suggested changes for 1st pull request implemented
  changed input parameters for easier use
  Should compile everywhere now
  changed baryon interface
  added author information
  ready for pull request
  code compiling now - still need to test
  Baryons module works in 1 of 3 cases - still need SlicedProp and Msource part!!
  thread_for caused the problems - slow for loop for now
  still bugfix
  weird bug...
  ...

# Conflicts:
#	Hadrons/Modules.hpp
#	Hadrons/modules.inc
This commit is contained in:
Michael Marshall
2019-10-30 14:13:00 +00:00
16 changed files with 1886 additions and 483 deletions

View File

@ -76,8 +76,21 @@ public:
const std::vector<ComplexField> &emB1,
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
static void ContractWWVV(std::vector<PropagatorField> &WWVV,
const Eigen::Tensor<ComplexD,3> &WW_sd,
template <typename TensorType>
typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
static ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd);
template <typename TensorType>
typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
static ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd);
@ -107,6 +120,11 @@ public:
const FermionField *vd,
int orthogdim);
#endif
private:
inline static void OuterProductWWVV(PropagatorField &WWVV,
const vobj &lhs,
const vobj &rhs,
const int Ns, const int ss);
};
template<class FImpl>
@ -1375,9 +1393,13 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
// Take WW_sd v^dag_d (x) v_s
//
template<class FImpl>
void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
const Eigen::Tensor<ComplexD,3> &WW_sd,
template <class FImpl>
template <typename TensorType>
typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd)
{
@ -1399,39 +1421,100 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
for(int t=0;t<N_t;t++){
for(int s=0;s<N_s;s++){
auto vs_v = vs[s].View();
auto tmp1 = vs_v[ss];
vobj tmp2 = Zero();
vobj tmp3 = Zero();
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
auto vd_v = vd[d].View();
Scalar_v coeff = WW_sd(t,s,d);
tmp3 = conjugate(vd_v[ss]);
mac(&tmp2, &coeff, &tmp3);
}
auto vs_v = vs[s].View();
auto tmp1 = vs_v[ss];
vobj tmp2 = Zero();
vobj tmp3 = Zero();
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
auto vd_v = vd[d].View();
Scalar_v coeff = WW_sd(t,s,d);
tmp3 = conjugate(vd_v[ss]);
mac(&tmp2, &coeff, &tmp3);
}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
auto WWVV_v = WWVV[t].View();
for(int s1=0;s1<Ns;s1++){
for(int s2=0;s2<Ns;s2++){
WWVV_v[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);
WWVV_v[ss]()(s1,s2)(0,1) += tmp1()(s1)(0)*tmp2()(s2)(1);
WWVV_v[ss]()(s1,s2)(0,2) += tmp1()(s1)(0)*tmp2()(s2)(2);
WWVV_v[ss]()(s1,s2)(1,0) += tmp1()(s1)(1)*tmp2()(s2)(0);
WWVV_v[ss]()(s1,s2)(1,1) += tmp1()(s1)(1)*tmp2()(s2)(1);
WWVV_v[ss]()(s1,s2)(1,2) += tmp1()(s1)(1)*tmp2()(s2)(2);
WWVV_v[ss]()(s1,s2)(2,0) += tmp1()(s1)(2)*tmp2()(s2)(0);
WWVV_v[ss]()(s1,s2)(2,1) += tmp1()(s1)(2)*tmp2()(s2)(1);
WWVV_v[ss]()(s1,s2)(2,2) += tmp1()(s1)(2)*tmp2()(s2)(2);
}}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
}}
}
});
}
template <class FImpl>
template <typename TensorType>
typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD, 3>, TensorType>::value ||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
void>::type
A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
const TensorType &WW_sd,
const FermionField *vs,
const FermionField *vd)
{
GridBase *grid = vs[0].Grid();
int nd = grid->_ndimension;
int Nsimd = grid->Nsimd();
int N_t = WW_sd.dimensions()[0];
int N_s = WW_sd.dimensions()[1];
int N_d = WW_sd.dimensions()[2];
int d_unroll = 32;// Empirical optimisation
Eigen::Matrix<Complex, -1, -1, Eigen::RowMajor> buf;
for(int t=0;t<N_t;t++){
WWVV[t] = Zero();
}
for (int t = 0; t < N_t; t++){
std::cout << GridLogMessage << "Contraction t = " << t << std::endl;
buf = WW_sd[t];
thread_for(ss,grid->oSites(),{
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
for(int s=0;s<N_s;s++){
auto vs_v = vs[s].View();
auto tmp1 = vs_v[ss];
vobj tmp2 = Zero();
vobj tmp3 = Zero();
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
auto vd_v = vd[d].View();
Scalar_v coeff = buf(s,d);
tmp3 = conjugate(vd_v[ss]);
mac(&tmp2, &coeff, &tmp3);
}
//////////////////////////
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
//////////////////////////
OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
}}
});
}
}
template <class FImpl>
inline void A2Autils<FImpl>::OuterProductWWVV(PropagatorField &WWVV,
const vobj &lhs,
const vobj &rhs,
const int Ns, const int ss)
{
auto WWVV_v = WWVV.View();
for (int s1 = 0; s1 < Ns; s1++){
for (int s2 = 0; s2 < Ns; s2++){
WWVV_v[ss]()(s1,s2)(0, 0) += lhs()(s1)(0) * rhs()(s2)(0);
WWVV_v[ss]()(s1,s2)(0, 1) += lhs()(s1)(0) * rhs()(s2)(1);
WWVV_v[ss]()(s1,s2)(0, 2) += lhs()(s1)(0) * rhs()(s2)(2);
WWVV_v[ss]()(s1,s2)(1, 0) += lhs()(s1)(1) * rhs()(s2)(0);
WWVV_v[ss]()(s1,s2)(1, 1) += lhs()(s1)(1) * rhs()(s2)(1);
WWVV_v[ss]()(s1,s2)(1, 2) += lhs()(s1)(1) * rhs()(s2)(2);
WWVV_v[ss]()(s1,s2)(2, 0) += lhs()(s1)(2) * rhs()(s2)(0);
WWVV_v[ss]()(s1,s2)(2, 1) += lhs()(s1)(2) * rhs()(s2)(1);
WWVV_v[ss]()(s1,s2)(2, 2) += lhs()(s1)(2) * rhs()(s2)(2);
}
}
}
template<class FImpl>
void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWVV0,