mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-22 17:52:02 +01:00
Merge branch 'develop' into feature/distil
* develop: (27 commits) Update README.md result layout standardised, iterator size more elegant updated syntac in Test_hadrons_spectrum chroma-regression test now prints difference correctly baryon input strings are now pairs of pairs of gammas - still ugly!! second update to pull request Changing back interface for Gamma3pt Removing old debug code Changes to A2Autils suggested changes for 1st pull request implemented changed input parameters for easier use Should compile everywhere now changed baryon interface added author information ready for pull request code compiling now - still need to test Baryons module works in 1 of 3 cases - still need SlicedProp and Msource part!! thread_for caused the problems - slow for loop for now still bugfix weird bug... ... # Conflicts: # Hadrons/Modules.hpp # Hadrons/modules.inc
This commit is contained in:
@ -76,8 +76,21 @@ public:
|
||||
const std::vector<ComplexField> &emB1,
|
||||
int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);
|
||||
|
||||
static void ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
const Eigen::Tensor<ComplexD,3> &WW_sd,
|
||||
template <typename TensorType>
|
||||
typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
|
||||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
|
||||
void>::type
|
||||
static ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
const TensorType &WW_sd,
|
||||
const FermionField *vs,
|
||||
const FermionField *vd);
|
||||
|
||||
template <typename TensorType>
|
||||
typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
|
||||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
|
||||
void>::type
|
||||
static ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
const TensorType &WW_sd,
|
||||
const FermionField *vs,
|
||||
const FermionField *vd);
|
||||
|
||||
@ -107,6 +120,11 @@ public:
|
||||
const FermionField *vd,
|
||||
int orthogdim);
|
||||
#endif
|
||||
private:
|
||||
inline static void OuterProductWWVV(PropagatorField &WWVV,
|
||||
const vobj &lhs,
|
||||
const vobj &rhs,
|
||||
const int Ns, const int ss);
|
||||
};
|
||||
|
||||
template<class FImpl>
|
||||
@ -1375,9 +1393,13 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
|
||||
// Take WW_sd v^dag_d (x) v_s
|
||||
//
|
||||
|
||||
template<class FImpl>
|
||||
void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
const Eigen::Tensor<ComplexD,3> &WW_sd,
|
||||
template <class FImpl>
|
||||
template <typename TensorType>
|
||||
typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
|
||||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
|
||||
void>::type
|
||||
A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
const TensorType &WW_sd,
|
||||
const FermionField *vs,
|
||||
const FermionField *vd)
|
||||
{
|
||||
@ -1399,39 +1421,100 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
|
||||
for(int t=0;t<N_t;t++){
|
||||
for(int s=0;s<N_s;s++){
|
||||
auto vs_v = vs[s].View();
|
||||
auto tmp1 = vs_v[ss];
|
||||
vobj tmp2 = Zero();
|
||||
vobj tmp3 = Zero();
|
||||
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
||||
auto vd_v = vd[d].View();
|
||||
Scalar_v coeff = WW_sd(t,s,d);
|
||||
tmp3 = conjugate(vd_v[ss]);
|
||||
mac(&tmp2, &coeff, &tmp3);
|
||||
}
|
||||
auto vs_v = vs[s].View();
|
||||
auto tmp1 = vs_v[ss];
|
||||
vobj tmp2 = Zero();
|
||||
vobj tmp3 = Zero();
|
||||
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
||||
auto vd_v = vd[d].View();
|
||||
Scalar_v coeff = WW_sd(t,s,d);
|
||||
tmp3 = conjugate(vd_v[ss]);
|
||||
mac(&tmp2, &coeff, &tmp3);
|
||||
}
|
||||
|
||||
//////////////////////////
|
||||
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
|
||||
//////////////////////////
|
||||
auto WWVV_v = WWVV[t].View();
|
||||
for(int s1=0;s1<Ns;s1++){
|
||||
for(int s2=0;s2<Ns;s2++){
|
||||
WWVV_v[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);
|
||||
WWVV_v[ss]()(s1,s2)(0,1) += tmp1()(s1)(0)*tmp2()(s2)(1);
|
||||
WWVV_v[ss]()(s1,s2)(0,2) += tmp1()(s1)(0)*tmp2()(s2)(2);
|
||||
WWVV_v[ss]()(s1,s2)(1,0) += tmp1()(s1)(1)*tmp2()(s2)(0);
|
||||
WWVV_v[ss]()(s1,s2)(1,1) += tmp1()(s1)(1)*tmp2()(s2)(1);
|
||||
WWVV_v[ss]()(s1,s2)(1,2) += tmp1()(s1)(1)*tmp2()(s2)(2);
|
||||
WWVV_v[ss]()(s1,s2)(2,0) += tmp1()(s1)(2)*tmp2()(s2)(0);
|
||||
WWVV_v[ss]()(s1,s2)(2,1) += tmp1()(s1)(2)*tmp2()(s2)(1);
|
||||
WWVV_v[ss]()(s1,s2)(2,2) += tmp1()(s1)(2)*tmp2()(s2)(2);
|
||||
}}
|
||||
//////////////////////////
|
||||
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
|
||||
//////////////////////////
|
||||
OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
|
||||
|
||||
}}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <class FImpl>
|
||||
template <typename TensorType>
|
||||
typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD, 3>, TensorType>::value ||
|
||||
std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
|
||||
void>::type
|
||||
A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
|
||||
const TensorType &WW_sd,
|
||||
const FermionField *vs,
|
||||
const FermionField *vd)
|
||||
{
|
||||
GridBase *grid = vs[0].Grid();
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
int Nsimd = grid->Nsimd();
|
||||
int N_t = WW_sd.dimensions()[0];
|
||||
int N_s = WW_sd.dimensions()[1];
|
||||
int N_d = WW_sd.dimensions()[2];
|
||||
|
||||
int d_unroll = 32;// Empirical optimisation
|
||||
|
||||
Eigen::Matrix<Complex, -1, -1, Eigen::RowMajor> buf;
|
||||
|
||||
for(int t=0;t<N_t;t++){
|
||||
WWVV[t] = Zero();
|
||||
}
|
||||
|
||||
for (int t = 0; t < N_t; t++){
|
||||
std::cout << GridLogMessage << "Contraction t = " << t << std::endl;
|
||||
buf = WW_sd[t];
|
||||
thread_for(ss,grid->oSites(),{
|
||||
for(int d_o=0;d_o<N_d;d_o+=d_unroll){
|
||||
for(int s=0;s<N_s;s++){
|
||||
auto vs_v = vs[s].View();
|
||||
auto tmp1 = vs_v[ss];
|
||||
vobj tmp2 = Zero();
|
||||
vobj tmp3 = Zero();
|
||||
for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
|
||||
auto vd_v = vd[d].View();
|
||||
Scalar_v coeff = buf(s,d);
|
||||
tmp3 = conjugate(vd_v[ss]);
|
||||
mac(&tmp2, &coeff, &tmp3);
|
||||
}
|
||||
|
||||
//////////////////////////
|
||||
// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
|
||||
//////////////////////////
|
||||
OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
|
||||
}}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <class FImpl>
|
||||
inline void A2Autils<FImpl>::OuterProductWWVV(PropagatorField &WWVV,
|
||||
const vobj &lhs,
|
||||
const vobj &rhs,
|
||||
const int Ns, const int ss)
|
||||
{
|
||||
auto WWVV_v = WWVV.View();
|
||||
for (int s1 = 0; s1 < Ns; s1++){
|
||||
for (int s2 = 0; s2 < Ns; s2++){
|
||||
WWVV_v[ss]()(s1,s2)(0, 0) += lhs()(s1)(0) * rhs()(s2)(0);
|
||||
WWVV_v[ss]()(s1,s2)(0, 1) += lhs()(s1)(0) * rhs()(s2)(1);
|
||||
WWVV_v[ss]()(s1,s2)(0, 2) += lhs()(s1)(0) * rhs()(s2)(2);
|
||||
WWVV_v[ss]()(s1,s2)(1, 0) += lhs()(s1)(1) * rhs()(s2)(0);
|
||||
WWVV_v[ss]()(s1,s2)(1, 1) += lhs()(s1)(1) * rhs()(s2)(1);
|
||||
WWVV_v[ss]()(s1,s2)(1, 2) += lhs()(s1)(1) * rhs()(s2)(2);
|
||||
WWVV_v[ss]()(s1,s2)(2, 0) += lhs()(s1)(2) * rhs()(s2)(0);
|
||||
WWVV_v[ss]()(s1,s2)(2, 1) += lhs()(s1)(2) * rhs()(s2)(1);
|
||||
WWVV_v[ss]()(s1,s2)(2, 2) += lhs()(s1)(2) * rhs()(s2)(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class FImpl>
|
||||
void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWVV0,
|
||||
|
Reference in New Issue
Block a user