From 7c57cac670675bd3a178fd333cfee6adaed5c63d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 4 Oct 2018 18:57:41 +0100 Subject: [PATCH 1/2] Adding A2A utils class for containing kernels. --- Grid/qcd/utils/A2Autils.h | 1185 +++++++++++++++++++++++++++++++++++++ Grid/qcd/utils/Utils.h | 6 + 2 files changed, 1191 insertions(+) create mode 100644 Grid/qcd/utils/A2Autils.h diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h new file mode 100644 index 00000000..c75f8a1c --- /dev/null +++ b/Grid/qcd/utils/A2Autils.h @@ -0,0 +1,1185 @@ +#pragma once +//#include +#include + +namespace Grid { +namespace QCD { + +#undef DELTA_F_EQ_2 + +template +class A2Autils +{ +public: + typedef typename FImpl::ComplexField ComplexField; + typedef typename FImpl::FermionField FermionField; + typedef typename FImpl::PropagatorField PropagatorField; + + typedef typename FImpl::SiteSpinor vobj; + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + typedef iSinglet Scalar_v; + typedef iSinglet Scalar_s; + + typedef iSpinColourMatrix SpinColourMatrix_v; + + static void MesonField(Eigen::Tensor &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + std::vector gammas, + const std::vector &mom, + int orthogdim); + + static void PionFieldWVmom(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + const std::vector &mom, + int orthogdim); + + static void PionFieldXX(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim, + int g5); + + static void PionFieldWV(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim); + static void PionFieldWW(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *wj, + int orthogdim); + static void PionFieldVV(Eigen::Tensor &mat, + const FermionField *vi, + const FermionField *vj, + int orthogdim); + + static void ContractWWVV(std::vector &WWVV, + const Eigen::Tensor &WW_sd, + const FermionField *vs, + const FermionField *vd); + + static void ContractFourQuarkColourDiagonal(const PropagatorField &WWVV0, + const PropagatorField &WWVV1, + const std::vector &gamma0, + const std::vector &gamma1, + ComplexField &O_trtr, + ComplexField &O_fig8); + + static void ContractFourQuarkColourMix(const PropagatorField &WWVV0, + const PropagatorField &WWVV1, + const std::vector &gamma0, + const std::vector &gamma1, + ComplexField &O_trtr, + ComplexField &O_fig8); +#ifdef DELTA_F_EQ_2 + static void DeltaFeq2(int dt_min,int dt_max, + Eigen::Tensor &dF2_fig8, + Eigen::Tensor &dF2_trtr, + Eigen::Tensor &dF2_fig8_mix, + Eigen::Tensor &dF2_trtr_mix, + Eigen::Tensor &denom_A0, + Eigen::Tensor &denom_P, + Eigen::Tensor &WW_sd, + const FermionField *vs, + const FermionField *vd, + int orthogdim); +#endif +}; + +template +void A2Autils::MesonField(Eigen::Tensor &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + std::vector gammas, + const std::vector &mom, + int orthogdim) +{ + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + + int Lblock = mat.dimension(3); + int Rblock = mat.dimension(4); + + GridBase *grid = lhs_wi[0]._grid; + + const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Ngamma = gammas.size(); + int Nmom = mom.size(); + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nmom; + int MFlvol = ld*Lblock*Rblock*Nmom; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + // potentially wasting cores here if local time extent too small + parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n icoor(Nd); + std::vector extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]; + + } + }}} + } + + assert(mat.dimension(0) == Nmom); + assert(mat.dimension(1) == Ngamma); + assert(mat.dimension(2) == Nt); + + // ld loop and local only?? + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + parallel_for_nest2(int lt=0;ltGlobalSumVector(&mat(0,0,0,0,0),Nmom*Ngamma*Nt*Lblock*Rblock); + +} + + +/////////////////////////////////////////////////////////////////// +//Meson +// Interested in +// +// sum_x,y Trace[ G S(x,tx,y,ty) G S(y,ty,x,tx) ] +// +// Conventional meson field: +// +// = sum_x,y Trace[ sum_j G |v_j(y,ty)> +// = sum_ij PI_ji(tx) PI_ij(ty) +// +// G5-Hermiticity +// +// sum_x,y Trace[ G S(x,tx,y,ty) G S(y,ty,x,tx) ] +// = sum_x,y Trace[ G S(x,tx,y,ty) G g5 S^dag(x,tx,y,ty) g5 ] +// = sum_x,y Trace[ g5 G sum_j |v_j(y,ty)> +// = sum_ij PionVV(ty) PionWW(tx) +// +// (*) is only correct estimator if w_i and w_j come from distinct noise sets to preserve the kronecker +// expectation value. Otherwise biased. +//////////////////////////////////////////////////////////////////// + +template +void A2Autils::PionFieldXX(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim, + int g5) +{ + int Lblock = mat.dimension(1); + int Rblock = mat.dimension(2); + + GridBase *grid = wi[0]._grid; + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock; + int MFlvol = ld*Lblock*Rblock; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n icoor(nd); + iScalar temp; + std::vector > extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx =i+Lblock*j+Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; + + } + }} + } + + assert(mat.dimension(0) == Nt); + // ld loop and local only?? + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + parallel_for_nest2(int lt=0;ltGlobalSumVector(&mat(0,0,0),Nt*Lblock*Rblock); +} + +template +void A2Autils::PionFieldWVmom(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + const std::vector &mom, + int orthogdim) +{ + int Lblock = mat.dimension(2); + int Rblock = mat.dimension(3); + + GridBase *grid = wi[0]._grid; + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Nmom = mom.size(); + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nmom; + int MFlvol = ld*Lblock*Rblock*Nmom; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n icoor(nd); + iScalar temp; + std::vector > extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; + + } + }}} + } + + assert(mat.dimension(0) == Nmom); + assert(mat.dimension(1) == Nt); + + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + parallel_for_nest2(int lt=0;ltGlobalSumVector(&mat(0,0,0,0),Nmom*Nt*Lblock*Rblock); +} + +template +void A2Autils::PionFieldWV(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *vj, + int orthogdim) +{ + const int g5=1; + PionFieldXX(mat,wi,vj,orthogdim,g5); +} +template +void A2Autils::PionFieldWW(Eigen::Tensor &mat, + const FermionField *wi, + const FermionField *wj, + int orthogdim) +{ + const int nog5=0; + PionFieldXX(mat,wi,wj,orthogdim,nog5); +} +template +void A2Autils::PionFieldVV(Eigen::Tensor &mat, + const FermionField *vi, + const FermionField *vj, + int orthogdim) +{ + const int nog5=0; + PionFieldXX(mat,vi,vj,orthogdim,nog5); +} + + +//////////////////////////////////////////// +// Schematic thoughts about more generalised four quark insertion +// +// -- Pupil or fig8 type topology (depending on flavour structure) done below +// -- Have Bag style -- WWVV VVWW +// _ _ +// / \/ \ +// \_/\_/ +// +// - Kpipi style (two pion insertions) +// K +// ********* +// Type 1 +// ********* +// x +// ___ _____ pi(b) +// K / \/____/ +// \ \ +// \____\pi(a) +// +// +// W^W_sd V_s(x) V^_d(xa) Wa(xa) Va^(x) WW_bb' Vb Vb'(x) +// +// (Kww.PIvw) VV ; pi_ww.VV +// +// But Norman and Chris say g5 hermiticity not used, except on K +// +// Kww PIvw PIvw +// +// (W^W_sd PIa_dd') PIb_uu' (v_s v_d' w_u v_u')(x) +// +// - Can form (Nmode^3) +// +// (Kww . PIvw)_sd and then V_sV_d tensor product contracting this +// +// - Can form +// +// (PIvw)_uu' W_uV_u' tensor product. +// +// Both are lattice propagators. +// +// Contract with the same four quark type contraction as BK. +// +// ********* +// Type 2 +// ********* +// _ _____ pi +// K / \/ | +// \_/\_____|pi +// +// Norman/Chris would say +// +// (Kww VV)(x) . (PIwv Piwv) VW(x) +// +// Full four quark via g5 hermiticity +// +// (Kww VV)(x) . (PIww Pivw) VV(x) +// +// ********* +// Type 3 +// ********* +// ___ _____ pi +// K / \/ | +// \ /\ | +// \ \/ | +// \________|pi +// +// +// (V(x) . Kww . pi_vw . pivw . V(x)). WV(x) +// +// No difference possible to Norman, Chris, Diaqian +// +// ********* +// Type 4 +// ********* +// ___ pi +// K / \/\ |\ +// \___/\/ |/ +// pi +// +// (Kww VV ) WV x Tr(PIwv PIwv) +// +// Could use alternate PI_ww PIvv for disco loop interesting comparison +// +// ********* +// Primitives / Utilities for assembly +// ********* +// +// i) Basic type for meson field - mode indexed object, whether WW, VW, VV etc.. +// ii) Multiply two meson fields (modes^3) ; use BLAS MKL via Eigen +// iii) Multiply and trace two meson fields ; use BLAS MKL via Eigen. The element wise product trick +// iv) Contract a meson field (whether WW,VW, VV WV) with W and V fields to form LatticePropagator +// v) L^3 sum a four quark contaction with two LatticePropagators. +// +// Use lambda functions to enable flexibility in v) +// Use lambda functions to unify the pion field / meson field contraction codes. +//////////////////////////////////////////// + + + +//////////////////////////////////////////////////////////////////////// +// DeltaF=2 contraction ; use exernal WW field for Kaon, anti Kaon sink +//////////////////////////////////////////////////////////////////////// +// +// WW -- i vectors have adjoint, and j vectors not. +// -- Think of "i" as the strange quark, forward prop from 0 +// -- Think of "j" as the anti-down quark. +// +// WW_sd are w^dag_s w_d +// +// Hence VV vectors correspondingly are v^dag_d, v_s from t=0 +// and v^dag_d, v_s from t=dT +// +// There is an implicit g5 associated with each v^dag_d from use of g5 Hermiticity. +// The other gamma_5 lies in the WW external meson operator. +// +// From UKhadron wallbag.cc: +// +// LatticePropagator anti_d0 = adj( Gamma(G5) * Qd0 * Gamma(G5)); +// LatticePropagator anti_d1 = adj( Gamma(G5) * Qd1 * Gamma(G5)); +// +// PR1 = Qs0 * Gamma(G5) * anti_d0; +// PR2 = Qs1 * Gamma(G5) * anti_d1; +// +// TR1 = trace( PR1 * G1 ); +// TR2 = trace( PR2 * G2 ); +// Wick1 = TR1 * TR2; +// +// Wick2 = trace( PR1* G1 * PR2 * G2 ); +// // was Wick2 = trace( Qs0 * Gamma(G5) * anti_d0 * G1 * Qs1 * Gamma(G5) * anti_d1 * G2 ); +// +// TR TR(tx) = Wick1 = sum_x WW[t0]_sd < v^_d |g5 G| v_s> WW[t1]_s'd' < v^_d' |g5 G| v_s'> |_{x,tx) +// = sum_x [ Trace(WW[t0] VgV(t,x) ) x Trace( WW_[t1] VgV(t,x) ) ] +// +// +// Calc all Nt Trace(WW VV) products at once, take Nt^2 products of these. +// +// Fig8(tx) = Wick2 = sum_x WW[t0]_sd WW[t1]_s'd' < v^_d |g5 G| v_s'> < v^_d' |g5 G| v_s> |_{x,tx} +// +// = sum_x Trace( WW[t0] VV[t,x] WW[t1] VV[t,x] ) +// +/////////////////////////////////////////////////////////////////////////////// +// +// WW is w_s^dag (x) w_d (G5 implicitly absorbed) +// +// WWVV will have spin-col (x) spin-col tensor. +// +// Want this to look like a strange fwd prop, anti-d prop. +// +// Take WW_sd v^dag_d (x) v_s +// + +template +void A2Autils::ContractWWVV(std::vector &WWVV, + const Eigen::Tensor &WW_sd, + const FermionField *vs, + const FermionField *vd) +{ + GridBase *grid = vs[0]._grid; + + int nd = grid->_ndimension; + int Nsimd = grid->Nsimd(); + int N_t = WW_sd.dimension(0); + int N_s = WW_sd.dimension(1); + int N_d = WW_sd.dimension(2); + + int d_unroll = 32;// Empirical optimisation + + for(int t=0;toSites();ss++){ + for(int d_o=0;d_o +void A2Autils::ContractFourQuarkColourDiagonal(const PropagatorField &WWVV0, + const PropagatorField &WWVV1, + const std::vector &gamma0, + const std::vector &gamma1, + ComplexField &O_trtr, + ComplexField &O_fig8) +{ + assert(gamma0.size()==gamma1.size()); + int Ng = gamma0.size(); + + GridBase *grid = WWVV0._grid; + + parallel_for(int ss=0;ssoSites();ss++){ + + typedef typename ComplexField::vector_object vobj; + + vobj v_trtr; + vobj v_fig8; + + auto VV0 = WWVV0._odata[ss]; + auto VV1 = WWVV1._odata[ss]; + + for(int g=0;g +void A2Autils::ContractFourQuarkColourMix(const PropagatorField &WWVV0, + const PropagatorField &WWVV1, + const std::vector &gamma0, + const std::vector &gamma1, + ComplexField &O_trtr, + ComplexField &O_fig8) +{ + assert(gamma0.size()==gamma1.size()); + int Ng = gamma0.size(); + + GridBase *grid = WWVV0._grid; + + parallel_for(int ss=0;ssoSites();ss++){ + + typedef typename ComplexField::vector_object vobj; + + auto VV0 = WWVV0._odata[ss]; + auto VV1 = WWVV1._odata[ss]; + + for(int g=0;g +void A2Autils::DeltaFeq2(int dt_min,int dt_max, + Eigen::Tensor &dF2_fig8, + Eigen::Tensor &dF2_trtr, + Eigen::Tensor &dF2_fig8_mix, + Eigen::Tensor &dF2_trtr_mix, + Eigen::Tensor &denom_A0, + Eigen::Tensor &denom_P, + Eigen::Tensor &WW_sd, + const FermionField *vs, + const FermionField *vd, + int orthogdim) +{ + GridBase *grid = vs[0]._grid; + + LOG(Message) << "Computing A2A DeltaF=2 graph" << std::endl; + + auto G5 = Gamma(Gamma::Algebra::Gamma5); + + double nodes = grid->NodeCount(); + int nd = grid->_ndimension; + int Nsimd = grid->Nsimd(); + int N_t = WW_sd.dimension(0); + int N_s = WW_sd.dimension(1); + int N_d = WW_sd.dimension(2); + + assert(grid->GlobalDimensions()[orthogdim] == N_t); + double vol = 1.0; + for(int dim=0;dimGlobalDimensions()[dim]; + } + + double t_tot = -usecond(); + std::vector WWVV (N_t,grid); + + double t_outer= -usecond(); + ContractWWVV(WWVV,WW_sd,&vs[0],&vd[0]); + t_outer+=usecond(); + + ////////////////////////////// + // Implicit gamma-5 + ////////////////////////////// + for(int t=0;t correlator from each wall + ComplexField D1(grid); D1 = zero; + + ComplexField O1_trtr(grid); O1_trtr = zero; + ComplexField O2_trtr(grid); O2_trtr = zero; + ComplexField O3_trtr(grid); O3_trtr = zero; + ComplexField O4_trtr(grid); O4_trtr = zero; + ComplexField O5_trtr(grid); O5_trtr = zero; + + ComplexField O1_fig8(grid); O1_fig8 = zero; + ComplexField O2_fig8(grid); O2_fig8 = zero; + ComplexField O3_fig8(grid); O3_fig8 = zero; + ComplexField O4_fig8(grid); O4_fig8 = zero; + ComplexField O5_fig8(grid); O5_fig8 = zero; + + ComplexField VV_trtr(grid); VV_trtr = zero; + ComplexField AA_trtr(grid); AA_trtr = zero; + ComplexField SS_trtr(grid); SS_trtr = zero; + ComplexField PP_trtr(grid); PP_trtr = zero; + ComplexField TT_trtr(grid); TT_trtr = zero; + + ComplexField VV_fig8(grid); VV_fig8 = zero; + ComplexField AA_fig8(grid); AA_fig8 = zero; + ComplexField SS_fig8(grid); SS_fig8 = zero; + ComplexField PP_fig8(grid); PP_fig8 = zero; + ComplexField TT_fig8(grid); TT_fig8 = zero; + + ////////////////////////////////////////////////// + // Used to store appropriate correlation funcs + ////////////////////////////////////////////////// + std::vector C1; + std::vector C2; + std::vector C3; + std::vector C4; + std::vector C5; + + ////////////////////////////////////////////////////////// + // Could do AA, VV, SS, PP, TT and form linear combinations later. + // Almost 2x. but for many modes, the above loop dominates. + ////////////////////////////////////////////////////////// + double t_contr= -usecond(); + + // Tr Tr Wick contraction + auto VX = Gamma(Gamma::Algebra::GammaX); + auto VY = Gamma(Gamma::Algebra::GammaY); + auto VZ = Gamma(Gamma::Algebra::GammaZ); + auto VT = Gamma(Gamma::Algebra::GammaT); + + auto AX = Gamma(Gamma::Algebra::GammaXGamma5); + auto AY = Gamma(Gamma::Algebra::GammaYGamma5); + auto AZ = Gamma(Gamma::Algebra::GammaZGamma5); + auto AT = Gamma(Gamma::Algebra::GammaTGamma5); + + auto S = Gamma(Gamma::Algebra::Identity); + auto P = Gamma(Gamma::Algebra::Gamma5); + + auto T0 = Gamma(Gamma::Algebra::SigmaXY); + auto T1 = Gamma(Gamma::Algebra::SigmaXZ); + auto T2 = Gamma(Gamma::Algebra::SigmaXT); + auto T3 = Gamma(Gamma::Algebra::SigmaYZ); + auto T4 = Gamma(Gamma::Algebra::SigmaYT); + auto T5 = Gamma(Gamma::Algebra::SigmaZT); + + std::cout < VV({VX,VY,VZ,VT}); + std::vector AA({AX,AY,AZ,AT}); + std::vector SS({S}); + std::vector PP({P}); + std::vector TT({T0,T1,T2,T3,T4,T5}); + std::vector A0({AT}); + + ContractFourQuarkColourDiagonal(WWVV[t0], WWVV[t1],VV,VV,VV_trtr,VV_fig8); // VV + ContractFourQuarkColourDiagonal(WWVV[t0], WWVV[t1],AA,AA,AA_trtr,AA_fig8); // AA + ContractFourQuarkColourDiagonal(WWVV[t0], WWVV[t1],SS,SS,SS_trtr,SS_fig8); // SS + ContractFourQuarkColourDiagonal(WWVV[t0], WWVV[t1],PP,PP,PP_trtr,PP_fig8); // PP + ContractFourQuarkColourDiagonal(WWVV[t0], WWVV[t1],TT,TT,TT_trtr,TT_fig8); // TT + + O1_trtr = VV_trtr+AA_trtr; O2_trtr = VV_trtr-AA_trtr; // VV+AA,VV-AA + O1_fig8 = VV_fig8+AA_fig8; O2_fig8 = VV_fig8-AA_fig8; + + O3_trtr = SS_trtr-PP_trtr; O4_trtr = SS_trtr+PP_trtr; // SS+PP,SS-PP + O3_fig8 = SS_fig8-PP_fig8; O4_fig8 = SS_fig8+PP_fig8; + + O5_trtr = TT_trtr; + O5_fig8 = TT_fig8; + + sliceSum(O1_trtr,C1, orthogdim); for(int t=0;t #include +// All-to-all contraction kernels that touch the +// internal lattice structure +#include + + + #endif From b46d31d4b642b6ab7397d6db7b8b4473b02b1827 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 5 Oct 2018 11:29:40 +0100 Subject: [PATCH 2/2] MKL enable on Eigen if Grid is configured to use MKL --- Grid/Grid_Eigen_Dense.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index 4fb5b831..dd330dd9 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -1,4 +1,9 @@ #pragma once +// Force Eigen to use MKL if Grid has been configured with --enable-mkl +#ifdef USE_MKL +#define EIGEN_USE_MKL_ALL +#endif + #if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations"