diff --git a/extras/Hadrons/Modules/MContraction/A2APionField.cc b/extras/Hadrons/Modules/MContraction/A2APionField.cc new file mode 100644 index 00000000..a55f55d0 --- /dev/null +++ b/extras/Hadrons/Modules/MContraction/A2APionField.cc @@ -0,0 +1,8 @@ +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MContraction; + +template class Grid::Hadrons::MContraction::TA2APionField; +template class Grid::Hadrons::MContraction::TA2APionField; diff --git a/extras/Hadrons/Modules/MContraction/A2APionField.hpp b/extras/Hadrons/Modules/MContraction/A2APionField.hpp new file mode 100644 index 00000000..da3a2137 --- /dev/null +++ b/extras/Hadrons/Modules/MContraction/A2APionField.hpp @@ -0,0 +1,1726 @@ +#ifndef Hadrons_MContraction_A2APionField_hpp_ +#define Hadrons_MContraction_A2APionField_hpp_ + +#include +#include +#include +#include + +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * A2APionField * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MContraction) + +typedef std::pair GammaPair; + + +class A2APionFieldPar : Serializable +{ + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(A2APionFieldPar, + int, cacheBlock, + int, schurBlock, + int, Nmom, + std::string, A2A_i, + std::string, A2A_j, + std::string, output); +}; + +template +class TA2APionField : public Module +{ + public: + FERM_TYPE_ALIASES(FImpl, ); + SOLVER_TYPE_ALIASES(FImpl, ); + + typedef A2AModesSchurDiagTwo A2ABase; + + int d_unroll ; + + public: + // constructor + TA2APionField(const std::string name); + // destructor + virtual ~TA2APionField(void){}; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); + + virtual void DeltaFeq2(int dt_min,int dt_max, + Eigen::Tensor &dF2_fig8, + Eigen::Tensor &dF2_trtr, + Eigen::Tensor &WW_sd, + const LatticeFermion *vs, + const LatticeFermion *vd, + int orthogdim); + + virtual void DeltaFeq2_alt(int dt_min,int dt_max, + Eigen::Tensor &dF2_fig8, + Eigen::Tensor &dF2_trtr, + Eigen::Tensor &den0, + Eigen::Tensor &den1, + Eigen::Tensor &WW_sd, + const LatticeFermion *vs, + const LatticeFermion *vd, + int orthogdim); + + /////////////////////////////////////// + // Arithmetic help. Move to Grid?? + /////////////////////////////////////// + virtual void PionFieldXX(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *vj, + int orthogdim, + int g5); + /////////////////////////// + // Simple wrappers + /////////////////////////// + virtual void PionFieldWVmom(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *vj, + const std::vector &mom, + int orthogdim); + + virtual void PionFieldWV(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *vj, + int orthogdim); + + virtual void PionFieldVV(Eigen::Tensor &mat, + const LatticeFermion *vi, + const LatticeFermion *vj, + int orthogdim); + + virtual void PionFieldWW(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *wj, + int orthogdim); + +}; + +MODULE_REGISTER(A2APionField, ARG(TA2APionField), MContraction); +MODULE_REGISTER(ZA2APionField, ARG(TA2APionField), MContraction); + +/****************************************************************************** +* TA2APionField implementation * +******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TA2APionField::TA2APionField(const std::string name) + : Module(name) +{ +} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TA2APionField::getInput(void) +{ + std::vector in; + in.push_back(par().A2A_i + "_class"); + in.push_back(par().A2A_i + "_w_high_4d"); + in.push_back(par().A2A_i + "_v_high_4d"); + in.push_back(par().A2A_j + "_class"); + in.push_back(par().A2A_j + "_w_high_4d"); + in.push_back(par().A2A_j + "_v_high_4d"); + + return in; +} + +template +std::vector TA2APionField::getOutput(void) +{ + std::vector out = {}; + + return out; +} + + +// setup /////////////////////////////////////////////////////////////////////// +template +void TA2APionField::setup(void) +{ + d_unroll = 32; // empirical default. Can be overridden if desired + + // Four D fields + envTmp(std::vector, "wi", 1, par().schurBlock, FermionField(env().getGrid(1))); + envTmp(std::vector, "vi", 1, par().schurBlock, FermionField(env().getGrid(1))); + envTmp(std::vector, "wj", 1, par().schurBlock, FermionField(env().getGrid(1))); + envTmp(std::vector, "vj", 1, par().schurBlock, FermionField(env().getGrid(1))); + + // 5D tmp + int Ls_i = env().getObjectLs(par().A2A_i + "_class"); + envTmpLat(FermionField, "tmp_5d", Ls_i); + + int Ls_j= env().getObjectLs(par().A2A_j + "_class"); + assert ( Ls_i == Ls_j ); +} + + +////////////////////////////////////////////////////////////////////////////////// +// Cache blocked arithmetic routine +// Could move to Grid ??? +////////////////////////////////////////////////////////////////////////////////// +template +void TA2APionField::PionFieldWVmom(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *vj, + const std::vector &mom, + int orthogdim) +{ + double t0,t1,t2,t3; t0=t1=t2=t3= 0.0; + + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + + int Lblock = mat.dimension(2); + int Rblock = mat.dimension(3); + + GridBase *grid = wi[0]._grid; + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Nmom = mom.size(); + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nmom; + int MFlvol = ld*Lblock*Rblock*Nmom; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + t0-=usecond(); + parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n icoor(nd); + iScalar temp; + std::vector > extracted(Nsimd); + + // std::vector extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx = m+Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; + + } + }}} + } + t1+=usecond(); + + assert(mat.dimension(0) == Nmom); + assert(mat.dimension(1) == Nt); + t2-=usecond(); + // ld loop and local only?? + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + parallel_for_nest2(int lt=0;ltGlobalSumVector(&mat(0,0,0,0),Nmom*Nt*Lblock*Rblock); + t3+=usecond(); +} + +/////////////////////////////////////////////////////////////////// +//Meson +// Interested in +// +// sum_x,y Trace[ G S(x,tx,y,ty) G S(y,ty,x,tx) ] +// +// Conventional meson field: +// +// = sum_x,y Trace[ sum_j G |v_j(y,ty)> +// = sum_ij PI_ji(tx) PI_ij(ty) +// +// G5-Hermiticity +// +// sum_x,y Trace[ G S(x,tx,y,ty) G S(y,ty,x,tx) ] +// = sum_x,y Trace[ G S(x,tx,y,ty) G g5 S^dag(x,tx,y,ty) g5 ] +// = sum_x,y Trace[ g5 G sum_j |v_j(y,ty)> +// = sum_ij PionVV(ty) PionWW(tx) +// +// (*) is only correct estimator if w_i and w_j come from distinct noise sets to preserve the kronecker +// expectation value. Otherwise biased. +//////////////////////////////////////////////////////////////////// + + +template +void TA2APionField::PionFieldXX(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *vj, + int orthogdim, + int g5) +{ + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + + int Lblock = mat.dimension(1); + int Rblock = mat.dimension(2); + + GridBase *grid = wi[0]._grid; + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock; + int MFlvol = ld*Lblock*Rblock; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n icoor(nd); + iScalar temp; + std::vector > extracted(Nsimd); + + for(int i=0;iiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + + int ij_ldx =i+Lblock*j+Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]._internal; + + } + }} + } + + assert(mat.dimension(0) == Nt); + // ld loop and local only?? + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + parallel_for_nest2(int lt=0;ltGlobalSumVector(&mat(0,0,0),Nt*Lblock*Rblock); +} + +template +void TA2APionField::PionFieldWV(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *vj, + int orthogdim) +{ + const int g5=1; + PionFieldXX(mat,wi,vj,orthogdim,g5); +} +template +void TA2APionField::PionFieldWW(Eigen::Tensor &mat, + const LatticeFermion *wi, + const LatticeFermion *wj, + int orthogdim) +{ + const int nog5=0; + PionFieldXX(mat,wi,wj,orthogdim,nog5); +} +template +void TA2APionField::PionFieldVV(Eigen::Tensor &mat, + const LatticeFermion *vi, + const LatticeFermion *vj, + int orthogdim) +{ + const int nog5=0; + PionFieldXX(mat,vi,vj,orthogdim,nog5); +} + + +///////////////////////////////////////////////////////////////////////// +// New dirac trace code needed for efficiency (I think). +// TODO: Ask Antonin to auto gen from Mathemetica +///////////////////////////////////////////////////////////////////////// +template +inline iScalar traceGammaZ(const iMatrix &rhs) +{ + iScalar ret; + ret() = timesI(rhs(2,0)) + timesMinusI(rhs(3,1)) + timesMinusI(rhs(0,2)) + timesI(rhs(1,3)); + return ret; +}; +template +inline iScalar traceGammaZGamma5(const iMatrix &rhs) +{ + iScalar ret; + ret() = timesMinusI(rhs(2,0)) + timesI(rhs(3,1)) + timesMinusI(rhs(0,2)) + timesI(rhs(1,3)); + return ret; +}; +template +inline iScalar traceGammaY(const iMatrix &rhs) +{ + iScalar ret; + ret() = -rhs(3,0) + rhs(2,1) + rhs(1,2) -rhs(0,3); + return ret; +}; +template +inline iScalar traceGammaYGamma5(const iMatrix &rhs) +{ + iScalar ret; + ret() = rhs(3,0) - rhs(2,1) + rhs(1,2) -rhs(0,3); + return ret; +}; +template +inline iScalar traceGamma5(const iMatrix &rhs) +{ + iScalar ret; + ret() = rhs(0, 0) + rhs(1, 1) - rhs(2, 2) - rhs(3,3); + return ret; +}; +template +inline iScalar traceGammaT(const iMatrix &rhs) +{ + iScalar ret; + ret() = rhs(2, 0) + rhs(3, 1) + rhs(0, 2) + rhs(1,3); + return ret; +}; +template +inline iScalar traceGammaTGamma5(const iMatrix &rhs) +{ + iScalar ret; + ret() = -rhs(2, 0) - rhs(3, 1) + rhs(0, 2) + rhs(1,3); + return ret; +}; +template +inline iScalar traceGammaX(const iMatrix &rhs) +{ + iScalar ret; + ret() = timesMinusI(rhs(0, 3)) +timesMinusI(rhs(1, 2)) + + timesI(rhs(2, 1)) +timesI(rhs(3,0)); + return ret; +}; +template +inline iScalar traceGammaXGamma5(const iMatrix &rhs) +{ + iScalar ret; + ret() = timesMinusI(rhs(0, 3)) +timesMinusI(rhs(1, 2)) + + timesMinusI(rhs(2, 1)) +timesMinusI(rhs(3,0)); + return ret; +}; + + +//////////////////////////////////////////////////////////////////////// +// DeltaF=2 contraction ; use exernal WW field for Kaon, anti Kaon sink +//////////////////////////////////////////////////////////////////////// +// +// WW -- i vectors have adjoint, and j vectors not. +// -- Think of "i" as the strange quark, forward prop from 0 +// -- Think of "j" as the anti-down quark. +// +// WW_sd are w^dag_s w_d +// +// Hence VV vectors correspondingly are v^dag_d, v_s from t=0 +// and v^dag_d, v_s from t=dT +// +// There is an implicit g5 associated with each v^dag_d from use of g5 Hermiticity. +// The other gamma_5 lies in the WW external meson operator. +// +// From UKhadron wallbag.cc: +// +// LatticePropagator anti_d0 = adj( Gamma(G5) * Qd0 * Gamma(G5)); +// LatticePropagator anti_d1 = adj( Gamma(G5) * Qd1 * Gamma(G5)); +// +// PR1 = Qs0 * Gamma(G5) * anti_d0; +// PR2 = Qs1 * Gamma(G5) * anti_d1; +// +// TR1 = trace( PR1 * G1 ); +// TR2 = trace( PR2 * G2 ); +// Wick1 = TR1 * TR2; +// +// Wick2 = trace( PR1* G1 * PR2 * G2 ); +// // was Wick2 = trace( Qs0 * Gamma(G5) * anti_d0 * G1 * Qs1 * Gamma(G5) * anti_d1 * G2 ); +// +// TR TR(tx) = Wick1 = sum_x WW[t0]_sd < v^_d |g5 G| v_s> WW[t1]_s'd' < v^_d' |g5 G| v_s'> |_{x,tx) +// = sum_x [ Trace(WW[t0] VgV(t,x) ) x Trace( WW_[t1] VgV(t,x) ) ] +// +// +// Calc all Nt Trace(WW VV) products at once, take Nt^2 products of these. +// +// Fig8(tx) = Wick2 = sum_x WW[t0]_sd WW[t1]_s'd' < v^_d |g5 G| v_s'> < v^_d' |g5 G| v_s> |_{x,tx} +// +// = sum_x Trace( WW[t0] VV[t,x] WW[t1] VV[t,x] ) +// +// Might as well form Ns x Nj x Ngamma matrix +// +/////////////////////////////////////////////////////////////////////////////// + +template +void TA2APionField::DeltaFeq2(int dt_min,int dt_max, + Eigen::Tensor &dF2_fig8, + Eigen::Tensor &dF2_trtr, + Eigen::Tensor &WW_sd, + const LatticeFermion *vs, + const LatticeFermion *vd, + int orthogdim) +{ + LOG(Message) << "Computing A2A DeltaF=2 graph" << std::endl; + + int dt = dt_min; // HACK ; should loop over dt + + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + typedef iSinglet Scalar_v; + typedef iSinglet Scalar_s; + + int N_s = WW_sd.dimension(1); + int N_d = WW_sd.dimension(2); + + GridBase *grid = vs[0]._grid; + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + int Nt = grid->GlobalDimensions()[orthogdim]; + + dF2_trtr.resize(Nt,16); + dF2_fig8.resize(Nt,16); + for(int t=0;t WWVV (Nt,grid) + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + ////////////////////////////////////// + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + ////////////////////////////////////// + int MFrvol = rd*N_s*N_d; + int MFlvol = ld*N_s*N_d; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++){ + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++){ + lsSum[r]=scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + Eigen::Tensor VgV_sd(N_s,N_d,16); // trace with dirac structure + Eigen::Tensor VgV_sd_l(N_s,N_d,16,Nsimd); + int Ng; + + LOG(Message) << "Computing A2A DeltaF=2 graph entering site loop" << std::endl; + + double t_tot =0; + double t_vv =0; + double t_extr =0; + double t_transp=0; + double t_WW =0; + double t_trtr =0; + double t_fig8 =0; + + t_tot -=usecond(); + + for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n extracted(Nsimd); + Scalar_v temp; + for(int s=0;s icoor(nd); + grid->iCoorFromIindex(icoor,l); + int ttt = r+icoor[orthogdim]*rd; + + Eigen::MatrixXcd VgV(N_d,N_s); + Eigen::MatrixXcd VgV_T(N_s,N_d); + Eigen::MatrixXcd WW0(N_s,N_d); + Eigen::MatrixXcd WW1(N_s,N_d); + + ///////////////////////////////////////// + // Single site VgV , scalar order + ///////////////////////////////////////// + t_transp -=usecond(); + parallel_for(int d=0;dGlobalSumVector(&dF2_fig8(0),Nt*Ng); + grid->GlobalSumVector(&dF2_trtr(0),Nt*Ng); + t_tot +=usecond(); + + LOG(Message) << "Computing A2A DeltaF=2 graph t_tot " << t_tot << " us "<< std::endl; + LOG(Message) << "Computing A2A DeltaF=2 graph t_vv " << t_vv << " us "<< std::endl; + LOG(Message) << "Computing A2A DeltaF=2 graph t_extr " << t_extr << " us "<< std::endl; + LOG(Message) << "Computing A2A DeltaF=2 graph t_transp " << t_transp << " us "<< std::endl; + LOG(Message) << "Computing A2A DeltaF=2 graph t_WW " << t_WW << " us "<< std::endl; + LOG(Message) << "Computing A2A DeltaF=2 graph t_trtr " << t_trtr << " us "<< std::endl; + LOG(Message) << "Computing A2A DeltaF=2 graph t_fig8 " << t_fig8 << " us "<< std::endl; + +} + +// WW is w_s^dag (x) w_d (G5 implicitly absorbed) +// +// WWVV will have spin-col (x) spin-col tensor. +// +// Want this to look like a strange fwd prop, anti-d prop. +// +// Take WW_sd v^dag_d (x) v_s +// +// + +template +void TA2APionField::DeltaFeq2_alt(int dt_min,int dt_max, + Eigen::Tensor &dF2_fig8, + Eigen::Tensor &dF2_trtr, + Eigen::Tensor &den0, + Eigen::Tensor &den1, + Eigen::Tensor &WW_sd, + const LatticeFermion *vs, + const LatticeFermion *vd, + int orthogdim) +{ + LOG(Message) << "Computing A2A DeltaF=2 graph" << std::endl; + + int dt = dt_min; // HACK ; should loop over dt + + auto G5 = Gamma(Gamma::Algebra::Gamma5); + + typedef typename FImpl::SiteSpinor vobj; + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + typedef iSinglet Scalar_v; + typedef iSinglet Scalar_s; + + int N_s = WW_sd.dimension(1); + int N_d = WW_sd.dimension(2); + + GridBase *grid = vs[0]._grid; + + const int nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + int N_t = grid->GlobalDimensions()[orthogdim]; + double vol = 1.0; + for(int dim=0;dimGlobalDimensions()[dim]; + } + double nodes = grid->NodeCount(); + dF2_trtr.resize(N_t,16); + dF2_fig8.resize(N_t,16); + + den0.resize(N_t); + den1.resize(N_t); + for(int t=0;t correlator from each wall + LatticeComplex D1(grid); + + LatticeComplex O1_trtr(grid); + LatticeComplex O2_trtr(grid); + LatticeComplex O3_trtr(grid); + LatticeComplex O4_trtr(grid); + LatticeComplex O5_trtr(grid); + + LatticeComplex O1_fig8(grid); + LatticeComplex O2_fig8(grid); + LatticeComplex O3_fig8(grid); + LatticeComplex O4_fig8(grid); + LatticeComplex O5_fig8(grid); + + O1_trtr = zero; + O2_trtr = zero; + O3_trtr = zero; + O4_trtr = zero; + O5_trtr = zero; + + O1_fig8 = zero; + O2_fig8 = zero; + O3_fig8 = zero; + O4_fig8 = zero; + O5_fig8 = zero; + + D0 = zero; + D1 = zero; + + double t_tot = -usecond(); + std::vector WWVV (N_t,grid); + for(int t=0;toSites();ss++){ + for(int s=0;soSites();ss++){ + for(int d_o=0;d_o C1; + std::vector C2; + std::vector C3; + std::vector C4; + std::vector C5; + + ////////////////////////////////////////////////////////// + // Could do AA, VV, SS, PP, TT and form linear combinations later. + // Almost 2x. but for many modes, the above loop dominates. + ////////////////////////////////////////////////////////// + double t_contr= -usecond(); + for(int t0=0;t0oSites();ss++){ + + auto VV0= WWVV[t0]._odata[ss]; + auto VV1= WWVV[t1]._odata[ss]; + + // Tr Tr Wick contraction + auto VX = Gamma(Gamma::Algebra::GammaX); + auto VY = Gamma(Gamma::Algebra::GammaY); + auto VZ = Gamma(Gamma::Algebra::GammaZ); + auto VT = Gamma(Gamma::Algebra::GammaT); + + auto AX = Gamma(Gamma::Algebra::GammaXGamma5); + auto AY = Gamma(Gamma::Algebra::GammaYGamma5); + auto AZ = Gamma(Gamma::Algebra::GammaZGamma5); + auto AT = Gamma(Gamma::Algebra::GammaTGamma5); + + auto S = Gamma(Gamma::Algebra::Identity); + auto P = Gamma(Gamma::Algebra::Gamma5); + + auto T0 = Gamma(Gamma::Algebra::SigmaXY); + auto T1 = Gamma(Gamma::Algebra::SigmaXZ); + auto T2 = Gamma(Gamma::Algebra::SigmaXT); + auto T3 = Gamma(Gamma::Algebra::SigmaYZ); + auto T4 = Gamma(Gamma::Algebra::SigmaYT); + auto T5 = Gamma(Gamma::Algebra::SigmaZT); + + O1_trtr._odata[ss] = trace(VX*VV0) * trace(VX*VV1) + + trace(VY*VV0) * trace(VY*VV1) + + trace(VZ*VV0) * trace(VZ*VV1) + + trace(VT*VV0) * trace(VT*VV1) + + trace(AX*VV0) * trace(AX*VV1) + + trace(AY*VV0) * trace(AY*VV1) + + trace(AZ*VV0) * trace(AZ*VV1) + + trace(AT*VV0) * trace(AT*VV1); + + O2_trtr._odata[ss] = trace(VX*VV0) * trace(VX*VV1) + + trace(VY*VV0) * trace(VY*VV1) + + trace(VZ*VV0) * trace(VZ*VV1) + + trace(VT*VV0) * trace(VT*VV1) + - trace(AX*VV0) * trace(AX*VV1) + - trace(AY*VV0) * trace(AY*VV1) + - trace(AZ*VV0) * trace(AZ*VV1) + - trace(AT*VV0) * trace(AT*VV1); + + O3_trtr._odata[ss] = trace(S*VV0) * trace(S*VV1) + + trace(P*VV0) * trace(P*VV1); + + O4_trtr._odata[ss] = trace(S*VV0) * trace(S*VV1) + - trace(P*VV0) * trace(P*VV1); + + O5_trtr._odata[ss] = trace(T0*VV0) * trace(T0*VV1) + + trace(T1*VV0) * trace(T1*VV1) + + trace(T2*VV0) * trace(T2*VV1) + + trace(T3*VV0) * trace(T3*VV1) + + trace(T4*VV0) * trace(T4*VV1) + + trace(T5*VV0) * trace(T5*VV1); + + + //////////////////////////////////// + // Fig8 Wick contraction + //////////////////////////////////// + // was (UKhadron) Wick2 = trace( Qs0 * Gamma(G5) * anti_d0 * G * Qs1 * Gamma(G5) * anti_d1 * G ); + // + // = trace( [ Vs WW_sd'[t0] Vd'^dag ] G [ Vs' WW_s'd[t1] Vd^dag ] G ) + // = trace( WWVV * G * WWVV * G ) + // + // Can do VV and AA seperately and then add/sub later cheaper + + O1_fig8._odata[ss] = trace (VV0 * VX * VV1 * VX) + + trace (VV0 * VY * VV1 * VY) + + trace (VV0 * VZ * VV1 * VZ) + + trace (VV0 * VT * VV1 * VT) + + trace (VV0 * AX * VV1 * AX) + + trace (VV0 * AY * VV1 * AY) + + trace (VV0 * AZ * VV1 * AZ) + + trace (VV0 * AT * VV1 * AT); + + O2_fig8._odata[ss] = trace (VV0 * VX * VV1 * VX) + + trace (VV0 * VY * VV1 * VY) + + trace (VV0 * VZ * VV1 * VZ) + + trace (VV0 * VT * VV1 * VT) + - trace (VV0 * AX * VV1 * AX) + - trace (VV0 * AY * VV1 * AY) + - trace (VV0 * AZ * VV1 * AZ) + - trace (VV0 * AT * VV1 * AT); + + O3_fig8._odata[ss] = trace (VV0 * S * VV1 * S) + + trace (VV0 * P * VV1 * P); + + O4_fig8._odata[ss] = trace (VV0 * S * VV1 * S) + - trace (VV0 * P * VV1 * P); + + O5_fig8._odata[ss] = trace (VV0 * T0 * VV1 * T0) + + trace (VV0 * T1 * VV1 * T1) + + trace (VV0 * T2 * VV1 * T2) + + trace (VV0 * T3 * VV1 * T3) + + trace (VV0 * T4 * VV1 * T4) + + trace (VV0 * T5 * VV1 * T5); + + + // Hack force PP correlator + // D0._odata[ss] = trace(P*VV0); // These match signed off + // D1._odata[ss] = trace(P*VV1); + D0._odata[ss] = trace(AT*VV0); + D1._odata[ss] = trace(AT*VV1); + + } + + sliceSum(O1_trtr,C1, orthogdim); + sliceSum(O2_trtr,C2, orthogdim); + sliceSum(O3_trtr,C3, orthogdim); + sliceSum(O4_trtr,C4, orthogdim); + sliceSum(O5_trtr,C5, orthogdim); + + for(int t=0;t +void TA2APionField::execute(void) +{ + LOG(Message) << "Computing A2A Pion fields" << std::endl; + + auto &a2a_i = envGet(A2ABase, par().A2A_i + "_class"); + auto &a2a_j = envGet(A2ABase, par().A2A_j + "_class"); + + /////////////////////////////////////////////// + // Square assumption for now Nl = Nr = N + /////////////////////////////////////////////// + int nt = env().getDim(Tp); + int nx = env().getDim(Xp); + int ny = env().getDim(Yp); + int nz = env().getDim(Zp); + + // int N_i = a2a_i.par().N; + // int N_j = a2a_j.par().N; + int N_i = a2a_i.getN(); + int N_j = a2a_j.getN(); + + int nmom=par().Nmom; + + int schurBlock = par().schurBlock; + int cacheBlock = par().cacheBlock; + + + /////////////////////////////////////////////// + // Momentum setup + /////////////////////////////////////////////// + GridBase *grid = env().getGrid(1); + std::vector phases(nmom,grid); + for(int m=0;m pionFieldWVmom_ij (nmom,nt,N_i,N_j); + Eigen::Tensor pionFieldWV_ij (nt,N_i,N_j); + + Eigen::Tensor pionFieldWVmom_ji (nmom,nt,N_j,N_i); + Eigen::Tensor pionFieldWV_ji (nt,N_j,N_i); + + + LOG(Message) << "Rank for A2A PionField is " << N_i << " x "<, wi); + envGetTmp(std::vector, vi); + + envGetTmp(std::vector, wj); + envGetTmp(std::vector, vj); + envGetTmp(FermionField, tmp_5d); + + LOG(Message) << "Finding v and w vectors " << std::endl; + + ////////////////////////////////////////////////////////////////////////// + // i,j is first loop over SchurBlock factors reusing 5D matrices + // ii,jj is second loop over cacheBlock factors for high perf contractoin + // iii,jjj are loops within cacheBlock + // Total index is sum of these i+ii+iii etc... + ////////////////////////////////////////////////////////////////////////// + + double flops = 0.0; + double bytes = 0.0; + double vol = nx*ny*nz*nt; + double vol3 = nx*ny*nz; + double t_schur=0; + double t_contr_vwm=0; + double t_contr_vw=0; + double t_contr_ww=0; + double t_contr_vv=0; + + double tt0 = usecond(); + for(int i=0;i pionFieldWVmomB_ij(nmom,nt,N_iii,N_jjj); + Eigen::Tensor pionFieldWVmomB_ji(nmom,nt,N_jjj,N_iii); + + Eigen::Tensor pionFieldWVB_ij(nt,N_iii,N_jjj); + Eigen::Tensor pionFieldWVB_ji(nt,N_jjj,N_iii); + + t_contr_vwm-=usecond(); + PionFieldWVmom(pionFieldWVmomB_ij, &wi[ii], &vj[jj], phases,Tp); + PionFieldWVmom(pionFieldWVmomB_ji, &wj[jj], &vi[ii], phases,Tp); + t_contr_vwm+=usecond(); + + t_contr_vw-=usecond(); + PionFieldWV(pionFieldWVB_ij, &wi[ii], &vj[jj],Tp); + PionFieldWV(pionFieldWVB_ji, &wj[jj], &vi[ii],Tp); + t_contr_vw+=usecond(); + + + flops += vol * ( 2 * 8.0 + 6.0 + 8.0*nmom) * N_iii*N_jjj; + + bytes += vol * (12.0 * sizeof(Complex) ) * N_iii*N_jjj + + vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj; + + /////////////////////////////////////////////////////////////// + // Copy back to full meson field tensor + /////////////////////////////////////////////////////////////// + parallel_for_nest2(int iii=0;iii< N_iii;iii++) { + for(int jjj=0;jjj< N_jjj;jjj++) { + + for(int m =0;m< nmom;m++) { + for(int t =0;t< nt;t++) { + pionFieldWVmom_ij(m,t,i+ii+iii,j+jj+jjj) = pionFieldWVmomB_ij(m,t,iii,jjj); + pionFieldWVmom_ji(m,t,j+jj+jjj,i+ii+iii) = pionFieldWVmomB_ji(m,t,jjj,iii); + }} + + for(int t =0;t< nt;t++) { + pionFieldWV_ij(t,i+ii+iii,j+jj+jjj) = pionFieldWVB_ij(t,iii,jjj); + pionFieldWV_ji(t,j+jj+jjj,i+ii+iii) = pionFieldWVB_ji(t,jjj,iii); + } + + + }} + }} + }} + + double nodes=grid->NodeCount(); + double tt1 = usecond(); + LOG(Message) << " Contraction of PionFields took "<<(tt1-tt0)/1.0e6<< " seconds " << std::endl; + LOG(Message) << " Schur "<<(t_schur)/1.0e6<< " seconds " << std::endl; + LOG(Message) << " Contr WVmom "<<(t_contr_vwm)/1.0e6<< " seconds " << std::endl; + LOG(Message) << " Contr WV "<<(t_contr_vw)/1.0e6<< " seconds " << std::endl; + + double t_kernel = t_contr_vwm; + LOG(Message) << " Arith "< + ///////////////////////////////////////////////////////////////////////// + std::vector corrMom(nt,ComplexD(0.0)); + + for(int i=0;i + ///////////////////////////////////////////////////////////////////////// + std::vector corr(nt,ComplexD(0.0)); + + for(int i=0;i corr_wwvv(nt,ComplexD(0.0)); + + wi.resize(N_i,grid); + vi.resize(N_i,grid); + wj.resize(N_j,grid); + vj.resize(N_j,grid); + + for(int i =0;i < N_i;i++) a2a_i.return_v(i, tmp_5d, vi[i]); + for(int i =0;i < N_i;i++) a2a_i.return_w(i, tmp_5d, wi[i]); + for(int j =0;j < N_j;j++) a2a_j.return_v(j, tmp_5d, vj[j]); + for(int j =0;j < N_j;j++) a2a_j.return_w(j, tmp_5d, wj[j]); + + Eigen::Tensor pionFieldWW_ij (nt,N_i,N_j); + Eigen::Tensor pionFieldVV_ji (nt,N_j,N_i); + Eigen::Tensor pionFieldWW_ji (nt,N_j,N_i); + Eigen::Tensor pionFieldVV_ij (nt,N_i,N_j); + + PionFieldWW(pionFieldWW_ij, &wi[0], &wj[0],Tp); + PionFieldVV(pionFieldVV_ji, &vj[0], &vi[0],Tp); + PionFieldWW(pionFieldWW_ji, &wj[0], &wi[0],Tp); + PionFieldVV(pionFieldVV_ij, &vi[0], &vj[0],Tp); + + + for(int i=0;i corr_z2(nt,ComplexD(0.0)); + Eigen::Tensor pionFieldWW (nt,N_i,N_i); + Eigen::Tensor pionFieldVV (nt,N_i,N_i); + + + PionFieldWW(pionFieldWW, &wi[0], &wi[0],Tp); + PionFieldVV(pionFieldVV, &vi[0], &vi[0],Tp); + for(int i=0;i DeltaF2_fig8 (nt,16); + Eigen::Tensor DeltaF2_trtr (nt,16); + Eigen::Tensor denom0 (nt); + Eigen::Tensor denom1 (nt); + + const int dT=16; + + DeltaFeq2_alt (dT,dT,DeltaF2_fig8,DeltaF2_trtr, + denom0,denom1, + pionFieldWW_ij,&vi[0],&vj[0],Tp); + for(int t=0;t(Qd0, vi[idx0], s, c); + FermToProp(Qd1, vi[idx1], s, c); + FermToProp(Qs0, vj[idx0], s, c); + FermToProp(Qs1, vj[idx1], s, c); + } + } + + std::vector gammas ( { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::GammaXGamma5, + Gamma::Algebra::GammaYGamma5, + Gamma::Algebra::GammaZGamma5, + Gamma::Algebra::GammaTGamma5, + Gamma::Algebra::Identity, + Gamma::Algebra::Gamma5, + Gamma::Algebra::SigmaXY, + Gamma::Algebra::SigmaXZ, + Gamma::Algebra::SigmaXT, + Gamma::Algebra::SigmaYZ, + Gamma::Algebra::SigmaYT, + Gamma::Algebra::SigmaZT + }); + + auto G5 = Gamma::Algebra::Gamma5; + LatticePropagator anti_d0 = adj( Gamma(G5) * Qd0 * Gamma(G5)); + LatticePropagator anti_d1 = adj( Gamma(G5) * Qd1 * Gamma(G5)); + LatticeComplex TR1(grid); + LatticeComplex TR2(grid); + LatticeComplex Wick1(grid); + LatticeComplex Wick2(grid); + + LatticePropagator PR1(grid); + LatticePropagator PR2(grid); + PR1 = Qs0 * Gamma(G5) * anti_d0; + PR2 = Qs1 * Gamma(G5) * anti_d1; + + for(int g=0;g C1; + std::vector C2; + std::vector C3; + sliceSum(Wick1,C1, Tp); + sliceSum(Wick2,C2, Tp); + sliceSum(TR1 ,C3, Tp); + + if(g<5){ + for(int t=0;t["<