#ifndef Hadrons_MContraction_A2APionField_hpp_ #define Hadrons_MContraction_A2APionField_hpp_ #include #include #include #include #include BEGIN_HADRONS_NAMESPACE /****************************************************************************** * A2APionField * ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MContraction) typedef std::pair GammaPair; class A2APionFieldPar : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(A2APionFieldPar, int, cacheBlock, int, schurBlock, int, Nmom, std::string, A2A_i, std::string, A2A_j, std::string, output); }; template class TA2APionField : public Module { public: FERM_TYPE_ALIASES(FImpl, ); SOLVER_TYPE_ALIASES(FImpl, ); typedef typename FImpl::SiteSpinor vobj; typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; typedef iSpinMatrix SpinMatrix_v; typedef iSpinMatrix SpinMatrix_s; typedef iSinglet Scalar_v; typedef iSinglet Scalar_s; typedef A2AModesSchurDiagTwo A2ABase; public: // constructor TA2APionField(const std::string name); // destructor virtual ~TA2APionField(void){}; // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); // setup virtual void setup(void); // execution virtual void execute(void); }; MODULE_REGISTER(A2APionField, ARG(TA2APionField), MContraction); MODULE_REGISTER(ZA2APionField, ARG(TA2APionField), MContraction); /****************************************************************************** * TA2APionField implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// template TA2APionField::TA2APionField(const std::string name) : Module(name) { } // dependencies/products /////////////////////////////////////////////////////// template std::vector TA2APionField::getInput(void) { std::vector in; in.push_back(par().A2A_i + "_class"); in.push_back(par().A2A_i + "_w_high_4d"); in.push_back(par().A2A_i + "_v_high_4d"); in.push_back(par().A2A_j + "_class"); in.push_back(par().A2A_j + "_w_high_4d"); in.push_back(par().A2A_j + "_v_high_4d"); return in; } template std::vector TA2APionField::getOutput(void) { std::vector out = {}; return out; } // setup /////////////////////////////////////////////////////////////////////// template void TA2APionField::setup(void) { // Four D fields envTmp(std::vector, "wi", 1, par().schurBlock, FermionField(env().getGrid(1))); envTmp(std::vector, "vi", 1, par().schurBlock, FermionField(env().getGrid(1))); envTmp(std::vector, "wj", 1, par().schurBlock, FermionField(env().getGrid(1))); envTmp(std::vector, "vj", 1, par().schurBlock, FermionField(env().getGrid(1))); // 5D tmp int Ls_i = env().getObjectLs(par().A2A_i + "_class"); envTmpLat(FermionField, "tmp_5d", Ls_i); int Ls_j= env().getObjectLs(par().A2A_j + "_class"); assert ( Ls_i == Ls_j ); } // execution /////////////////////////////////////////////////////////////////// template void TA2APionField::execute(void) { LOG(Message) << "Computing A2A Pion fields" << std::endl; auto &a2a_i = envGet(A2ABase, par().A2A_i + "_class"); auto &a2a_j = envGet(A2ABase, par().A2A_j + "_class"); /////////////////////////////////////////////// // Square assumption for now Nl = Nr = N /////////////////////////////////////////////// int nt = env().getDim(Tp); int nx = env().getDim(Xp); int ny = env().getDim(Yp); int nz = env().getDim(Zp); // int N_i = a2a_i.par().N; // int N_j = a2a_j.par().N; int N_i = a2a_i.getN(); int N_j = a2a_j.getN(); int nmom=par().Nmom; int schurBlock = par().schurBlock; int cacheBlock = par().cacheBlock; /////////////////////////////////////////////// // Momentum setup /////////////////////////////////////////////// GridBase *grid = env().getGrid(1); std::vector phases(nmom,grid); for(int m=0;m pionFieldWVmom_ij (nmom,nt,N_i,N_j); Eigen::Tensor pionFieldWV_ij (nt,N_i,N_j); Eigen::Tensor pionFieldWVmom_ji (nmom,nt,N_j,N_i); Eigen::Tensor pionFieldWV_ji (nt,N_j,N_i); LOG(Message) << "Rank for A2A PionField is " << N_i << " x "<, wi); envGetTmp(std::vector, vi); envGetTmp(std::vector, wj); envGetTmp(std::vector, vj); envGetTmp(FermionField, tmp_5d); LOG(Message) << "Finding v and w vectors " << std::endl; ////////////////////////////////////////////////////////////////////////// // i,j is first loop over SchurBlock factors reusing 5D matrices // ii,jj is second loop over cacheBlock factors for high perf contractoin // iii,jjj are loops within cacheBlock // Total index is sum of these i+ii+iii etc... ////////////////////////////////////////////////////////////////////////// double flops = 0.0; double bytes = 0.0; double vol = nx*ny*nz*nt; double vol3 = nx*ny*nz; double t_schur=0; double t_contr_vwm=0; double t_contr_vw=0; double t_contr_ww=0; double t_contr_vv=0; double tt0 = usecond(); for(int i=0;i pionFieldWVmomB_ij(nmom,nt,N_iii,N_jjj); Eigen::Tensor pionFieldWVmomB_ji(nmom,nt,N_jjj,N_iii); Eigen::Tensor pionFieldWVB_ij(nt,N_iii,N_jjj); Eigen::Tensor pionFieldWVB_ji(nt,N_jjj,N_iii); t_contr_vwm-=usecond(); A2Autils::PionFieldWVmom(pionFieldWVmomB_ij, &wi[ii], &vj[jj], phases,Tp); A2Autils::PionFieldWVmom(pionFieldWVmomB_ji, &wj[jj], &vi[ii], phases,Tp); t_contr_vwm+=usecond(); t_contr_vw-=usecond(); A2Autils::PionFieldWV(pionFieldWVB_ij, &wi[ii], &vj[jj],Tp); A2Autils::PionFieldWV(pionFieldWVB_ji, &wj[jj], &vi[ii],Tp); t_contr_vw+=usecond(); flops += vol * ( 2 * 8.0 + 6.0 + 8.0*nmom) * N_iii*N_jjj; bytes += vol * (12.0 * sizeof(Complex) ) * N_iii*N_jjj + vol * ( 2.0 * sizeof(Complex) *nmom ) * N_iii*N_jjj; /////////////////////////////////////////////////////////////// // Copy back to full meson field tensor /////////////////////////////////////////////////////////////// parallel_for_nest2(int iii=0;iii< N_iii;iii++) { for(int jjj=0;jjj< N_jjj;jjj++) { for(int m =0;m< nmom;m++) { for(int t =0;t< nt;t++) { pionFieldWVmom_ij(m,t,i+ii+iii,j+jj+jjj) = pionFieldWVmomB_ij(m,t,iii,jjj); pionFieldWVmom_ji(m,t,j+jj+jjj,i+ii+iii) = pionFieldWVmomB_ji(m,t,jjj,iii); }} for(int t =0;t< nt;t++) { pionFieldWV_ij(t,i+ii+iii,j+jj+jjj) = pionFieldWVB_ij(t,iii,jjj); pionFieldWV_ji(t,j+jj+jjj,i+ii+iii) = pionFieldWVB_ji(t,jjj,iii); } }} }} }} double nodes=grid->NodeCount(); double tt1 = usecond(); LOG(Message) << " Contraction of PionFields took "<<(tt1-tt0)/1.0e6<< " seconds " << std::endl; LOG(Message) << " Schur "<<(t_schur)/1.0e6<< " seconds " << std::endl; LOG(Message) << " Contr WVmom "<<(t_contr_vwm)/1.0e6<< " seconds " << std::endl; LOG(Message) << " Contr WV "<<(t_contr_vw)/1.0e6<< " seconds " << std::endl; double t_kernel = t_contr_vwm; LOG(Message) << " Arith "< ///////////////////////////////////////////////////////////////////////// std::vector corrMom(nt,ComplexD(0.0)); for(int i=0;i ///////////////////////////////////////////////////////////////////////// std::vector corr(nt,ComplexD(0.0)); for(int i=0;i corr_wwvv(nt,ComplexD(0.0)); wi.resize(N_i,grid); vi.resize(N_i,grid); wj.resize(N_j,grid); vj.resize(N_j,grid); for(int i =0;i < N_i;i++) a2a_i.return_v(i, tmp_5d, vi[i]); for(int i =0;i < N_i;i++) a2a_i.return_w(i, tmp_5d, wi[i]); for(int j =0;j < N_j;j++) a2a_j.return_v(j, tmp_5d, vj[j]); for(int j =0;j < N_j;j++) a2a_j.return_w(j, tmp_5d, wj[j]); Eigen::Tensor pionFieldWW_ij (nt,N_i,N_j); Eigen::Tensor pionFieldVV_ji (nt,N_j,N_i); Eigen::Tensor pionFieldWW_ji (nt,N_j,N_i); Eigen::Tensor pionFieldVV_ij (nt,N_i,N_j); A2Autils::PionFieldWW(pionFieldWW_ij, &wi[0], &wj[0],Tp); A2Autils::PionFieldVV(pionFieldVV_ji, &vj[0], &vi[0],Tp); A2Autils::PionFieldWW(pionFieldWW_ji, &wj[0], &wi[0],Tp); A2Autils::PionFieldVV(pionFieldVV_ij, &vi[0], &vj[0],Tp); for(int i=0;i corr_z2(nt,ComplexD(0.0)); Eigen::Tensor pionFieldWW (nt,N_i,N_i); Eigen::Tensor pionFieldVV (nt,N_i,N_i); A2Autils::PionFieldWW(pionFieldWW, &wi[0], &wi[0],Tp); A2Autils::PionFieldVV(pionFieldVV, &vi[0], &vi[0],Tp); for(int i=0;i DeltaF2_fig8 (nt,16); Eigen::Tensor DeltaF2_trtr (nt,16); Eigen::Tensor denom0 (nt); Eigen::Tensor denom1 (nt); const int dT=16; A2Autils::DeltaFeq2 (dT,dT,DeltaF2_fig8,DeltaF2_trtr, denom0,denom1, pionFieldWW_ij,&vi[0],&vj[0],Tp); { int g=0; // O_{VV+AA} for(int t=0;t(Qd0, vi[idx0], s, c); FermToProp(Qd1, vi[idx1], s, c); FermToProp(Qs0, vj[idx0], s, c); FermToProp(Qs1, vj[idx1], s, c); } } std::vector gammas ( { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT, Gamma::Algebra::GammaXGamma5, Gamma::Algebra::GammaYGamma5, Gamma::Algebra::GammaZGamma5, Gamma::Algebra::GammaTGamma5, Gamma::Algebra::Identity, Gamma::Algebra::Gamma5, Gamma::Algebra::SigmaXY, Gamma::Algebra::SigmaXZ, Gamma::Algebra::SigmaXT, Gamma::Algebra::SigmaYZ, Gamma::Algebra::SigmaYT, Gamma::Algebra::SigmaZT }); auto G5 = Gamma::Algebra::Gamma5; LatticePropagator anti_d0 = adj( Gamma(G5) * Qd0 * Gamma(G5)); LatticePropagator anti_d1 = adj( Gamma(G5) * Qd1 * Gamma(G5)); LatticeComplex TR1(grid); LatticeComplex TR2(grid); LatticeComplex Wick1(grid); LatticeComplex Wick2(grid); LatticePropagator PR1(grid); LatticePropagator PR2(grid); PR1 = Qs0 * Gamma(G5) * anti_d0; PR2 = Qs1 * Gamma(G5) * anti_d1; for(int g=0;g C1; std::vector C2; std::vector C3; sliceSum(Wick1,C1, Tp); sliceSum(Wick2,C2, Tp); sliceSum(TR1 ,C3, Tp); /* if(g<5){ for(int t=0;t["<