A2A Lepton-Meson Field contraction (commit to https://github.com/paboyle/Grid.git)
@@ -68,6 +68,16 @@ public:
        const std::vector<ComplexField> &emB1,
        int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);

  template <typename TensorType>
  static void LeptonField(TensorType &mat,
        const FermionField *lhs_wi,
        const FermionField *rhs_vj,
        const std::vector<ComplexField> &lepton02,
        const std::vector<ComplexField> &lepton03,
        const std::vector<ComplexField> &lepton12,
        const std::vector<ComplexField> &lepton13,
        int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);

  static void ContractWWVV(std::vector<PropagatorField> &WWVV,
                           const Eigen::Tensor<ComplexD,3> &WW_sd,
                           const FermionField *vs,
@@ -809,6 +819,204 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
    if (t_gsum) *t_gsum += usecond();
}

// "Lepton" field w_i(x)^dag * L_mu * g^L_mu * v_j(x)
//
// With V-A current:
//
// g^L_mu = g_mu * (1 - g5)
//
// then in spin space
//
//                 ( 0   0   L_02   L_03 )
// L_mu * g^L_mu = ( 0   0   L_12   L_13 )
//                 ( 0   0   0      0    )
//                 ( 0   0   0      0    )
//
//
// With
//
// L_02 =  2*L3 + 2*i*L2
// L_03 = -2*L1 + 2*i*L0
// L_12 =  2*L1 + 2*i*L0
// L_13 =  2*L3 - 2*i*L2
//
// (where mu = (x,y,z,t) )
//
template <class FImpl>
template <typename TensorType>
void A2Autils<FImpl>::LeptonField(TensorType &mat,
          const FermionField *lhs_wi,
          const FermionField *rhs_vj,
          const std::vector<ComplexField> &lepton02,
          const std::vector<ComplexField> &lepton03,
          const std::vector<ComplexField> &lepton12,
          const std::vector<ComplexField> &lepton13,
          int orthogdim, double *t_kernel, double *t_gsum)
{
    typedef typename FermionField::vector_object vobj;
    typedef typename vobj::scalar_object         sobj;
    typedef typename vobj::scalar_type           scalar_type;
    typedef typename vobj::vector_type           vector_type;

    typedef iSpinMatrix<vector_type> SpinMatrix_v;
    typedef iSpinMatrix<scalar_type> SpinMatrix_s;
    typedef iSinglet<vector_type>    Singlet_v;
    typedef iSinglet<scalar_type>    Singlet_s;

    int Lblock = mat.dimension(3);
    int Rblock = mat.dimension(4);

    GridBase *grid = lhs_wi[0]._grid;

    const int    Nd = grid->_ndimension;
    const int Nsimd = grid->Nsimd();

    int Nt  = grid->GlobalDimensions()[orthogdim];

    int Nlep = lepton02.size();
    assert(lepton03.size() == Nlep);
    assert(lepton12.size() == Nlep);
    assert(lepton13.size() == Nlep);

    int fd=grid->_fdimensions[orthogdim];
    int ld=grid->_ldimensions[orthogdim];
    int rd=grid->_rdimensions[orthogdim];

    // will locally sum vectors first
    // sum across these down to scalars
    // splitting the SIMD
    int MFrvol = rd*Lblock*Rblock*Nlep;
    int MFlvol = ld*Lblock*Rblock*Nlep;

    Vector<vector_type> lvSum(MFrvol);
    parallel_for (int r = 0; r < MFrvol; r++)
    {
        lvSum[r] = zero;
    }

    Vector<scalar_type> lsSum(MFlvol);
    parallel_for (int r = 0; r < MFlvol; r++)
    {
        lsSum[r] = scalar_type(0.0);
    }
    int e1=    grid->_slice_nblock[orthogdim];
    int e2=    grid->_slice_block [orthogdim];
    int stride=grid->_slice_stride[orthogdim];

    // Nested parallelism would be ok
    // Wasting cores here. Test case r
    if (t_kernel) *t_kernel = -usecond();
    parallel_for(int r=0;r<rd;r++)
    {
        int so=r*grid->_ostride[orthogdim]; // base offset for start of plane

        for(int n=0;n<e1;n++)
        for(int b=0;b<e2;b++)
        {
            int ss= so+n*stride+b;

            for(int i=0;i<Lblock;i++)
            {
                auto left = conjugate(lhs_wi[i]._odata[ss]);

                for(int j=0;j<Rblock;j++)
                {
                    SpinMatrix_v vv;
                    auto right = rhs_vj[j]._odata[ss];

                    for(int s1=0;s1<Ns;s1++)
                    for(int s2=0;s2<Ns;s2++)
                    {
                        vv()(s1,s2)() = left()(s2)(0) * right()(s1)(0)
                                        + left()(s2)(1) * right()(s1)(1)
                                        + left()(s2)(2) * right()(s1)(2);
                    }
                    // After getting the sitewise product do the lepton-field loop
                    int base = Nlep*i+Nlep*Lblock*j+Nlep*Lblock*Rblock*r;

                    for ( int m=0;m<Nlep;m++)
                    {
                        int idx  = m+base;

                        auto lep02  = lepton02[m]._odata[ss];
                        auto lep12  = lepton12[m]._odata[ss];
                        auto lep03  = lepton03[m]._odata[ss];
                        auto lep13  = lepton13[m]._odata[ss];

                        lvSum[idx] += vv()(2,0)()*lep02()()() + vv()(3,0)()*lep03()()()
                                      + vv()(2,1)()*lep12()()() + vv()(3,1)()*lep13()()();
                    }
                }
            }
        }
    }
    // Sum across simd lanes in the plane, breaking out orthog dir.
    parallel_for(int rt=0;rt<rd;rt++)
    {
        std::vector<int> icoor(Nd);
        std::vector<scalar_type> extracted(Nsimd);

        for(int i=0;i<Lblock;i++)
        for(int j=0;j<Rblock;j++)
        for(int m=0;m<Nlep;m++)
        {

            int ij_rdx = m+Nlep*i+Nlep*Lblock*j+Nlep*Lblock*Rblock*rt;

            extract<vector_type,scalar_type>(lvSum[ij_rdx],extracted);
            for(int idx=0;idx<Nsimd;idx++)
            {
                grid->iCoorFromIindex(icoor,idx);

                int ldx    = rt+icoor[orthogdim]*rd;
                int ij_ldx = m+Nlep*i+Nlep*Lblock*j+Nlep*Lblock*Rblock*ldx;

                lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx];
            }
        }
    }
    if (t_kernel) *t_kernel += usecond();

    // ld loop and local only??
    int pd = grid->_processors[orthogdim];
    int pc = grid->_processor_coor[orthogdim];
    parallel_for_nest2(int lt=0;lt<ld;lt++)
    {
        for(int pt=0;pt<pd;pt++)
        {
            int t = lt + pt*ld;
            if (pt == pc)
            {
                for(int i=0;i<Lblock;i++)
                for(int j=0;j<Rblock;j++)
                for(int m=0;m<Nlep;m++)
                {
                    int ij_dx = m+Nlep*i + Nlep*Lblock * j + Nlep*Lblock * Rblock * lt;

                    mat(m,0,t,i,j) = lsSum[ij_dx];
                }
            }
            else
            {
                const scalar_type zz(0.0);

                for(int i=0;i<Lblock;i++)
                for(int j=0;j<Rblock;j++)
                for(int m=0;m<Nlep;m++)
                {
                    mat(m,0,t,i,j) = zz;
                }
            }
        }
    }
    if (t_gsum) *t_gsum = -usecond();
    grid->GlobalSumVector(&mat(0,0,0,0,0),Nlep*Nt*Lblock*Rblock);
    if (t_gsum) *t_gsum += usecond();
}


////////////////////////////////////////////
// Schematic thoughts about more generalised four quark insertion
//
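
A minimal calling sketch for the new routine, not part of this commit: it assumes the old Grid API used in this file (Lattice::_grid and friends), WilsonImplR as the fermion implementation, caller-provided A2A vectors wi and vj, per-direction lepton fields L0..L3, and a time direction Tp; all of these names are illustrative. The four spin components are pre-combined exactly as in the header comment of LeptonField, and the result tensor is laid out as mat(m, 0, t, i, j), matching the indexing the kernel uses and the Lblock/Rblock sizes it reads from dimensions 3 and 4.

// Illustrative sketch only; identifiers wi, vj, L0..L3, Tp and the WilsonImplR choice are assumptions.
#include <Grid/Grid.h>
#include <Grid/qcd/utils/A2Autils.h>   // header added to in this commit; include path as in this repository

using namespace Grid;
using namespace Grid::QCD;

typedef A2Autils<WilsonImplR>  A2A;
typedef A2A::FermionField      FermionField;
typedef A2A::ComplexField      ComplexField;

Eigen::Tensor<ComplexD,5> leptonFieldExample(const std::vector<FermionField> &wi,
                                             const std::vector<FermionField> &vj,
                                             const std::vector<ComplexField> &L0,
                                             const std::vector<ComplexField> &L1,
                                             const std::vector<ComplexField> &L2,
                                             const std::vector<ComplexField> &L3,
                                             int Tp)
{
    GridBase *grid = L0[0]._grid;
    int       Nlep = L0.size();
    int       nt   = grid->GlobalDimensions()[Tp];
    ComplexD  two(2., 0.), twoi(0., 2.);

    // Pre-combine the four Lorentz components L0..L3 into the four non-zero spin
    // entries of L_mu * g^L_mu, following the combinations in the header comment.
    std::vector<ComplexField> lep02, lep03, lep12, lep13;
    ComplexField tmp(grid);
    for (int m = 0; m < Nlep; m++)
    {
        tmp = two*L3[m] + twoi*L2[m];  lep02.push_back(tmp);   // L_02 =  2*L3 + 2*i*L2
        tmp = twoi*L0[m] - two*L1[m];  lep03.push_back(tmp);   // L_03 = -2*L1 + 2*i*L0
        tmp = two*L1[m] + twoi*L0[m];  lep12.push_back(tmp);   // L_12 =  2*L1 + 2*i*L0
        tmp = two*L3[m] - twoi*L2[m];  lep13.push_back(tmp);   // L_13 =  2*L3 - 2*i*L2
    }

    // mat is indexed as mat(m, 0, t, i, j); LeptonField reads Lblock and Rblock
    // from dimensions 3 and 4 and global-sums the whole tensor at the end.
    Eigen::Tensor<ComplexD,5> mat(Nlep, 1, nt, (int)wi.size(), (int)vj.size());
    double t_kernel = 0., t_gsum = 0.;

    A2A::LeptonField(mat, &wi[0], &vj[0],
                     lep02, lep03, lep12, lep13,
                     Tp, &t_kernel, &t_gsum);
    return mat;
}

Since ranks that do not own a given timeslice write zeros before the final GlobalSumVector, every rank ends up with the complete Nlep x Nt x Lblock x Rblock result.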