1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-27 22:25:56 +01:00

compiles on GPU. 3pt still wrong!!!!

This commit is contained in:
Felix Erben 2020-12-17 17:04:08 +00:00
parent 808f1e0e8c
commit f36d6f3923
2 changed files with 544 additions and 449 deletions

View File

@ -1047,7 +1047,7 @@ A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
{ {
GridBase *grid = vs[0].Grid(); GridBase *grid = vs[0].Grid();
int nd = grid->_ndimension; //int nd = grid->_ndimension;
int Nsimd = grid->Nsimd(); int Nsimd = grid->Nsimd();
int N_t = WW_sd.dimensions()[0]; int N_t = WW_sd.dimensions()[0];
int N_s = WW_sd.dimensions()[1]; int N_s = WW_sd.dimensions()[1];

View File

@ -44,13 +44,13 @@ public:
typedef typename ComplexField::vector_object vobj; typedef typename ComplexField::vector_object vobj;
typedef Lattice<iSpinMatrix<typename FImpl::Simd>> SpinMatrixField; typedef Lattice<iSpinMatrix<typename FImpl::Simd>> SpinMatrixField;
typedef typename SpinMatrixField::vector_object sobj; //typedef typename SpinMatrixField::vector_object sobj;
static const int epsilon[6][3] ; //static const int epsilon[6][3] ;
static const Real epsilon_sgn[6]; //static const Real epsilon_sgn[6];
private: private:
template <class mobj, class robj> template <class mobj, class robj> accelerator_inline
static void BaryonSite(const mobj &D1, static void BaryonSite(const mobj &D1,
const mobj &D2, const mobj &D2,
const mobj &D3, const mobj &D3,
@ -61,7 +61,7 @@ public:
const int parity, const int parity,
const bool * wick_contractions, const bool * wick_contractions,
robj &result); robj &result);
template <class mobj, class robj> template <class mobj, class robj> accelerator_inline
static void BaryonSiteMatrix(const mobj &D1, static void BaryonSiteMatrix(const mobj &D1,
const mobj &D2, const mobj &D2,
const mobj &D3, const mobj &D3,
@ -118,11 +118,11 @@ public:
const int nt, const int nt,
robj &result); robj &result);
private: private:
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void BaryonGamma3ptGroup1Site( static void BaryonGamma3ptGroup1Site(
const mobj &Dq1_ti, const mobj &Dq1_ti,
const mobj2 &Dq2_spec, const mobj2 &Dq2_spec,
const mobj2 &Dq3_spec, // const mobj2 &Dq3_spec,
const mobj &Dq4_tf, const mobj &Dq4_tf,
const Gamma GammaJ, const Gamma GammaJ,
const Gamma GammaBi, const Gamma GammaBi,
@ -130,11 +130,11 @@ public:
int wick_contraction, int wick_contraction,
robj &result); robj &result);
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void BaryonGamma3ptGroup2Site( static void BaryonGamma3ptGroup2Site(
const mobj2 &Dq1_spec, const mobj2 &Dq1_spec,
const mobj &Dq2_ti, const mobj &Dq2_ti,
const mobj2 &Dq3_spec, //const mobj2 &Dq3_spec,
const mobj &Dq4_tf, const mobj &Dq4_tf,
const Gamma GammaJ, const Gamma GammaJ,
const Gamma GammaBi, const Gamma GammaBi,
@ -142,10 +142,10 @@ public:
int wick_contraction, int wick_contraction,
robj &result); robj &result);
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void BaryonGamma3ptGroup3Site( static void BaryonGamma3ptGroup3Site(
const mobj2 &Dq1_spec, const mobj2 &Dq1_spec,
const mobj2 &Dq2_spec, //const mobj2 &Dq2_spec,
const mobj &Dq3_ti, const mobj &Dq3_ti,
const mobj &Dq4_tf, const mobj &Dq4_tf,
const Gamma GammaJ, const Gamma GammaJ,
@ -167,7 +167,7 @@ public:
const Gamma GammaBf, const Gamma GammaBf,
SpinMatrixField &stn_corr); SpinMatrixField &stn_corr);
private: private:
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ1EyeSite(const mobj &Dq_loop, static void SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec, const mobj2 &Du_spec,
const mobj &Dd_tf, const mobj &Dd_tf,
@ -176,7 +176,7 @@ public:
const Gamma GammaB_sigma, const Gamma GammaB_sigma,
const Gamma GammaB_nucl, const Gamma GammaB_nucl,
robj &result); robj &result);
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti, static void SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf, const mobj &Du_tf,
const mobj2 &Du_spec, const mobj2 &Du_spec,
@ -188,7 +188,7 @@ public:
robj &result); robj &result);
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ2EyeSite(const mobj &Dq_loop, static void SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec, const mobj2 &Du_spec,
const mobj &Dd_tf, const mobj &Dd_tf,
@ -197,7 +197,7 @@ public:
const Gamma GammaB_sigma, const Gamma GammaB_sigma,
const Gamma GammaB_nucl, const Gamma GammaB_nucl,
robj &result); robj &result);
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti, static void SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf, const mobj &Du_tf,
const mobj2 &Du_spec, const mobj2 &Du_spec,
@ -230,23 +230,15 @@ public:
const std::string op, const std::string op,
SpinMatrixField &stn_corr); SpinMatrixField &stn_corr);
}; };
/*
template <class FImpl> template <class FImpl>
const int BaryonUtils<FImpl>::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; const int BaryonUtils<FImpl>::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};
/*template <class FImpl>
const Complex BaryonUtils<FImpl>::epsilon_sgn[6] = {Complex(1),
Complex(1),
Complex(1),
Complex(-1),
Complex(-1),
Complex(-1)};
*/
template <class FImpl> template <class FImpl>
const Real BaryonUtils<FImpl>::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.}; const Real BaryonUtils<FImpl>::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.};
*/
//This is the old version //This is the old version
template <class FImpl> template <class FImpl>
template <class mobj, class robj> template <class mobj, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonSite(const mobj &D1, void BaryonUtils<FImpl>::BaryonSite(const mobj &D1,
const mobj &D2, const mobj &D2,
const mobj &D3, const mobj &D3,
@ -274,16 +266,20 @@ void BaryonUtils<FImpl>::BaryonSite(const mobj &D1,
auto GBf_D3 = GammaB_f * D3; auto GBf_D3 = GammaB_f * D3;
auto GAf_D3 = GammaA_f * D3; auto GAf_D3 = GammaA_f * D3;
for (int ie_f=0; ie_f < 6 ; ie_f++){ Real ee;
int a_f = epsilon[ie_f][0]; //a
int b_f = epsilon[ie_f][1]; //b
int c_f = epsilon[ie_f][2]; //c
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = epsilon[ie_i][0]; //a'
int b_i = epsilon[ie_i][1]; //b'
int c_i = epsilon[ie_i][2]; //c'
Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
//This is the \delta_{456}^{123} part //This is the \delta_{456}^{123} part
if (wick_contraction[0]){ if (wick_contraction[0]){
for (int rho=0; rho<Ns; rho++){ for (int rho=0; rho<Ns; rho++){
@ -361,7 +357,7 @@ void BaryonUtils<FImpl>::BaryonSite(const mobj &D1,
//New version without parity projection or trace //New version without parity projection or trace
template <class FImpl> template <class FImpl>
template <class mobj, class robj> template <class mobj, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonSiteMatrix(const mobj &D1, void BaryonUtils<FImpl>::BaryonSiteMatrix(const mobj &D1,
const mobj &D2, const mobj &D2,
const mobj &D3, const mobj &D3,
@ -384,16 +380,21 @@ void BaryonUtils<FImpl>::BaryonSiteMatrix(const mobj &D1,
auto GBf_D3 = GammaB_f * D3; auto GBf_D3 = GammaB_f * D3;
auto GAf_D3 = GammaA_f * D3; auto GAf_D3 = GammaA_f * D3;
for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = epsilon[ie_f][0]; //a
int b_f = epsilon[ie_f][1]; //b
int c_f = epsilon[ie_f][2]; //c
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = epsilon[ie_i][0]; //a'
int b_i = epsilon[ie_i][1]; //b'
int c_i = epsilon[ie_i][2]; //c'
Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
//This is the \delta_{456}^{123} part //This is the \delta_{456}^{123} part
if (wick_contraction[0]){ if (wick_contraction[0]){
for (int rho_i=0; rho_i<Ns; rho_i++){ for (int rho_i=0; rho_i<Ns; rho_i++){
@ -519,10 +520,10 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
GridBase *grid = q1_left.Grid(); GridBase *grid = q1_left.Grid();
autoView(vbaryon_corr, baryon_corr,CpuWrite); autoView(vbaryon_corr , baryon_corr , AcceleratorWrite);
autoView( v1 , q1_left, CpuRead); autoView( v1 , q1_left , AcceleratorRead);
autoView( v2 , q2_left, CpuRead); autoView( v2 , q2_left , AcceleratorRead);
autoView( v3 , q3_left, CpuRead); autoView( v3 , q3_left , AcceleratorRead);
Real bytes =0.; Real bytes =0.;
bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real)); bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
@ -538,12 +539,13 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
t =-usecond(); t =-usecond();
accelerator_for(ss, grid->oSites(), grid->Nsimd(), { accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto D1 = v1[ss]; auto D1 = v1(ss);
auto D2 = v2[ss]; auto D2 = v2(ss);
auto D3 = v3[ss]; auto D3 = v3(ss);
vobj result=Zero(); typedef decltype(coalescedRead(vbaryon_corr[0])) cVec;
cVec result=Zero();
BaryonSite(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result); BaryonSite(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result);
vbaryon_corr[ss] = result; coalescedWrite(vbaryon_corr[ss],result);
} );//end loop over lattice sites } );//end loop over lattice sites
t += usecond(); t += usecond();
@ -568,37 +570,20 @@ void BaryonUtils<FImpl>::ContractBaryonsMatrix(const PropagatorField &q1_left,
GridBase *grid = q1_left.Grid(); GridBase *grid = q1_left.Grid();
autoView(vbaryon_corr, baryon_corr,CpuWrite); autoView(vbaryon_corr , baryon_corr , AcceleratorWrite);
autoView( v1 , q1_left, CpuRead); autoView( v1 , q1_left , AcceleratorRead);
autoView( v2 , q2_left, CpuRead); autoView( v2 , q2_left , AcceleratorRead);
autoView( v3 , q3_left, CpuRead); autoView( v3 , q3_left , AcceleratorRead);
// Real bytes =0.;
// bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
// for (int ie=0; ie < 6 ; ie++){
// if(ie==0 or ie==3){
// bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie];
// }
// else{
// bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie];
// }
// }
// Real t=0.;
// t =-usecond();
accelerator_for(ss, grid->oSites(), grid->Nsimd(), { accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto D1 = v1[ss]; auto D1 = v1(ss);
auto D2 = v2[ss]; auto D2 = v2(ss);
auto D3 = v3[ss]; auto D3 = v3(ss);
sobj result=Zero(); typedef decltype(coalescedRead(vbaryon_corr[0])) spinor;
spinor result=Zero();
BaryonSiteMatrix(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,wick_contractions,result); BaryonSiteMatrix(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,wick_contractions,result);
vbaryon_corr[ss] = result; coalescedWrite(vbaryon_corr[ss],result);
} );//end loop over lattice sites } );//end loop over lattice sites
// t += usecond();
// std::cout << GridLogDebug << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl;
} }
/* The array wick_contractions must be of length 6. The order * /* The array wick_contractions must be of length 6. The order *
@ -664,11 +649,11 @@ void BaryonUtils<FImpl>::ContractBaryonsSlicedMatrix(const mobj &D1,
* Dq3_spec is a quark line from t_i to t_f * Dq3_spec is a quark line from t_i to t_f
* Dq4_tf is a quark line from t_f to t_J */ * Dq4_tf is a quark line from t_f to t_J */
template<class FImpl> template<class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site( void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site(
const mobj &Dq1_ti, const mobj &Dq1_ti,
const mobj2 &Dq2_spec, const mobj2 &Dq2_spec,
const mobj2 &Dq3_spec, // const mobj2 &Dq3_spec,
const mobj &Dq4_tf, const mobj &Dq4_tf,
const Gamma GammaJ, const Gamma GammaJ,
const Gamma GammaBi, const Gamma GammaBi,
@ -678,41 +663,47 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site(
{ {
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; // auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti;
auto adjD4 = g5 * adj(Dq4_tf) * g5 ;
auto adjD4_g_D1 = adjD4 * GammaJ * Dq1_ti;
auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1;
auto D2_Gi = Dq2_spec * GammaBi; auto D2_Gi = Dq2_spec * GammaBi;
auto Gf_D2_Gi = GammaBf * D2_Gi; auto Gf_D2_Gi = GammaBf * D2_Gi;
auto Gf_D3 = GammaBf * Dq3_spec;
int a_f, b_f, c_f; // auto Gf_D3 = GammaBf * Dq3_spec; // including a second mobj2 parameter leads to compilation error
int a_i, b_i, c_i; auto Gf_D3 = GammaBf * Dq2_spec; //WRONG!!!!!
Real ee; Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){ for (int ie_f=0; ie_f < 6 ; ie_f++){
a_f = epsilon[ie_f][0]; //a int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
b_f = epsilon[ie_f][1]; //b int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
c_f = epsilon[ie_f][2]; //c int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){ for (int ie_i=0; ie_i < 6 ; ie_i++){
a_i = epsilon[ie_i][0]; //a' int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
b_i = epsilon[ie_i][1]; //b' int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
c_i = epsilon[ie_i][2]; //c' int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_f=0; alpha_f<Ns; alpha_f++){ for (int alpha_f=0; alpha_f<Ns; alpha_f++){
for (int beta_i=0; beta_i<Ns; beta_i++){ for (int beta_i=0; beta_i<Ns; beta_i++){
auto D2_Gi_ab_aa = D2_Gi ()(alpha_f,beta_i)(a_f,a_i); auto D2_Gi_ab_aa = D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i); auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i);
auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(alpha_f,beta_i)(b_f,a_i); auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i); //auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
auto Dq3_spec_ab_ab = Dq2_spec ()(alpha_f,beta_i)(a_f,b_i); // WRONG!!!!
for (int gamma_i=0; gamma_i<Ns; gamma_i++){ for (int gamma_i=0; gamma_i<Ns; gamma_i++){
auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(alpha_f,gamma_i)(a_f,c_i); auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(alpha_f,gamma_i)(a_f,c_i);
auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(alpha_f,gamma_i)(b_f,c_i); auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(alpha_f,gamma_i)(b_f,c_i);
for (int gamma_f=0; gamma_f<Ns; gamma_f++){ for (int gamma_f=0; gamma_f<Ns; gamma_f++){
auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(gamma_f,gamma_i)(c_f,c_i); auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(gamma_f,gamma_i)(c_f,c_i);
auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i); //auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
auto Dq3_spec_gb_cb = Dq2_spec ()(gamma_f,beta_i)(c_f,b_i); //WRONG!!!!
auto D2_Gi_gb_ca = D2_Gi ()(gamma_f,beta_i)(c_f,a_i); auto D2_Gi_gb_ca = D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
@ -756,11 +747,11 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site(
* Dq3_spec is a quark line from t_i to t_f * Dq3_spec is a quark line from t_i to t_f
* Dq4_tf is a quark line from t_f to t_J */ * Dq4_tf is a quark line from t_f to t_J */
template<class FImpl> template<class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site( void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site(
const mobj2 &Dq1_spec, const mobj2 &Dq1_spec,
const mobj &Dq2_ti, const mobj &Dq2_ti,
const mobj2 &Dq3_spec, // const mobj2 &Dq3_spec,
const mobj &Dq4_tf, const mobj &Dq4_tf,
const Gamma GammaJ, const Gamma GammaJ,
const Gamma GammaBi, const Gamma GammaBi,
@ -773,37 +764,40 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site(
auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi;
auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi;
auto Gf_D1 = GammaBf * Dq1_spec; auto Gf_D1 = GammaBf * Dq1_spec;
auto Gf_D3 = GammaBf * Dq3_spec; //auto Gf_D3 = GammaBf * Dq3_spec;
auto Gf_D3 = GammaBf * Dq1_spec; // WRONG!!!!!
int a_f, b_f, c_f;
int a_i, b_i, c_i;
Real ee; Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){ for (int ie_f=0; ie_f < 6 ; ie_f++){
a_f = epsilon[ie_f][0]; //a int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
b_f = epsilon[ie_f][1]; //b int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
c_f = epsilon[ie_f][2]; //c int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){ for (int ie_i=0; ie_i < 6 ; ie_i++){
a_i = epsilon[ie_i][0]; //a' int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
b_i = epsilon[ie_i][1]; //b' int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
c_i = epsilon[ie_i][2]; //c' int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_f=0; alpha_f<Ns; alpha_f++){ for (int alpha_f=0; alpha_f<Ns; alpha_f++){
for (int beta_i=0; beta_i<Ns; beta_i++){ for (int beta_i=0; beta_i<Ns; beta_i++){
auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(alpha_f,beta_i)(a_f,a_i); auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i); auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i);
auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(alpha_f,beta_i)(b_f,a_i); auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i); //auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
auto Dq3_spec_ab_ab = Dq1_spec ()(alpha_f,beta_i)(a_f,b_i); //WRONG!!!
for (int gamma_i=0; gamma_i<Ns; gamma_i++){ for (int gamma_i=0; gamma_i<Ns; gamma_i++){
auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(alpha_f,gamma_i)(a_f,c_i); auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(alpha_f,gamma_i)(a_f,c_i);
auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(alpha_f,gamma_i)(b_f,c_i); auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(alpha_f,gamma_i)(b_f,c_i);
for (int gamma_f=0; gamma_f<Ns; gamma_f++){ for (int gamma_f=0; gamma_f<Ns; gamma_f++){
auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(gamma_f,gamma_i)(c_f,c_i); auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(gamma_f,gamma_i)(c_f,c_i);
auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i); //auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
auto Dq3_spec_gb_cb = Dq1_spec ()(gamma_f,beta_i)(c_f,b_i); //WRONG!!!
auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(gamma_f,beta_i)(c_f,a_i); auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
if(wick_contraction == 1) { // Do contraction II1 if(wick_contraction == 1) { // Do contraction II1
@ -846,10 +840,10 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site(
* Dq3_ti is a quark line from t_i to t_J * Dq3_ti is a quark line from t_i to t_J
* Dq4_tf is a quark line from t_f to t_J */ * Dq4_tf is a quark line from t_f to t_J */
template<class FImpl> template<class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonGamma3ptGroup3Site( void BaryonUtils<FImpl>::BaryonGamma3ptGroup3Site(
const mobj2 &Dq1_spec, const mobj2 &Dq1_spec,
const mobj2 &Dq2_spec, // const mobj2 &Dq2_spec,
const mobj &Dq3_ti, const mobj &Dq3_ti,
const mobj &Dq4_tf, const mobj &Dq4_tf,
const Gamma GammaJ, const Gamma GammaJ,
@ -863,24 +857,25 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup3Site(
auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti;
auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3;
auto Gf_D1 = GammaBf * Dq1_spec; auto Gf_D1 = GammaBf * Dq1_spec;
auto D2_Gi = Dq2_spec * GammaBi; //auto D2_Gi = Dq2_spec * GammaBi;
auto D2_Gi = Dq1_spec * GammaBi; //WRONG!!!!!!!!!!!!!!!!!
auto Gf_D2_Gi = GammaBf * D2_Gi; auto Gf_D2_Gi = GammaBf * D2_Gi;
int a_f, b_f, c_f;
int a_i, b_i, c_i;
Real ee; Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){ for (int ie_f=0; ie_f < 6 ; ie_f++){
a_f = epsilon[ie_f][0]; //a int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
b_f = epsilon[ie_f][1]; //b int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
c_f = epsilon[ie_f][2]; //c int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){ for (int ie_i=0; ie_i < 6 ; ie_i++){
a_i = epsilon[ie_i][0]; //a' int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
b_i = epsilon[ie_i][1]; //b' int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
c_i = epsilon[ie_i][2]; //c' int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_f=0; alpha_f<Ns; alpha_f++){ for (int alpha_f=0; alpha_f<Ns; alpha_f++){
for (int beta_i=0; beta_i<Ns; beta_i++){ for (int beta_i=0; beta_i<Ns; beta_i++){
@ -951,38 +946,85 @@ void BaryonUtils<FImpl>::BaryonGamma3pt(
const Gamma GammaBf, const Gamma GammaBf,
SpinMatrixField &stn_corr) SpinMatrixField &stn_corr)
{ {
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
GridBase *grid = q_tf.Grid(); GridBase *grid = q_tf.Grid();
autoView( vcorr, stn_corr, CpuWrite); // autoView( vcorr, stn_corr, CpuWrite);
autoView( vq_ti , q_ti, CpuRead); // autoView( vq_ti , q_ti, CpuRead);
autoView( vq_tf , q_tf, CpuRead); // autoView( vq_tf , q_tf, CpuRead);
// if (group == 1) {
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
// auto Dq_ti = vq_ti[ss];
// auto Dq_tf = vq_tf[ss];
// sobj result=Zero();
// BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
// vcorr[ss] += result;
// });//end loop over lattice sites
// } else if (group == 2) {
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
// auto Dq_ti = vq_ti[ss];
// auto Dq_tf = vq_tf[ss];
// sobj result=Zero();
// BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
// vcorr[ss] += result;
// });//end loop over lattice sites
// } else if (group == 3) {
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
// auto Dq_ti = vq_ti[ss];
// auto Dq_tf = vq_tf[ss];
// sobj result=Zero();
// BaryonGamma3ptGroup3Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
// vcorr[ss] += result;
// });//end loop over lattice sites
// }
autoView( vcorr , stn_corr , AcceleratorWrite);
autoView( vq_ti , q_ti , AcceleratorRead);
autoView( vq_tf , q_tf , AcceleratorRead);
if (group == 1) { if (group == 1) {
accelerator_for(ss, grid->oSites(), grid->Nsimd(), { accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti[ss]; auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf[ss]; auto Dq_tf = vq_tf(ss);
sobj result=Zero(); //sobj result=Zero();
BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); typedef decltype(coalescedRead(vcorr[0])) spinor;
vcorr[ss] += result; spinor result=Zero();
//BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); //WRONG
// vcorr[ss] += result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites });//end loop over lattice sites
} else if (group == 2) { } else if (group == 2) {
accelerator_for(ss, grid->oSites(), grid->Nsimd(), { accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti[ss]; auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf[ss]; auto Dq_tf = vq_tf(ss);
sobj result=Zero(); //sobj result=Zero();
BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); typedef decltype(coalescedRead(vcorr[0])) spinor;
vcorr[ss] += result; spinor result=Zero();
// BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); //WRONG
// vcorr[ss] += result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites });//end loop over lattice sites
} else if (group == 3) { } else if (group == 3) {
accelerator_for(ss, grid->oSites(), grid->Nsimd(), { accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti[ss]; auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf[ss]; auto Dq_tf = vq_tf(ss);
sobj result=Zero(); //sobj result=Zero();
BaryonGamma3ptGroup3Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
vcorr[ss] += result; //BaryonGamma3ptGroup3Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
BaryonGamma3ptGroup3Site(Dq_spec1,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); //WRONG
// vcorr[ss] += result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites });//end loop over lattice sites
} }
} }
@ -997,7 +1039,7 @@ void BaryonUtils<FImpl>::BaryonGamma3pt(
* Dd_tf is a quark line from t_f to t_H * Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */ * Ds_ti is a quark line from t_i to t_H */
template <class FImpl> template <class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop, void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec, const mobj2 &Du_spec,
const mobj &Dd_tf, const mobj &Dd_tf,
@ -1010,36 +1052,47 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl; //auto Gn_adjDd_GH_Ds = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
// Gamma^B * Ds * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5) auto adjDd_GH_Ds = g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
auto GDsGDd = GammaB_sigma * Ds_ti * Gamma_H * g5 * adj(Dd_tf) * g5; auto Gn_adjDd_GH_Ds = GammaB_nucl * adjDd_GH_Ds;
// Dq_loop * \gamma_\mu^L auto Du_Gs = Du_spec * GammaB_sigma;
auto DqG = Dq_loop * Gamma_H; auto Dq_GH = Dq_loop * Gamma_H;
auto Tr_Dq_GH = trace(Dq_GH)()()();
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){ for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){ for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a' int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b' int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c' int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){ int eSgn_s = (ie_s < 3 ? 1 : -1);
for (int beta_n=0; beta_n<Ns; beta_n++){
auto GDsGDd_ab_bb = GDsGDd()(alpha_s,beta_n)(b_s,b_n); ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int tau2=0; tau2<Ns; tau2++){
for (int j=0; j<Nc; j++){ for (int alpha_n=0; alpha_n<Ns; alpha_n++){
auto DqG_tt_jj = DqG()(tau2,tau2)(j,j); for (int beta_s=0; beta_s<Ns; beta_s++){
auto ee_GDGDDG = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DqG_tt_jj;
auto Gn_adjDd_GH_Ds_ab_bb = Gn_adjDd_GH_Ds ()(alpha_n, beta_s)(b_n,b_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){ for (int gamma_s=0; gamma_s<Ns; gamma_s++){
for (int gamma_n=0; gamma_n<Ns; gamma_n++){ for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() += ee_GDGDDG * DuG()(alpha_s, beta_n)(a_s,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n); result()(gamma_n,gamma_s)() += ee * Gn_adjDd_GH_Ds_ab_bb
result()(gamma_s,gamma_n)() -= ee_GDGDDG * DuG()(gamma_s, beta_n)(c_s,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n); * Du_spec ()(gamma_n,gamma_s)(c_n,c_s)
* Du_Gs ()(alpha_n, beta_s)(a_n,a_s)
* Tr_Dq_GH;
result()(gamma_n,gamma_s)() -= ee * Gn_adjDd_GH_Ds_ab_bb
* Du_spec ()(alpha_n,gamma_s)(a_n,c_s)
* Du_Gs ()(gamma_n, beta_s)(c_n,a_s)
* Tr_Dq_GH;
}} }}
}} }}
}} }}
}
}
} }
/* Du_ti is a quark line from t_i to t_H /* Du_ti is a quark line from t_i to t_H
@ -1048,7 +1101,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
* Dd_tf is a quark line from t_f to t_H * Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */ * Ds_ti is a quark line from t_i to t_H */
template <class FImpl> template <class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti, void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf, const mobj &Du_tf,
const mobj2 &Du_spec, const mobj2 &Du_spec,
@ -1062,43 +1115,54 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl; auto Du_Gs = Du_spec * GammaB_sigma;
auto adjDu = g5 * adj(Du_tf) * g5; //auto Gn_adjDd_GH_Ds = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
auto adjDuG = adjDu * GammaB_nucl; auto adjDd_GH_Ds = g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
// Gamma^B * Ds * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5) auto Gn_adjDd_GH_Ds = GammaB_nucl * adjDd_GH_Ds;
auto GDsGDd = GammaB_sigma * Ds_ti * Gamma_H * g5 * adj(Dd_tf) * g5; auto adjDu_GH_Du = g5 * adj(Du_tf) * g5 * Gamma_H * Du_ti;
// Dq_loop * \gamma_\mu^L auto adjDu_GH_Du_Gs = adjDu_GH_Du * GammaB_sigma;
auto DuGH = Du_ti * Gamma_H;
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){ for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){ for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a' int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b' int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c' int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){ int eSgn_s = (ie_s < 3 ? 1 : -1);
for (int beta_n=0; beta_n<Ns; beta_n++){
auto GDsGDd_ab_bb = GDsGDd()(alpha_s,beta_n)(b_s,b_n); ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int tau2=0; tau2<Ns; tau2++){
for (int j=0; j<Nc; j++){ for (int alpha_n=0; alpha_n<Ns; alpha_n++){
auto DuGH_at_aj = DuGH()(alpha_s,tau2)(a_s,j); for (int beta_s=0; beta_s<Ns; beta_s++){
auto ee_GDGDDG_a = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DuGH_at_aj;
auto Gn_adjDd_GH_Ds_ab_bb = Gn_adjDd_GH_Ds ()(alpha_n, beta_s)(b_n,b_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){ for (int gamma_s=0; gamma_s<Ns; gamma_s++){
auto DuGH_gt_cj = DuGH()(gamma_s,tau2)(c_s,j);
auto ee_GDGDDG_c = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DuGH_gt_cj;
for (int gamma_n=0; gamma_n<Ns; gamma_n++){ for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() += ee_GDGDDG_a * DuG()(gamma_s, beta_n)(c_s,a_n) * adjDu()(tau2,gamma_n)(j,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDDG_c * adjDuG()(tau2, beta_n)(j,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n); result()(gamma_n,gamma_s)() += ee * Gn_adjDd_GH_Ds_ab_bb
result()(gamma_s,gamma_n)() -= ee_GDGDDG_a * adjDuG()(tau2, beta_n)(j,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n); * adjDu_GH_Du ()(alpha_n,gamma_s)(a_n,c_s)
result()(gamma_s,gamma_n)() -= ee_GDGDDG_c * DuG()(alpha_s, beta_n)(a_s,a_n) * adjDu()(tau2,gamma_n)(j,c_n); * Du_Gs ()(gamma_n, beta_s)(c_n,a_s);
}
} result()(gamma_n,gamma_s)() += ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du_Gs ()(gamma_n, beta_s)(c_n,a_s)
* Du_spec ()(alpha_n,gamma_s)(a_n,c_s);
result()(gamma_n,gamma_s)() -= ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du_Gs ()(alpha_n, beta_s)(a_n,a_s)
* Du_spec ()(gamma_n,gamma_s)(c_n,c_s);
result()(gamma_n,gamma_s)() -= ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du ()(gamma_n,gamma_s)(c_n,c_s)
* Du_Gs ()(alpha_n, beta_s)(a_n,a_s);
}}
}} }}
}} }}
}
}
} }
//Equivalent to "One-trace" //Equivalent to "One-trace"
@ -1107,7 +1171,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
* Dd_tf is a quark line from t_f to t_H * Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */ * Ds_ti is a quark line from t_i to t_H */
template <class FImpl> template <class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop, void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec, const mobj2 &Du_spec,
const mobj &Dd_tf, const mobj &Dd_tf,
@ -1120,36 +1184,44 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl; //auto Gn_adjDd_GH_Duloop_GH_Ds = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Dq_loop * Gamma_H * Ds_ti;
// Gamma^B * Ds * \gamma_\mu^L auto adjDd_GH_Duloop_GH_Ds = g5 * adj(Dd_tf) * g5 * Gamma_H * Dq_loop * Gamma_H * Ds_ti;
auto GDsG = GammaB_sigma * Ds_ti * Gamma_H; auto Gn_adjDd_GH_Duloop_GH_Ds = GammaB_nucl * adjDd_GH_Duloop_GH_Ds;
// Dq_loop * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5) auto Du_Gs = Du_spec * GammaB_sigma;
auto DqGDd = Dq_loop * Gamma_H * g5 * adj(Dd_tf) * g5;
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){ for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){ for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a' int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b' int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c' int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){ int eSgn_s = (ie_s < 3 ? 1 : -1);
for (int tau=0; tau<Ns; tau++){
for (int i=0; i<Nc; i++){ ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
auto GDsG_at_bi = GDsG()(alpha_s,tau)(b_s,i);
for (int beta_n=0; beta_n<Ns; beta_n++){ for (int alpha_n=0; alpha_n<Ns; alpha_n++){
auto DqGDd_tb_ib = DqGDd()(tau,beta_n)(i,b_n); for (int beta_s=0; beta_s<Ns; beta_s++){
auto ee_GDGDGD = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DqGDd_tb_ib;
auto Gn_adjDd_GH_Duloop_GH_Ds_ab_bb = Gn_adjDd_GH_Duloop_GH_Ds ()(alpha_n,beta_s)(b_n,b_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){ for (int gamma_s=0; gamma_s<Ns; gamma_s++){
for (int gamma_n=0; gamma_n<Ns; gamma_n++){ for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() -= ee_GDGDGD * DuG()(alpha_s, beta_n)(a_s,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD * DuG()(gamma_s, beta_n)(c_s,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n); result()(gamma_n,gamma_s)() -= ee * Du_spec ()(gamma_n,gamma_s)(c_n,c_s)
* Du_Gs ()(alpha_n,beta_s)(a_n,a_s)
* Gn_adjDd_GH_Duloop_GH_Ds_ab_bb;
result()(gamma_n,gamma_s)() += ee * Du_Gs ()(alpha_n,gamma_s)(a_n,c_s)
* Du_spec ()(gamma_n,beta_s)(c_n,a_s)
* Gn_adjDd_GH_Duloop_GH_Ds_ab_bb;
}}
}}
}} }}
}
}}}
}
}
} }
/* Du_ti is a quark line from t_i to t_H /* Du_ti is a quark line from t_i to t_H
@ -1158,7 +1230,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
* Dd_tf is a quark line from t_f to t_H * Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */ * Ds_ti is a quark line from t_i to t_H */
template <class FImpl> template <class FImpl>
template <class mobj, class mobj2, class robj> template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti, void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf, const mobj &Du_tf,
const mobj2 &Du_spec, const mobj2 &Du_spec,
@ -1172,43 +1244,58 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl; auto Du_Gs = Du_spec * GammaB_sigma;
auto adjDu = g5 * adj(Du_tf) * g5; auto adjDu_GH_Ds = g5 * adj(Du_tf) * g5 * Gamma_H * Ds_ti;
auto adjDuG = adjDu * GammaB_nucl; //auto Gn_adjDd_GH_Du = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Du_ti;
// Gamma^B * Ds * \gamma_\mu^L auto adjDd_GH_Du = g5 * adj(Dd_tf) * g5 * Gamma_H * Du_ti;
auto GDsG = GammaB_sigma * Ds_ti * Gamma_H; auto Gn_adjDd_GH_Du = GammaB_nucl * adjDd_GH_Du; // for some reason I needed to split this into two lines to avoid the compilation error 'error: identifier "Grid::Gamma::mul" is undefined in device code'
// Du * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto DuGDd = Du_ti * Gamma_H * g5 * adj(Dd_tf) * g5; auto Gn_adjDd_GH_Du_Gs = Gn_adjDd_GH_Du * GammaB_sigma;
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){ for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){ for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a' int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b' int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c' int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){ int eSgn_s = (ie_s < 3 ? 1 : -1);
for (int tau=0; tau<Ns; tau++){
for (int i=0; i<Nc; i++){ ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
auto GDsG_at_bi = GDsG()(alpha_s,tau)(b_s,i);
for (int beta_n=0; beta_n<Ns; beta_n++){ for (int alpha_n=0; alpha_n<Ns; alpha_n++){
auto DuGDd_ab_ab = DuGDd()(alpha_s,beta_n)(a_s,b_n); for (int beta_s=0; beta_s<Ns; beta_s++){
auto ee_GDGDGD_a = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DuGDd_ab_ab;
auto adjDu_GH_Ds_ab_ab = adjDu_GH_Ds()(alpha_n, beta_s)(a_n,b_s);
auto Gn_adjDd_GH_Du_Gs_ab_ba = Gn_adjDd_GH_Du_Gs()(alpha_n, beta_s)(b_n,a_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){ for (int gamma_s=0; gamma_s<Ns; gamma_s++){
auto DuGDd_gb_cb = DuGDd()(gamma_s,beta_n)(c_s,b_n); auto Gn_adjDd_GH_Du_ag_bc = Gn_adjDd_GH_Du()(alpha_n,gamma_s)(b_n,c_s);
auto ee_GDGDGD_c = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DuGDd_gb_cb;
for (int gamma_n=0; gamma_n<Ns; gamma_n++){ for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() -= ee_GDGDGD_a * DuG()(gamma_s, beta_n)(c_s,a_n) * adjDu()(tau,gamma_n)(i,c_n); auto adjDu_GH_Ds_gb_cb = adjDu_GH_Ds()(gamma_n, beta_s)(c_n,b_s);
result()(gamma_s,gamma_n)() -= ee_GDGDGD_c * adjDuG()(tau, beta_n)(i,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD_a * adjDuG()(tau, beta_n)(i,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n); result()(gamma_n,gamma_s)() += ee * adjDu_GH_Ds_ab_ab
result()(gamma_s,gamma_n)() += ee_GDGDGD_c * DuG()(alpha_s, beta_n)(a_s,a_n) * adjDu()(tau,gamma_n)(i,c_n); * Gn_adjDd_GH_Du_Gs_ab_ba
} * Du_spec()(gamma_n,gamma_s)(c_n,c_s);
}
} result()(gamma_n,gamma_s)() -= ee * adjDu_GH_Ds_gb_cb
}}} * Gn_adjDd_GH_Du_Gs_ab_ba
} * Du_spec()(alpha_n,gamma_s)(a_n,c_s);
}
result()(gamma_n,gamma_s)() += ee * adjDu_GH_Ds_gb_cb
* Gn_adjDd_GH_Du_ag_bc
* Du_Gs()(alpha_n, beta_s)(a_n,a_s);
result()(gamma_n,gamma_s)() -= ee * adjDu_GH_Ds_ab_ab
* Gn_adjDd_GH_Du_ag_bc
* Du_Gs()(gamma_n, beta_s)(c_n,a_s);
}}
}}
}}
} }
template<class FImpl> template<class FImpl>
@ -1229,25 +1316,29 @@ void BaryonUtils<FImpl>::SigmaToNucleonEye(const PropagatorField &qq_loop,
GridBase *grid = qs_ti.Grid(); GridBase *grid = qs_ti.Grid();
autoView( vcorr, stn_corr, CpuWrite); autoView( vcorr , stn_corr , AcceleratorWrite);
autoView( vq_loop , qq_loop, CpuRead); autoView( vq_loop , qq_loop , AcceleratorRead);
autoView( vd_tf , qd_tf, CpuRead); autoView( vd_tf , qd_tf , AcceleratorRead);
autoView( vs_ti , qs_ti, CpuRead); autoView( vs_ti , qs_ti , AcceleratorRead);
bool doQ1 = (op == "Q1");
bool doQ2 = (op == "Q2");
accelerator_for(ss, grid->oSites(), grid->Nsimd(), { accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_loop = vq_loop[ss]; auto Dq_loop = vq_loop(ss);
auto Dd_tf = vd_tf[ss]; auto Dd_tf = vd_tf(ss);
auto Ds_ti = vs_ti[ss]; auto Ds_ti = vs_ti(ss);
sobj result=Zero(); typedef decltype(coalescedRead(vcorr[0])) spinor;
if(op == "Q1"){ spinor result=Zero();
if(doQ1){
SigmaToNucleonQ1EyeSite(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result); SigmaToNucleonQ1EyeSite(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else if(op == "Q2"){ } else if(doQ2){
SigmaToNucleonQ2EyeSite(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result); SigmaToNucleonQ2EyeSite(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else { } else {
assert(0 && "Weak Operator not correctly specified"); assert(0 && "Weak Operator not correctly specified");
} }
vcorr[ss] = result; coalescedWrite(vcorr[ss],result);
} );//end loop over lattice sites });//end loop over lattice sites
} }
template<class FImpl> template<class FImpl>
@ -1269,27 +1360,31 @@ void BaryonUtils<FImpl>::SigmaToNucleonNonEye(const PropagatorField &qq_ti,
GridBase *grid = qs_ti.Grid(); GridBase *grid = qs_ti.Grid();
autoView( vcorr , stn_corr, CpuWrite); autoView( vcorr , stn_corr , AcceleratorWrite );
autoView( vq_ti , qq_ti, CpuRead); autoView( vq_ti , qq_ti , AcceleratorRead );
autoView( vq_tf , qq_tf, CpuRead); autoView( vq_tf , qq_tf , AcceleratorRead );
autoView( vd_tf , qd_tf, CpuRead); autoView( vd_tf , qd_tf , AcceleratorRead );
autoView( vs_ti , qs_ti, CpuRead); autoView( vs_ti , qs_ti , AcceleratorRead );
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
thread_for(ss,grid->oSites(),{ bool doQ1 = (op == "Q1");
auto Dq_ti = vq_ti[ss]; bool doQ2 = (op == "Q2");
auto Dq_tf = vq_tf[ss];
auto Dd_tf = vd_tf[ss]; accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Ds_ti = vs_ti[ss]; auto Dq_ti = vq_ti(ss);
sobj result=Zero(); auto Dq_tf = vq_tf(ss);
if(op == "Q1"){ auto Dd_tf = vd_tf(ss);
auto Ds_ti = vs_ti(ss);
typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
if(doQ1){
SigmaToNucleonQ1NonEyeSite(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result); SigmaToNucleonQ1NonEyeSite(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else if(op == "Q2"){ } else if(doQ2){
SigmaToNucleonQ2NonEyeSite(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result); SigmaToNucleonQ2NonEyeSite(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else { } else {
assert(0 && "Weak Operator not correctly specified"); assert(0 && "Weak Operator not correctly specified");
} }
vcorr[ss] = result; coalescedWrite(vcorr[ss],result);
} );//end loop over lattice sites });//end loop over lattice sites
} }
NAMESPACE_END(Grid); NAMESPACE_END(Grid);