1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-10 14:10:46 +01:00

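Compiles on GPU. The baryon 3pt contractions (BaryonGamma3pt) are still wrong: the second spectator propagator argument was dropped from the per-site functions because passing a second mobj2 parameter caused a compilation error.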

This commit is contained in:
Felix Erben 2020-12-17 17:04:08 +00:00
parent 808f1e0e8c
commit f36d6f3923
2 changed files with 544 additions and 449 deletions

View File

@ -1047,7 +1047,7 @@ A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
{
GridBase *grid = vs[0].Grid();
int nd = grid->_ndimension;
//int nd = grid->_ndimension;
int Nsimd = grid->Nsimd();
int N_t = WW_sd.dimensions()[0];
int N_s = WW_sd.dimensions()[1];

View File

@ -44,13 +44,13 @@ public:
typedef typename ComplexField::vector_object vobj;
typedef Lattice<iSpinMatrix<typename FImpl::Simd>> SpinMatrixField;
typedef typename SpinMatrixField::vector_object sobj;
//typedef typename SpinMatrixField::vector_object sobj;
static const int epsilon[6][3] ;
static const Real epsilon_sgn[6];
//static const int epsilon[6][3] ;
//static const Real epsilon_sgn[6];
private:
template <class mobj, class robj>
template <class mobj, class robj> accelerator_inline
static void BaryonSite(const mobj &D1,
const mobj &D2,
const mobj &D3,
@ -61,7 +61,7 @@ public:
const int parity,
const bool * wick_contractions,
robj &result);
template <class mobj, class robj>
template <class mobj, class robj> accelerator_inline
static void BaryonSiteMatrix(const mobj &D1,
const mobj &D2,
const mobj &D3,
@ -118,11 +118,11 @@ public:
const int nt,
robj &result);
private:
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void BaryonGamma3ptGroup1Site(
const mobj &Dq1_ti,
const mobj2 &Dq2_spec,
const mobj2 &Dq3_spec,
// const mobj2 &Dq3_spec,
const mobj &Dq4_tf,
const Gamma GammaJ,
const Gamma GammaBi,
@ -130,11 +130,11 @@ public:
int wick_contraction,
robj &result);
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void BaryonGamma3ptGroup2Site(
const mobj2 &Dq1_spec,
const mobj &Dq2_ti,
const mobj2 &Dq3_spec,
//const mobj2 &Dq3_spec,
const mobj &Dq4_tf,
const Gamma GammaJ,
const Gamma GammaBi,
@ -142,10 +142,10 @@ public:
int wick_contraction,
robj &result);
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void BaryonGamma3ptGroup3Site(
const mobj2 &Dq1_spec,
const mobj2 &Dq2_spec,
//const mobj2 &Dq2_spec,
const mobj &Dq3_ti,
const mobj &Dq4_tf,
const Gamma GammaJ,
@ -167,7 +167,7 @@ public:
const Gamma GammaBf,
SpinMatrixField &stn_corr);
private:
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
@ -176,7 +176,7 @@ public:
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result);
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
@ -188,7 +188,7 @@ public:
robj &result);
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
@ -197,7 +197,7 @@ public:
const Gamma GammaB_sigma,
const Gamma GammaB_nucl,
robj &result);
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
static void SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
@ -230,23 +230,15 @@ public:
const std::string op,
SpinMatrixField &stn_corr);
};
/*
template <class FImpl>
const int BaryonUtils<FImpl>::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};
/*template <class FImpl>
const Complex BaryonUtils<FImpl>::epsilon_sgn[6] = {Complex(1),
Complex(1),
Complex(1),
Complex(-1),
Complex(-1),
Complex(-1)};
*/
template <class FImpl>
const Real BaryonUtils<FImpl>::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.};
*/
//This is the old version
template <class FImpl>
template <class mobj, class robj>
template <class mobj, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonSite(const mobj &D1,
const mobj &D2,
const mobj &D3,
@ -274,16 +266,20 @@ void BaryonUtils<FImpl>::BaryonSite(const mobj &D1,
auto GBf_D3 = GammaB_f * D3;
auto GAf_D3 = GammaA_f * D3;
for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = epsilon[ie_f][0]; //a
int b_f = epsilon[ie_f][1]; //b
int c_f = epsilon[ie_f][2]; //c
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = epsilon[ie_i][0]; //a'
int b_i = epsilon[ie_i][1]; //b'
int c_i = epsilon[ie_i][2]; //c'
Real ee;
Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
//This is the \delta_{456}^{123} part
if (wick_contraction[0]){
for (int rho=0; rho<Ns; rho++){
@ -361,7 +357,7 @@ void BaryonUtils<FImpl>::BaryonSite(const mobj &D1,
//New version without parity projection or trace
template <class FImpl>
template <class mobj, class robj>
template <class mobj, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonSiteMatrix(const mobj &D1,
const mobj &D2,
const mobj &D3,
@ -384,16 +380,21 @@ void BaryonUtils<FImpl>::BaryonSiteMatrix(const mobj &D1,
auto GBf_D3 = GammaB_f * D3;
auto GAf_D3 = GammaA_f * D3;
for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = epsilon[ie_f][0]; //a
int b_f = epsilon[ie_f][1]; //b
int c_f = epsilon[ie_f][2]; //c
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = epsilon[ie_i][0]; //a'
int b_i = epsilon[ie_i][1]; //b'
int c_i = epsilon[ie_i][2]; //c'
Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
//This is the \delta_{456}^{123} part
if (wick_contraction[0]){
for (int rho_i=0; rho_i<Ns; rho_i++){
@ -519,10 +520,10 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
GridBase *grid = q1_left.Grid();
autoView(vbaryon_corr, baryon_corr,CpuWrite);
autoView( v1 , q1_left, CpuRead);
autoView( v2 , q2_left, CpuRead);
autoView( v3 , q3_left, CpuRead);
autoView(vbaryon_corr , baryon_corr , AcceleratorWrite);
autoView( v1 , q1_left , AcceleratorRead);
autoView( v2 , q2_left , AcceleratorRead);
autoView( v3 , q3_left , AcceleratorRead);
Real bytes =0.;
bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
@ -538,12 +539,13 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
t =-usecond();
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto D1 = v1[ss];
auto D2 = v2[ss];
auto D3 = v3[ss];
vobj result=Zero();
auto D1 = v1(ss);
auto D2 = v2(ss);
auto D3 = v3(ss);
typedef decltype(coalescedRead(vbaryon_corr[0])) cVec;
cVec result=Zero();
BaryonSite(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result);
vbaryon_corr[ss] = result;
coalescedWrite(vbaryon_corr[ss],result);
} );//end loop over lattice sites
t += usecond();
@ -568,37 +570,20 @@ void BaryonUtils<FImpl>::ContractBaryonsMatrix(const PropagatorField &q1_left,
GridBase *grid = q1_left.Grid();
autoView(vbaryon_corr, baryon_corr,CpuWrite);
autoView( v1 , q1_left, CpuRead);
autoView( v2 , q2_left, CpuRead);
autoView( v3 , q3_left, CpuRead);
// Real bytes =0.;
// bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
// for (int ie=0; ie < 6 ; ie++){
// if(ie==0 or ie==3){
// bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie];
// }
// else{
// bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie];
// }
// }
// Real t=0.;
// t =-usecond();
autoView(vbaryon_corr , baryon_corr , AcceleratorWrite);
autoView( v1 , q1_left , AcceleratorRead);
autoView( v2 , q2_left , AcceleratorRead);
autoView( v3 , q3_left , AcceleratorRead);
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto D1 = v1[ss];
auto D2 = v2[ss];
auto D3 = v3[ss];
sobj result=Zero();
auto D1 = v1(ss);
auto D2 = v2(ss);
auto D3 = v3(ss);
typedef decltype(coalescedRead(vbaryon_corr[0])) spinor;
spinor result=Zero();
BaryonSiteMatrix(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,wick_contractions,result);
vbaryon_corr[ss] = result;
coalescedWrite(vbaryon_corr[ss],result);
} );//end loop over lattice sites
// t += usecond();
// std::cout << GridLogDebug << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl;
}
/* The array wick_contractions must be of length 6. The order *
@ -664,11 +649,11 @@ void BaryonUtils<FImpl>::ContractBaryonsSlicedMatrix(const mobj &D1,
* Dq3_spec is a quark line from t_i to t_f
* Dq4_tf is a quark line from t_f to t_J */
template<class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site(
const mobj &Dq1_ti,
const mobj2 &Dq2_spec,
const mobj2 &Dq3_spec,
// const mobj2 &Dq3_spec,
const mobj &Dq4_tf,
const Gamma GammaJ,
const Gamma GammaBi,
@ -678,41 +663,47 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site(
{
Gamma g5(Gamma::Algebra::Gamma5);
auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti;
// auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti;
auto adjD4 = g5 * adj(Dq4_tf) * g5 ;
auto adjD4_g_D1 = adjD4 * GammaJ * Dq1_ti;
auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1;
auto D2_Gi = Dq2_spec * GammaBi;
auto Gf_D2_Gi = GammaBf * D2_Gi;
auto Gf_D3 = GammaBf * Dq3_spec;
int a_f, b_f, c_f;
int a_i, b_i, c_i;
// auto Gf_D3 = GammaBf * Dq3_spec; // including a second mobj2 parameter leads to compilation error
auto Gf_D3 = GammaBf * Dq2_spec; //WRONG!!!!!
Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){
a_f = epsilon[ie_f][0]; //a
b_f = epsilon[ie_f][1]; //b
c_f = epsilon[ie_f][2]; //c
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
a_i = epsilon[ie_i][0]; //a'
b_i = epsilon[ie_i][1]; //b'
c_i = epsilon[ie_i][2]; //c'
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
for (int beta_i=0; beta_i<Ns; beta_i++){
auto D2_Gi_ab_aa = D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i);
auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
//auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
auto Dq3_spec_ab_ab = Dq2_spec ()(alpha_f,beta_i)(a_f,b_i); // WRONG!!!!
for (int gamma_i=0; gamma_i<Ns; gamma_i++){
auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(alpha_f,gamma_i)(a_f,c_i);
auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(alpha_f,gamma_i)(b_f,c_i);
for (int gamma_f=0; gamma_f<Ns; gamma_f++){
auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(gamma_f,gamma_i)(c_f,c_i);
auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
//auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
auto Dq3_spec_gb_cb = Dq2_spec ()(gamma_f,beta_i)(c_f,b_i); //WRONG!!!!
auto D2_Gi_gb_ca = D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
@ -756,11 +747,11 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup1Site(
* Dq3_spec is a quark line from t_i to t_f
* Dq4_tf is a quark line from t_f to t_J */
template<class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site(
const mobj2 &Dq1_spec,
const mobj &Dq2_ti,
const mobj2 &Dq3_spec,
// const mobj2 &Dq3_spec,
const mobj &Dq4_tf,
const Gamma GammaJ,
const Gamma GammaBi,
@ -773,37 +764,40 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site(
auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi;
auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi;
auto Gf_D1 = GammaBf * Dq1_spec;
auto Gf_D3 = GammaBf * Dq3_spec;
//auto Gf_D3 = GammaBf * Dq3_spec;
auto Gf_D3 = GammaBf * Dq1_spec; // WRONG!!!!!
int a_f, b_f, c_f;
int a_i, b_i, c_i;
Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){
a_f = epsilon[ie_f][0]; //a
b_f = epsilon[ie_f][1]; //b
c_f = epsilon[ie_f][2]; //c
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
a_i = epsilon[ie_i][0]; //a'
b_i = epsilon[ie_i][1]; //b'
c_i = epsilon[ie_i][2]; //c'
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
for (int beta_i=0; beta_i<Ns; beta_i++){
auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i);
auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
//auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
auto Dq3_spec_ab_ab = Dq1_spec ()(alpha_f,beta_i)(a_f,b_i); //WRONG!!!
for (int gamma_i=0; gamma_i<Ns; gamma_i++){
auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(alpha_f,gamma_i)(a_f,c_i);
auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(alpha_f,gamma_i)(b_f,c_i);
for (int gamma_f=0; gamma_f<Ns; gamma_f++){
auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(gamma_f,gamma_i)(c_f,c_i);
auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
//auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
auto Dq3_spec_gb_cb = Dq1_spec ()(gamma_f,beta_i)(c_f,b_i); //WRONG!!!
auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
if(wick_contraction == 1) { // Do contraction II1
@ -846,10 +840,10 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup2Site(
* Dq3_ti is a quark line from t_i to t_J
* Dq4_tf is a quark line from t_f to t_J */
template<class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::BaryonGamma3ptGroup3Site(
const mobj2 &Dq1_spec,
const mobj2 &Dq2_spec,
// const mobj2 &Dq2_spec,
const mobj &Dq3_ti,
const mobj &Dq4_tf,
const Gamma GammaJ,
@ -863,24 +857,25 @@ void BaryonUtils<FImpl>::BaryonGamma3ptGroup3Site(
auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti;
auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3;
auto Gf_D1 = GammaBf * Dq1_spec;
auto D2_Gi = Dq2_spec * GammaBi;
//auto D2_Gi = Dq2_spec * GammaBi;
auto D2_Gi = Dq1_spec * GammaBi; //WRONG!!!!!!!!!!!!!!!!!
auto Gf_D2_Gi = GammaBf * D2_Gi;
int a_f, b_f, c_f;
int a_i, b_i, c_i;
Real ee;
for (int ie_f=0; ie_f < 6 ; ie_f++){
a_f = epsilon[ie_f][0]; //a
b_f = epsilon[ie_f][1]; //b
c_f = epsilon[ie_f][2]; //c
int a_f = (ie_f < 3 ? ie_f : (6-ie_f)%3 ); //epsilon[ie_n][0]; //a
int b_f = (ie_f < 3 ? (ie_f+1)%3 : (8-ie_f)%3 ); //epsilon[ie_n][1]; //b
int c_f = (ie_f < 3 ? (ie_f+2)%3 : (7-ie_f)%3 ); //epsilon[ie_n][2]; //c
int eSgn_f = (ie_f < 3 ? 1 : -1);
for (int ie_i=0; ie_i < 6 ; ie_i++){
a_i = epsilon[ie_i][0]; //a'
b_i = epsilon[ie_i][1]; //b'
c_i = epsilon[ie_i][2]; //c'
int a_i = (ie_i < 3 ? ie_i : (6-ie_i)%3 ); //epsilon[ie_s][0]; //a'
int b_i = (ie_i < 3 ? (ie_i+1)%3 : (8-ie_i)%3 ); //epsilon[ie_s][1]; //b'
int c_i = (ie_i < 3 ? (ie_i+2)%3 : (7-ie_i)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_i = (ie_i < 3 ? 1 : -1);
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
ee = Real(eSgn_f * eSgn_i); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
for (int beta_i=0; beta_i<Ns; beta_i++){
@ -951,38 +946,85 @@ void BaryonUtils<FImpl>::BaryonGamma3pt(
const Gamma GammaBf,
SpinMatrixField &stn_corr)
{
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
GridBase *grid = q_tf.Grid();
autoView( vcorr, stn_corr, CpuWrite);
autoView( vq_ti , q_ti, CpuRead);
autoView( vq_tf , q_tf, CpuRead);
// autoView( vcorr, stn_corr, CpuWrite);
// autoView( vq_ti , q_ti, CpuRead);
// autoView( vq_tf , q_tf, CpuRead);
// if (group == 1) {
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
// auto Dq_ti = vq_ti[ss];
// auto Dq_tf = vq_tf[ss];
// sobj result=Zero();
// BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
// vcorr[ss] += result;
// });//end loop over lattice sites
// } else if (group == 2) {
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
// auto Dq_ti = vq_ti[ss];
// auto Dq_tf = vq_tf[ss];
// sobj result=Zero();
// BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
// vcorr[ss] += result;
// });//end loop over lattice sites
// } else if (group == 3) {
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
// auto Dq_ti = vq_ti[ss];
// auto Dq_tf = vq_tf[ss];
// sobj result=Zero();
// BaryonGamma3ptGroup3Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
// vcorr[ss] += result;
// });//end loop over lattice sites
// }
autoView( vcorr , stn_corr , AcceleratorWrite);
autoView( vq_ti , q_ti , AcceleratorRead);
autoView( vq_tf , q_tf , AcceleratorRead);
if (group == 1) {
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti[ss];
auto Dq_tf = vq_tf[ss];
sobj result=Zero();
BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
vcorr[ss] += result;
auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf(ss);
//sobj result=Zero();
typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
//BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
BaryonGamma3ptGroup1Site(Dq_ti,Dq_spec1,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); //WRONG
// vcorr[ss] += result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites
} else if (group == 2) {
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti[ss];
auto Dq_tf = vq_tf[ss];
sobj result=Zero();
BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
vcorr[ss] += result;
auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf(ss);
//sobj result=Zero();
typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
// BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
BaryonGamma3ptGroup2Site(Dq_spec1,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); //WRONG
// vcorr[ss] += result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites
} else if (group == 3) {
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti[ss];
auto Dq_tf = vq_tf[ss];
sobj result=Zero();
BaryonGamma3ptGroup3Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
vcorr[ss] += result;
auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf(ss);
//sobj result=Zero();
typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
//BaryonGamma3ptGroup3Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
BaryonGamma3ptGroup3Site(Dq_spec1,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); //WRONG
// vcorr[ss] += result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites
}
}
@ -997,7 +1039,7 @@ void BaryonUtils<FImpl>::BaryonGamma3pt(
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */
template <class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
@ -1010,37 +1052,48 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto GDsGDd = GammaB_sigma * Ds_ti * Gamma_H * g5 * adj(Dd_tf) * g5;
// Dq_loop * \gamma_\mu^L
auto DqG = Dq_loop * Gamma_H;
//auto Gn_adjDd_GH_Ds = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
auto adjDd_GH_Ds = g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
auto Gn_adjDd_GH_Ds = GammaB_nucl * adjDd_GH_Ds;
auto Du_Gs = Du_spec * GammaB_sigma;
auto Dq_GH = Dq_loop * Gamma_H;
auto Tr_Dq_GH = trace(Dq_GH)()()();
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int beta_n=0; beta_n<Ns; beta_n++){
auto GDsGDd_ab_bb = GDsGDd()(alpha_s,beta_n)(b_s,b_n);
for (int tau2=0; tau2<Ns; tau2++){
for (int j=0; j<Nc; j++){
auto DqG_tt_jj = DqG()(tau2,tau2)(j,j);
auto ee_GDGDDG = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DqG_tt_jj;
int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_s = (ie_s < 3 ? 1 : -1);
ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_n=0; alpha_n<Ns; alpha_n++){
for (int beta_s=0; beta_s<Ns; beta_s++){
auto Gn_adjDd_GH_Ds_ab_bb = Gn_adjDd_GH_Ds ()(alpha_n, beta_s)(b_n,b_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() += ee_GDGDDG * DuG()(alpha_s, beta_n)(a_s,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDDG * DuG()(gamma_s, beta_n)(c_s,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_n,gamma_s)() += ee * Gn_adjDd_GH_Ds_ab_bb
* Du_spec ()(gamma_n,gamma_s)(c_n,c_s)
* Du_Gs ()(alpha_n, beta_s)(a_n,a_s)
* Tr_Dq_GH;
result()(gamma_n,gamma_s)() -= ee * Gn_adjDd_GH_Ds_ab_bb
* Du_spec ()(alpha_n,gamma_s)(a_n,c_s)
* Du_Gs ()(gamma_n, beta_s)(c_n,a_s)
* Tr_Dq_GH;
}}
}}
}}
}
}
}
/* Du_ti is a quark line from t_i to t_H
* Du_tf is a quark line from t_f to t_H
@ -1048,7 +1101,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1EyeSite(const mobj &Dq_loop,
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */
template <class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
@ -1062,43 +1115,54 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl;
auto adjDu = g5 * adj(Du_tf) * g5;
auto adjDuG = adjDu * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto GDsGDd = GammaB_sigma * Ds_ti * Gamma_H * g5 * adj(Dd_tf) * g5;
// Du_ti * \gamma_\mu^L
auto DuGH = Du_ti * Gamma_H;
auto Du_Gs = Du_spec * GammaB_sigma;
//auto Gn_adjDd_GH_Ds = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
auto adjDd_GH_Ds = g5 * adj(Dd_tf) * g5 * Gamma_H * Ds_ti;
auto Gn_adjDd_GH_Ds = GammaB_nucl * adjDd_GH_Ds;
auto adjDu_GH_Du = g5 * adj(Du_tf) * g5 * Gamma_H * Du_ti;
auto adjDu_GH_Du_Gs = adjDu_GH_Du * GammaB_sigma;
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int beta_n=0; beta_n<Ns; beta_n++){
auto GDsGDd_ab_bb = GDsGDd()(alpha_s,beta_n)(b_s,b_n);
for (int tau2=0; tau2<Ns; tau2++){
for (int j=0; j<Nc; j++){
auto DuGH_at_aj = DuGH()(alpha_s,tau2)(a_s,j);
auto ee_GDGDDG_a = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DuGH_at_aj;
int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_s = (ie_s < 3 ? 1 : -1);
ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_n=0; alpha_n<Ns; alpha_n++){
for (int beta_s=0; beta_s<Ns; beta_s++){
auto Gn_adjDd_GH_Ds_ab_bb = Gn_adjDd_GH_Ds ()(alpha_n, beta_s)(b_n,b_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
auto DuGH_gt_cj = DuGH()(gamma_s,tau2)(c_s,j);
auto ee_GDGDDG_c = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsGDd_ab_bb * DuGH_gt_cj;
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() += ee_GDGDDG_a * DuG()(gamma_s, beta_n)(c_s,a_n) * adjDu()(tau2,gamma_n)(j,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDDG_c * adjDuG()(tau2, beta_n)(j,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDDG_a * adjDuG()(tau2, beta_n)(j,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDDG_c * DuG()(alpha_s, beta_n)(a_s,a_n) * adjDu()(tau2,gamma_n)(j,c_n);
}
}
result()(gamma_n,gamma_s)() += ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du ()(alpha_n,gamma_s)(a_n,c_s)
* Du_Gs ()(gamma_n, beta_s)(c_n,a_s);
result()(gamma_n,gamma_s)() += ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du_Gs ()(gamma_n, beta_s)(c_n,a_s)
* Du_spec ()(alpha_n,gamma_s)(a_n,c_s);
result()(gamma_n,gamma_s)() -= ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du_Gs ()(alpha_n, beta_s)(a_n,a_s)
* Du_spec ()(gamma_n,gamma_s)(c_n,c_s);
result()(gamma_n,gamma_s)() -= ee * Gn_adjDd_GH_Ds_ab_bb
* adjDu_GH_Du ()(gamma_n,gamma_s)(c_n,c_s)
* Du_Gs ()(alpha_n, beta_s)(a_n,a_s);
}}
}}
}}
}
}
}
//Equivalent to "One-trace"
@ -1107,7 +1171,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ1NonEyeSite(const mobj &Du_ti,
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */
template <class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
const mobj2 &Du_spec,
const mobj &Dd_tf,
@ -1120,36 +1184,44 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L
auto GDsG = GammaB_sigma * Ds_ti * Gamma_H;
// Dq_loop * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto DqGDd = Dq_loop * Gamma_H * g5 * adj(Dd_tf) * g5;
//auto Gn_adjDd_GH_Duloop_GH_Ds = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Dq_loop * Gamma_H * Ds_ti;
auto adjDd_GH_Duloop_GH_Ds = g5 * adj(Dd_tf) * g5 * Gamma_H * Dq_loop * Gamma_H * Ds_ti;
auto Gn_adjDd_GH_Duloop_GH_Ds = GammaB_nucl * adjDd_GH_Duloop_GH_Ds;
auto Du_Gs = Du_spec * GammaB_sigma;
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int tau=0; tau<Ns; tau++){
for (int i=0; i<Nc; i++){
auto GDsG_at_bi = GDsG()(alpha_s,tau)(b_s,i);
for (int beta_n=0; beta_n<Ns; beta_n++){
auto DqGDd_tb_ib = DqGDd()(tau,beta_n)(i,b_n);
auto ee_GDGDGD = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DqGDd_tb_ib;
int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_s = (ie_s < 3 ? 1 : -1);
ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_n=0; alpha_n<Ns; alpha_n++){
for (int beta_s=0; beta_s<Ns; beta_s++){
auto Gn_adjDd_GH_Duloop_GH_Ds_ab_bb = Gn_adjDd_GH_Duloop_GH_Ds ()(alpha_n,beta_s)(b_n,b_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() -= ee_GDGDGD * DuG()(alpha_s, beta_n)(a_s,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD * DuG()(gamma_s, beta_n)(c_s,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_n,gamma_s)() -= ee * Du_spec ()(gamma_n,gamma_s)(c_n,c_s)
* Du_Gs ()(alpha_n,beta_s)(a_n,a_s)
* Gn_adjDd_GH_Duloop_GH_Ds_ab_bb;
result()(gamma_n,gamma_s)() += ee * Du_Gs ()(alpha_n,gamma_s)(a_n,c_s)
* Du_spec ()(gamma_n,beta_s)(c_n,a_s)
* Gn_adjDd_GH_Duloop_GH_Ds_ab_bb;
}}
}}
}}
}
}}}
}
}
}
/* Du_ti is a quark line from t_i to t_H
@ -1158,7 +1230,7 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2EyeSite(const mobj &Dq_loop,
* Dd_tf is a quark line from t_f to t_H
* Ds_ti is a quark line from t_i to t_H */
template <class FImpl>
template <class mobj, class mobj2, class robj>
template <class mobj, class mobj2, class robj> accelerator_inline
void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
const mobj &Du_tf,
const mobj2 &Du_spec,
@ -1172,43 +1244,58 @@ void BaryonUtils<FImpl>::SigmaToNucleonQ2NonEyeSite(const mobj &Du_ti,
Gamma g5(Gamma::Algebra::Gamma5);
auto DuG = Du_spec * GammaB_nucl;
auto adjDu = g5 * adj(Du_tf) * g5;
auto adjDuG = adjDu * GammaB_nucl;
// Gamma^B * Ds * \gamma_\mu^L
auto GDsG = GammaB_sigma * Ds_ti * Gamma_H;
// Du * \gamma_\mu^L * (\gamma_5 * Dd^\dagger * \gamma_5)
auto DuGDd = Du_ti * Gamma_H * g5 * adj(Dd_tf) * g5;
auto Du_Gs = Du_spec * GammaB_sigma;
auto adjDu_GH_Ds = g5 * adj(Du_tf) * g5 * Gamma_H * Ds_ti;
//auto Gn_adjDd_GH_Du = GammaB_nucl * g5 * adj(Dd_tf) * g5 * Gamma_H * Du_ti;
auto adjDd_GH_Du = g5 * adj(Dd_tf) * g5 * Gamma_H * Du_ti;
auto Gn_adjDd_GH_Du = GammaB_nucl * adjDd_GH_Du; // for some reason I needed to split this into two lines to avoid the compilation error 'error: identifier "Grid::Gamma::mul" is undefined in device code'
auto Gn_adjDd_GH_Du_Gs = Gn_adjDd_GH_Du * GammaB_sigma;
Real ee;
for (int ie_n=0; ie_n < 6 ; ie_n++){
int a_n = epsilon[ie_n][0]; //a
int b_n = epsilon[ie_n][1]; //b
int c_n = epsilon[ie_n][2]; //c
int a_n = (ie_n < 3 ? ie_n : (6-ie_n)%3 ); //epsilon[ie_n][0]; //a
int b_n = (ie_n < 3 ? (ie_n+1)%3 : (8-ie_n)%3 ); //epsilon[ie_n][1]; //b
int c_n = (ie_n < 3 ? (ie_n+2)%3 : (7-ie_n)%3 ); //epsilon[ie_n][2]; //c
int eSgn_n = (ie_n < 3 ? 1 : -1);
for (int ie_s=0; ie_s < 6 ; ie_s++){
int a_s = epsilon[ie_s][0]; //a'
int b_s = epsilon[ie_s][1]; //b'
int c_s = epsilon[ie_s][2]; //c'
for (int alpha_s=0; alpha_s<Ns; alpha_s++){
for (int tau=0; tau<Ns; tau++){
for (int i=0; i<Nc; i++){
auto GDsG_at_bi = GDsG()(alpha_s,tau)(b_s,i);
for (int beta_n=0; beta_n<Ns; beta_n++){
auto DuGDd_ab_ab = DuGDd()(alpha_s,beta_n)(a_s,b_n);
auto ee_GDGDGD_a = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DuGDd_ab_ab;
int a_s = (ie_s < 3 ? ie_s : (6-ie_s)%3 ); //epsilon[ie_s][0]; //a'
int b_s = (ie_s < 3 ? (ie_s+1)%3 : (8-ie_s)%3 ); //epsilon[ie_s][1]; //b'
int c_s = (ie_s < 3 ? (ie_s+2)%3 : (7-ie_s)%3 ); //epsilon[ie_s][2]; //c'
int eSgn_s = (ie_s < 3 ? 1 : -1);
ee = Real(eSgn_n * eSgn_s); //epsilon_sgn[ie_n] * epsilon_sgn[ie_s];
for (int alpha_n=0; alpha_n<Ns; alpha_n++){
for (int beta_s=0; beta_s<Ns; beta_s++){
auto adjDu_GH_Ds_ab_ab = adjDu_GH_Ds()(alpha_n, beta_s)(a_n,b_s);
auto Gn_adjDd_GH_Du_Gs_ab_ba = Gn_adjDd_GH_Du_Gs()(alpha_n, beta_s)(b_n,a_s);
for (int gamma_s=0; gamma_s<Ns; gamma_s++){
auto DuGDd_gb_cb = DuGDd()(gamma_s,beta_n)(c_s,b_n);
auto ee_GDGDGD_c = epsilon_sgn[ie_n] * epsilon_sgn[ie_s] * GDsG_at_bi * DuGDd_gb_cb;
auto Gn_adjDd_GH_Du_ag_bc = Gn_adjDd_GH_Du()(alpha_n,gamma_s)(b_n,c_s);
for (int gamma_n=0; gamma_n<Ns; gamma_n++){
result()(gamma_s,gamma_n)() -= ee_GDGDGD_a * DuG()(gamma_s, beta_n)(c_s,a_n) * adjDu()(tau,gamma_n)(i,c_n);
result()(gamma_s,gamma_n)() -= ee_GDGDGD_c * adjDuG()(tau, beta_n)(i,a_n) * Du_spec()(alpha_s,gamma_n)(a_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD_a * adjDuG()(tau, beta_n)(i,a_n) * Du_spec()(gamma_s,gamma_n)(c_s,c_n);
result()(gamma_s,gamma_n)() += ee_GDGDGD_c * DuG()(alpha_s, beta_n)(a_s,a_n) * adjDu()(tau,gamma_n)(i,c_n);
}
}
}
}}}
}
}
auto adjDu_GH_Ds_gb_cb = adjDu_GH_Ds()(gamma_n, beta_s)(c_n,b_s);
result()(gamma_n,gamma_s)() += ee * adjDu_GH_Ds_ab_ab
* Gn_adjDd_GH_Du_Gs_ab_ba
* Du_spec()(gamma_n,gamma_s)(c_n,c_s);
result()(gamma_n,gamma_s)() -= ee * adjDu_GH_Ds_gb_cb
* Gn_adjDd_GH_Du_Gs_ab_ba
* Du_spec()(alpha_n,gamma_s)(a_n,c_s);
result()(gamma_n,gamma_s)() += ee * adjDu_GH_Ds_gb_cb
* Gn_adjDd_GH_Du_ag_bc
* Du_Gs()(alpha_n, beta_s)(a_n,a_s);
result()(gamma_n,gamma_s)() -= ee * adjDu_GH_Ds_ab_ab
* Gn_adjDd_GH_Du_ag_bc
* Du_Gs()(gamma_n, beta_s)(c_n,a_s);
}}
}}
}}
}
template<class FImpl>
@ -1229,24 +1316,28 @@ void BaryonUtils<FImpl>::SigmaToNucleonEye(const PropagatorField &qq_loop,
GridBase *grid = qs_ti.Grid();
autoView( vcorr, stn_corr, CpuWrite);
autoView( vq_loop , qq_loop, CpuRead);
autoView( vd_tf , qd_tf, CpuRead);
autoView( vs_ti , qs_ti, CpuRead);
autoView( vcorr , stn_corr , AcceleratorWrite);
autoView( vq_loop , qq_loop , AcceleratorRead);
autoView( vd_tf , qd_tf , AcceleratorRead);
autoView( vs_ti , qs_ti , AcceleratorRead);
bool doQ1 = (op == "Q1");
bool doQ2 = (op == "Q2");
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_loop = vq_loop[ss];
auto Dd_tf = vd_tf[ss];
auto Ds_ti = vs_ti[ss];
sobj result=Zero();
if(op == "Q1"){
auto Dq_loop = vq_loop(ss);
auto Dd_tf = vd_tf(ss);
auto Ds_ti = vs_ti(ss);
typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
if(doQ1){
SigmaToNucleonQ1EyeSite(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else if(op == "Q2"){
} else if(doQ2){
SigmaToNucleonQ2EyeSite(Dq_loop,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else {
assert(0 && "Weak Operator not correctly specified");
}
vcorr[ss] = result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites
}
@ -1269,26 +1360,30 @@ void BaryonUtils<FImpl>::SigmaToNucleonNonEye(const PropagatorField &qq_ti,
GridBase *grid = qs_ti.Grid();
autoView( vcorr , stn_corr, CpuWrite);
autoView( vq_ti , qq_ti, CpuRead);
autoView( vq_tf , qq_tf, CpuRead);
autoView( vd_tf , qd_tf, CpuRead);
autoView( vs_ti , qs_ti, CpuRead);
// accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
thread_for(ss,grid->oSites(),{
auto Dq_ti = vq_ti[ss];
auto Dq_tf = vq_tf[ss];
auto Dd_tf = vd_tf[ss];
auto Ds_ti = vs_ti[ss];
sobj result=Zero();
if(op == "Q1"){
autoView( vcorr , stn_corr , AcceleratorWrite );
autoView( vq_ti , qq_ti , AcceleratorRead );
autoView( vq_tf , qq_tf , AcceleratorRead );
autoView( vd_tf , qd_tf , AcceleratorRead );
autoView( vs_ti , qs_ti , AcceleratorRead );
bool doQ1 = (op == "Q1");
bool doQ2 = (op == "Q2");
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
auto Dq_ti = vq_ti(ss);
auto Dq_tf = vq_tf(ss);
auto Dd_tf = vd_tf(ss);
auto Ds_ti = vs_ti(ss);
typedef decltype(coalescedRead(vcorr[0])) spinor;
spinor result=Zero();
if(doQ1){
SigmaToNucleonQ1NonEyeSite(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else if(op == "Q2"){
} else if(doQ2){
SigmaToNucleonQ2NonEyeSite(Dq_ti,Dq_tf,Du_spec,Dd_tf,Ds_ti,Gamma_H,GammaB_sigma,GammaB_nucl,result);
} else {
assert(0 && "Weak Operator not correctly specified");
}
vcorr[ss] = result;
coalescedWrite(vcorr[ss],result);
});//end loop over lattice sites
}