Merge branch 'develop' into feature/distil

* develop: (27 commits) Update README.md result layout standardised, iterator size more elegant updated syntac in Test_hadrons_spectrum chroma-regression test now prints difference correctly baryon input strings are now pairs of pairs of gammas - still ugly!! second update to pull request Changing back interface for Gamma3pt Removing old debug code Changes to A2Autils suggested changes for 1st pull request implemented changed input parameters for easier use Should compile everywhere now changed baryon interface added author information ready for pull request code compiling now - still need to test Baryons module works in 1 of 3 cases - still need SlicedProp and Msource part!! thread_for caused the problems - slow for loop for now still bugfix weird bug... ... # Conflicts: # Hadrons/Modules.hpp # Hadrons/modules.inc
2025-12-06 22:34:41 +00:00 · 2019-10-30 14:13:00 +00:00
parent ca234325bc f31e3278a6
commit eb8848a071
16 changed files with 1886 additions and 483 deletions
--- a/Grid/lattice/Lattice_reduction.h
+++ b/Grid/lattice/Lattice_reduction.h
@@ -317,116 +317,6 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
  }
 }

-template<class vobj>
-static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
-{
-  // std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
-
-  typedef typename vobj::scalar_type scalar_type;
-  std::vector<scalar_type> lsSum;
-  localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
-  globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
-  // std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
-}
-
-template <class vobj>
-static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
-{
-  // std::cout << GridLogMessage << "Start prep" << std::endl;
-  typedef typename vobj::vector_type   vector_type;
-  typedef typename vobj::scalar_type   scalar_type;
-  GridBase  *grid = lhs.Grid();
-  assert(grid!=NULL);
-  conformable(grid,rhs.Grid());
-
-  const int    Nd = grid->_ndimension;
-  const int Nsimd = grid->Nsimd();
-
-  assert(orthogdim >= 0);
-  assert(orthogdim < Nd);
-
-  int fd=grid->_fdimensions[orthogdim];
-  int ld=grid->_ldimensions[orthogdim];
-  int rd=grid->_rdimensions[orthogdim];
-  // std::cout << GridLogMessage << "Start alloc" << std::endl;
-
-  Vector<vector_type> lvSum(rd); // will locally sum vectors first
-  lsSum.resize(ld,scalar_type(0.0));                    // sum across these down to scalars
-  ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd);   // splitting the SIMD  
-  // std::cout << GridLogMessage << "End alloc" << std::endl;
-
-  result.resize(fd); // And then global sum to return the same vector to every node for IO to file
-  for(int r=0;r<rd;r++){
-    lvSum[r]=Zero();
-  }
-
-  int e1=    grid->_slice_nblock[orthogdim];
-  int e2=    grid->_slice_block [orthogdim];
-  int stride=grid->_slice_stride[orthogdim];
-  // std::cout << GridLogMessage << "End prep" << std::endl;
-  // std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
-  vector_type vv;
-  auto l_v=lhs.View();
-  auto r_v=rhs.View();
-  thread_for( r,rd,{
-
-    int so=r*grid->_ostride[orthogdim]; // base offset for start of plane 
-
-    for(int n=0;n<e1;n++){
-      for(int b=0;b<e2;b++){
-        int ss = so + n * stride + b;
-        vv = TensorRemove(innerProduct(l_v[ss], r_v[ss]));
-        lvSum[r] = lvSum[r] + vv;
-      }
-    }
-  });
-  // std::cout << GridLogMessage << "End parallel inner product" << std::endl;
-
-  // Sum across simd lanes in the plane, breaking out orthog dir.
-  Coordinate icoor(Nd);
-  for(int rt=0;rt<rd;rt++){
-
-    iScalar<vector_type> temp; 
-    temp._internal = lvSum[rt];
-    extract(temp,extracted);
-
-    for(int idx=0;idx<Nsimd;idx++){
-
-      grid->iCoorFromIindex(icoor,idx);
-
-      int ldx =rt+icoor[orthogdim]*rd;
-
-      lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
-
-    }
-  }
-  // std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
-}
-template <class vobj>
-static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
-{
-  typedef typename vobj::scalar_type scalar_type;
-  GridBase *grid = lhs.Grid();
-  int fd = result.size();
-  int ld = lsSum.size();
-  // sum over nodes.
-  std::vector<scalar_type> gsum;
-  gsum.resize(fd, scalar_type(0.0));
-  // std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
-  for(int t=0;t<fd;t++){
-    int pt = t/ld; // processor plane
-    int lt = t%ld;
-    if ( pt == grid->_processor_coor[orthogdim] ) {
-      gsum[t]=lsSum[lt];
-    }
-  }
-  // std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
-  // std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
-  grid->GlobalSumVector(&gsum[0], fd);
-  // std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
-
-  result = gsum;
-}
 template<class vobj>
 static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim) 
 {
--- a/Grid/qcd/utils/A2Autils.h
+++ b/Grid/qcd/utils/A2Autils.h
@@ -76,8 +76,21 @@ public:
        const std::vector<ComplexField> &emB1,
        int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr);

-  static void ContractWWVV(std::vector<PropagatorField> &WWVV,
-			   const Eigen::Tensor<ComplexD,3> &WW_sd,
+  template <typename TensorType> // ContractWWVV overload for the two Eigen rank-3 tensor types named in the enable_if condition below
+  typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
+                           std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
+                           void>::type // return type is void when the condition holds; otherwise this overload is removed from overload resolution
+  static ContractWWVV(std::vector<PropagatorField> &WWVV,
+			   const TensorType &WW_sd,
+			   const FermionField *vs,
+			   const FermionField *vd);
+
+  template <typename TensorType> // complementary ContractWWVV overload: enabled for every TensorType that is NOT one of the two Eigen types tested below (note the leading '!')
+  typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
+                            std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
+                            void>::type // return type is void when the negated condition holds
+  static ContractWWVV(std::vector<PropagatorField> &WWVV,
+			   const TensorType &WW_sd,
 			   const FermionField *vs,
 			   const FermionField *vd);

@@ -107,6 +120,11 @@ public:
 			const FermionField *vd,
 			int orthogdim);
 #endif
+private:
+  inline static void OuterProductWWVV(PropagatorField &WWVV, // helper called by both ContractWWVV overloads: accumulates (+=) the outer product of lhs and rhs into site ss of WWVV
+                               const vobj &lhs, // supplies the first spin index and the first colour index of each accumulated element
+                               const vobj &rhs, // supplies the second spin index and the second colour index of each accumulated element
+                               const int Ns, const int ss); // Ns bounds both spin loops; ss indexes the view opened on WWVV
 };

 template<class FImpl>
@@ -1375,9 +1393,13 @@ void A2Autils<FImpl>::AslashField(TensorType &mat,
 // Take WW_sd v^dag_d (x) v_s
 // 

-template<class FImpl>
-void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
-				   const Eigen::Tensor<ComplexD,3> &WW_sd,
+template <class FImpl>
+template <typename TensorType>
+typename std::enable_if<(std::is_same<Eigen::Tensor<ComplexD,3>, TensorType>::value ||
+                         std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
+                         void>::type
+A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
+				   const TensorType &WW_sd,
 				   const FermionField *vs,
 				   const FermionField *vd)
 {
@@ -1399,39 +1421,100 @@ void A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
    for(int d_o=0;d_o<N_d;d_o+=d_unroll){
      for(int t=0;t<N_t;t++){
      for(int s=0;s<N_s;s++){
-	auto vs_v = vs[s].View();
-	auto tmp1 = vs_v[ss];
-	vobj tmp2 = Zero();
-	vobj tmp3 = Zero();
-	for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
-	  auto vd_v = vd[d].View();
-	  Scalar_v coeff = WW_sd(t,s,d);
-	  tmp3 = conjugate(vd_v[ss]);
-	  mac(&tmp2, &coeff, &tmp3);
-	}
+  auto vs_v = vs[s].View();
+  auto tmp1 = vs_v[ss];
+  vobj tmp2 = Zero();
+  vobj tmp3 = Zero();
+  for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
+    auto vd_v = vd[d].View();
+    Scalar_v coeff = WW_sd(t,s,d);
+    tmp3 = conjugate(vd_v[ss]);
+    mac(&tmp2, &coeff, &tmp3);
+  }

-	//////////////////////////
-	// Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
-	//////////////////////////
-	auto WWVV_v = WWVV[t].View();
-	for(int s1=0;s1<Ns;s1++){
-	for(int s2=0;s2<Ns;s2++){
-	  WWVV_v[ss]()(s1,s2)(0,0) += tmp1()(s1)(0)*tmp2()(s2)(0);
-	  WWVV_v[ss]()(s1,s2)(0,1) += tmp1()(s1)(0)*tmp2()(s2)(1);
-	  WWVV_v[ss]()(s1,s2)(0,2) += tmp1()(s1)(0)*tmp2()(s2)(2);
-	  WWVV_v[ss]()(s1,s2)(1,0) += tmp1()(s1)(1)*tmp2()(s2)(0);
-	  WWVV_v[ss]()(s1,s2)(1,1) += tmp1()(s1)(1)*tmp2()(s2)(1);
-	  WWVV_v[ss]()(s1,s2)(1,2) += tmp1()(s1)(1)*tmp2()(s2)(2);
-	  WWVV_v[ss]()(s1,s2)(2,0) += tmp1()(s1)(2)*tmp2()(s2)(0);
-	  WWVV_v[ss]()(s1,s2)(2,1) += tmp1()(s1)(2)*tmp2()(s2)(1);
-	  WWVV_v[ss]()(s1,s2)(2,2) += tmp1()(s1)(2)*tmp2()(s2)(2);
-	}}
+  //////////////////////////
+  // Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
+  //////////////////////////
+  OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);

      }}
    }
  });
 }

+// ContractWWVV, generic-container overload.
+//
+// Selected by the enable_if below whenever TensorType is neither
+// Eigen::Tensor<ComplexD,3> nor
+// Eigen::TensorMap<Eigen::Tensor<Complex,3,Eigen::RowMajor>>.
+//
+// Requirements on TensorType, as used by this body:
+//   * WW_sd.dimensions()[0..2] give the extents N_t, N_s and N_d;
+//   * WW_sd[t] is assignable to a dynamic row-major Eigen matrix that is
+//     then indexed as buf(s,d).
+//
+// For every t in [0,N_t) the routine zeroes WWVV[t] and then, for every
+// outer site ss, accumulates
+//     OuterProductWWVV(WWVV[t], vs[s][ss], sum_d buf(s,d)*conjugate(vd[d][ss]))
+// over all s, with the d-sum split into chunks of d_unroll terms.
+//
+// WWVV must hold at least N_t fields; vs and vd must point to at least N_s
+// and N_d fields respectively (none of this is checked here).
+template <class FImpl>
+template <typename TensorType>
+typename std::enable_if<!(std::is_same<Eigen::Tensor<ComplexD, 3>, TensorType>::value ||
+                          std::is_same<Eigen::TensorMap<Eigen::Tensor<Complex, 3, Eigen::RowMajor>>, TensorType>::value),
+                          void>::type
+A2Autils<FImpl>::ContractWWVV(std::vector<PropagatorField> &WWVV,
+                              const TensorType &WW_sd,
+                              const FermionField *vs,
+                              const FermionField *vd)
+{
+  GridBase *grid = vs[0].Grid();
+
+  const int N_t = WW_sd.dimensions()[0];
+  const int N_s = WW_sd.dimensions()[1];
+  const int N_d = WW_sd.dimensions()[2];
+
+  const int d_unroll = 32;// Empirical optimisation
+
+  // Per-timeslice copy of the coefficients, indexed (s,d).
+  Eigen::Matrix<Complex, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> buf;
+
+  for(int t=0;t<N_t;t++){
+    WWVV[t] = Zero();
+  }
+
+  for (int t = 0; t < N_t; t++){
+    std::cout << GridLogMessage << "Contraction t = " << t << std::endl;
+    buf = WW_sd[t];
+    thread_for(ss,grid->oSites(),{
+      for(int d_o=0;d_o<N_d;d_o+=d_unroll){
+        for(int s=0;s<N_s;s++){
+          auto vs_v = vs[s].View();
+          auto tmp1 = vs_v[ss];
+          vobj tmp2 = Zero();
+          vobj tmp3 = Zero();
+          // tmp2 = sum over this chunk of d of buf(s,d) * conjugate(vd[d][ss])
+          for(int d=d_o;d<MIN(d_o+d_unroll,N_d);d++){
+            auto vd_v = vd[d].View();
+            Scalar_v coeff = buf(s,d);
+            tmp3 = conjugate(vd_v[ss]);
+            mac(&tmp2, &coeff, &tmp3);
+          }
+
+          //////////////////////////
+          // Fast outer product of tmp1 with a sum of terms suppressed by d_unroll
+          //////////////////////////
+          OuterProductWWVV(WWVV[t], tmp1, tmp2, Ns, ss);
+        }
+      }
+    });
+  }
+}
+
+// Accumulate the spin (x) colour outer product of lhs and rhs into site ss
+// of WWVV:
+//     WWVV[ss](s1,s2)(c1,c2) += lhs(s1)(c1) * rhs(s2)(c2)
+// for s1,s2 in [0,Ns) and c1,c2 in {0,1,2}.  The colour part is written out
+// by hand, one statement per (c1,c2) pair.
+template <class FImpl>
+inline void A2Autils<FImpl>::OuterProductWWVV(PropagatorField &WWVV,
+                                             const vobj &lhs,
+                                             const vobj &rhs,
+                                             const int Ns, const int ss)
+{
+  auto site_view = WWVV.View();
+  for (int spin_l = 0; spin_l < Ns; ++spin_l){
+    const auto &l_col = lhs()(spin_l);   // colour vector of lhs at this spin
+    for (int spin_r = 0; spin_r < Ns; ++spin_r){
+      const auto &r_col = rhs()(spin_r); // colour vector of rhs at this spin
+      auto &acc = site_view[ss]()(spin_l,spin_r); // colour matrix being updated
+
+      acc(0, 0) += l_col(0) * r_col(0);
+      acc(0, 1) += l_col(0) * r_col(1);
+      acc(0, 2) += l_col(0) * r_col(2);
+
+      acc(1, 0) += l_col(1) * r_col(0);
+      acc(1, 1) += l_col(1) * r_col(1);
+      acc(1, 2) += l_col(1) * r_col(2);
+
+      acc(2, 0) += l_col(2) * r_col(0);
+      acc(2, 1) += l_col(2) * r_col(1);
+      acc(2, 2) += l_col(2) * r_col(2);
+    }
+  }
+}

 template<class FImpl>
 void A2Autils<FImpl>::ContractFourQuarkColourDiagonal(const PropagatorField &WWVV0,
--- a/Grid/qcd/utils/BaryonUtils.h
+++ b/Grid/qcd/utils/BaryonUtils.h
@@ -0,0 +1,252 @@
+/*************************************************************************************
+ 
+ Grid physics library, www.github.com/paboyle/Grid
+ 
+ Source file: ./Grid/qcd/utils/BaryonUtils.h
+ 
+ Copyright (C) 2019
+ 
+ Author: Felix Erben <felix.erben@ed.ac.uk>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ 
+ See the full license in the file "LICENSE" in the top level distribution directory
+ *************************************************************************************/
+/*  END LEGAL */
+#pragma once
+//#include <Grid/Hadrons/Global.hpp>
+#include <Grid/Eigen/unsupported/CXX11/Tensor>
+
+NAMESPACE_BEGIN(Grid);
+
+/*
+ * BaryonUtils: static helpers that combine three quark propagators into a
+ * baryon contraction.
+ *
+ *  - ContractBaryons        works on whole propagator fields and fills a
+ *                           ComplexField, one value per lattice site.
+ *  - ContractBaryons_Sliced works on three already-extracted objects of type
+ *                           mobj and fills a single robj.
+ *  - baryon_site            (private) is the per-site kernel used by both.
+ *
+ * quarks_left / quarks_right are read at indices 0..2 only; their characters
+ * are compared to decide which of the six permutations in `epsilon`
+ * contribute.
+ */
+template <typename FImpl>
+class BaryonUtils 
+{
+public:
+  typedef typename FImpl::ComplexField ComplexField;
+  typedef typename FImpl::FermionField FermionField;
+  typedef typename FImpl::PropagatorField PropagatorField;
+
+  typedef typename FImpl::SitePropagator pobj;
+  typedef typename ComplexField::vector_object vobj;
+  
+  // The six permutations of (0,1,2): the first three are even, the last
+  // three odd.  epsilon_sgn holds the matching sign, entry for entry.
+  static constexpr int epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};
+  static constexpr Complex epsilon_sgn[6]= {1,1,1,-1,-1,-1};
+
+private: 
+  // Per-site kernel.  Accumulates into `result` (it does not clear it) the
+  // terms selected by the six 0/1 flags in wick_contraction[0..5].
+  template <class mobj, class robj>
+  static void baryon_site(const mobj &D1,
+                          const mobj &D2,
+                          const mobj &D3,
+                          const Gamma GammaA_left,
+                          const Gamma GammaB_left,
+                          const Gamma GammaA_right,
+                          const Gamma GammaB_right,
+                          const int parity,
+                          const int * wick_contraction,
+                          robj &result);
+public:
+  // Field version: runs baryon_site on every outer site of q1_left's grid
+  // and stores the result in baryon_corr.  parity must be +1 or -1.
+  static void ContractBaryons(const PropagatorField &q1_left,
+                              const PropagatorField &q2_left,
+                              const PropagatorField &q3_left,
+                              const Gamma GammaA_left,
+                              const Gamma GammaB_left,
+                              const Gamma GammaA_right,
+                              const Gamma GammaB_right,
+                              const char * quarks_left,
+                              const char * quarks_right,
+                              const int parity,
+                              ComplexField &baryon_corr);
+  // Single-object version: zeroes `result`, then runs baryon_site once on
+  // D1, D2, D3.  parity must be +1 or -1.
+  template <class mobj, class robj>
+  static void ContractBaryons_Sliced(const mobj &D1,
+                                     const mobj &D2,
+                                     const mobj &D3,
+                                     const Gamma GammaA_left,
+                                     const Gamma GammaB_left,
+                                     const Gamma GammaA_right,
+                                     const Gamma GammaB_right,
+                                     const char * quarks_left,
+                                     const char * quarks_right,
+                                     const int parity,
+                                     robj &result);
+};
+
+template <class FImpl>
+constexpr int BaryonUtils<FImpl>::epsilon[6][3]; // out-of-class definition of the static constexpr permutation table initialised inside the class
+template <class FImpl>
+constexpr Complex BaryonUtils<FImpl>::epsilon_sgn[6]; // out-of-class definition of the matching sign table initialised inside the class
+
+/*
+ * Per-site baryon contraction kernel.
+ *
+ * Builds
+ *   pD1 = 0.5 * ( GammaA_left*GammaA_right*D1
+ *                 + parity * GammaA_left*g4*GammaA_right*D1 ),  g4 = GammaT
+ *   gD3 = GammaB_right * D3
+ * and, for every pair (ie_left, ie_right) of colour permutations from
+ * `epsilon`, accumulates into `result` the terms whose flag in
+ * wick_contraction[0..5] is non-zero.  Flags 0..2 add, flags 3..5 subtract.
+ *
+ * `result` is accumulated into, never cleared: callers zero it first.
+ *
+ * The three products with GammaB_left depend on neither permutation index,
+ * so they are formed once up front rather than inside the 6x6 loop.
+ */
+template <class FImpl>
+template <class mobj, class robj>
+void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
+                                     const mobj &D2,
+                                     const mobj &D3,
+                                     const Gamma GammaA_left,
+                                     const Gamma GammaB_left,
+                                     const Gamma GammaA_right,
+                                     const Gamma GammaB_right,
+                                     const int parity,
+                                     const int * wick_contraction,
+                                     robj &result)
+{
+
+  Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4)
+
+  auto gD1a = GammaA_left * GammaA_right * D1;
+  auto gD1b = GammaA_left * g4 * GammaA_right * D1;
+  auto pD1 = 0.5* (gD1a + static_cast<double>(parity) * gD1b);
+  auto gD3 = GammaB_right * D3;
+
+  // Loop-invariant right-multiplications by GammaB_left.
+  auto D2g  = D2  * GammaB_left;
+  auto pD1g = pD1 * GammaB_left;
+  auto gD3g = gD3 * GammaB_left;
+
+  for (int ie_left=0; ie_left < 6 ; ie_left++){
+    const int a_left = epsilon[ie_left][0]; //a
+    const int b_left = epsilon[ie_left][1]; //b
+    const int c_left = epsilon[ie_left][2]; //c
+    for (int ie_right=0; ie_right < 6 ; ie_right++){
+      const int a_right = epsilon[ie_right][0]; //a'
+      const int b_right = epsilon[ie_right][1]; //b'
+      const int c_right = epsilon[ie_right][2]; //c'
+      // Combined sign of the two colour permutations; constant across the spin loops.
+      const auto ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right];
+      //This is the \delta_{456}^{123} part
+      if (wick_contraction[0]){
+        for (int alpha_right=0; alpha_right<Ns; alpha_right++){
+        for (int beta_left=0; beta_left<Ns; beta_left++){
+        for (int gamma_left=0; gamma_left<Ns; gamma_left++){
+          result()()() += ee * pD1()(gamma_left,gamma_left)(c_right,c_left)*D2g()(alpha_right,beta_left)(a_right,a_left)*gD3()(alpha_right,beta_left)(b_right,b_left);
+        }}}
+      }
+      //This is the \delta_{456}^{231} part
+      if (wick_contraction[1]){
+        for (int alpha_right=0; alpha_right<Ns; alpha_right++){
+        for (int beta_left=0; beta_left<Ns; beta_left++){
+        for (int gamma_left=0; gamma_left<Ns; gamma_left++){
+          result()()() += ee * pD1g()(gamma_left,beta_left)(c_right,a_left)*D2()(alpha_right,beta_left)(a_right,b_left)*gD3()(alpha_right,gamma_left)(b_right,c_left);
+        }}}
+      }
+      //This is the \delta_{456}^{312} part
+      if (wick_contraction[2]){
+        for (int alpha_right=0; alpha_right<Ns; alpha_right++){
+        for (int beta_left=0; beta_left<Ns; beta_left++){
+        for (int gamma_left=0; gamma_left<Ns; gamma_left++){
+          result()()() += ee * pD1()(gamma_left,beta_left)(c_right,b_left)*D2()(alpha_right,gamma_left)(a_right,c_left)*gD3g()(alpha_right,beta_left)(b_right,a_left);
+        }}}
+      }
+      //This is the \delta_{456}^{132} part
+      if (wick_contraction[3]){
+        for (int alpha_right=0; alpha_right<Ns; alpha_right++){
+        for (int beta_left=0; beta_left<Ns; beta_left++){
+        for (int gamma_left=0; gamma_left<Ns; gamma_left++){
+          result()()() -= ee * pD1()(gamma_left,gamma_left)(c_right,c_left)*D2()(alpha_right,beta_left)(a_right,b_left)*gD3g()(alpha_right,beta_left)(b_right,a_left);
+        }}}
+      }
+      //This is the \delta_{456}^{321} part
+      if (wick_contraction[4]){
+        for (int alpha_right=0; alpha_right<Ns; alpha_right++){
+        for (int beta_left=0; beta_left<Ns; beta_left++){
+        for (int gamma_left=0; gamma_left<Ns; gamma_left++){
+          result()()() -= ee * pD1()(gamma_left,beta_left)(c_right,b_left)*D2g()(alpha_right,beta_left)(a_right,a_left)*gD3()(alpha_right,gamma_left)(b_right,c_left);
+        }}}
+      }
+      //This is the \delta_{456}^{213} part
+      if (wick_contraction[5]){
+        for (int alpha_right=0; alpha_right<Ns; alpha_right++){
+        for (int beta_left=0; beta_left<Ns; beta_left++){
+        for (int gamma_left=0; gamma_left<Ns; gamma_left++){
+          result()()() -= ee * pD1g()(gamma_left,beta_left)(c_right,a_left)*D2()(alpha_right,gamma_left)(a_right,c_left)*gD3()(alpha_right,beta_left)(b_right,b_left);
+        }}}
+      }
+    }
+  }
+}
+
+/*
+ * Contract three propagator fields into a baryon correlator field.
+ *
+ * Steps:
+ *  1. print the contraction label and the `.g` member of the four gamma
+ *     matrices to std::cout;
+ *  2. check that parity is +1 or -1;
+ *  3. set wick_contraction[ie] = 1 when quarks_left[k] equals
+ *     quarks_right[epsilon[ie][k]] for k = 0,1,2, else 0;
+ *  4. for every outer site of q1_left's grid, run baryon_site() on the three
+ *     site propagators and store the result in baryon_corr.
+ *
+ * quarks_left and quarks_right must each provide at least three readable
+ * characters; this is not checked here.
+ */
+template<class FImpl>
+void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
+                                         const PropagatorField &q2_left,
+                                         const PropagatorField &q3_left,
+                                         const Gamma GammaA_left,
+                                         const Gamma GammaB_left,
+                                         const Gamma GammaA_right,
+                                         const Gamma GammaB_right,
+                                         const char * quarks_left,
+                                         const char * quarks_right,
+                                         const int parity,
+                                         ComplexField &baryon_corr)
+{
+  std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
+  std::cout << "GammaA (left) " << (GammaA_left.g) <<  std::endl;
+  std::cout << "GammaB (left) " << (GammaB_left.g) <<  std::endl;
+  std::cout << "GammaA (right) " << (GammaA_right.g) <<  std::endl;
+  std::cout << "GammaB (right) " << (GammaB_right.g) <<  std::endl;
+ 
+  // Parenthesised so the message string attaches to the whole condition,
+  // not just to the second comparison (&& binds tighter than ||).
+  assert((parity==1 || parity == -1) && "Parity must be +1 or -1");
+
+  GridBase *grid = q1_left.Grid();
+
+  // One flag per permutation in `epsilon`: 1 if that permutation of
+  // quarks_right reproduces quarks_left, 0 otherwise.
+  int wick_contraction[6];
+  for (int ie=0; ie < 6 ; ie++) {
+    wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
+  }
+
+  auto vbaryon_corr= baryon_corr.View();
+  auto v1 = q1_left.View();
+  auto v2 = q2_left.View();
+  auto v3 = q3_left.View();
+
+ // accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
+  thread_for(ss,grid->oSites(),{
+
+    auto D1 = v1[ss];
+    auto D2 = v2[ss];
+    auto D3 = v3[ss];
+
+    // baryon_site accumulates, so start every site from zero.
+    vobj result=Zero();
+    baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
+    vbaryon_corr[ss] = result; 
+  }  );//end loop over lattice sites
+}
+/*
+ * Single-object variant of ContractBaryons.
+ *
+ * Prints the same diagnostic lines to std::cout, checks that parity is +1 or
+ * -1, derives the six wick_contraction flags from quarks_left/quarks_right
+ * exactly as ContractBaryons does, then zeroes `result` and calls
+ * baryon_site() once on D1, D2, D3.
+ *
+ * quarks_left and quarks_right must each provide at least three readable
+ * characters; this is not checked here.
+ */
+template <class FImpl>
+template <class mobj, class robj>
+void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
+                                                const mobj &D2,
+                                                const mobj &D3,
+                                                const Gamma GammaA_left,
+                                                const Gamma GammaB_left,
+                                                const Gamma GammaA_right,
+                                                const Gamma GammaB_right,
+                                                const char * quarks_left,
+                                                const char * quarks_right,
+                                                const int parity,
+                                                robj &result)
+{
+  std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
+  std::cout << "GammaA (left) " << (GammaA_left.g) <<  std::endl;
+  std::cout << "GammaB (left) " << (GammaB_left.g) <<  std::endl;
+  std::cout << "GammaA (right) " << (GammaA_right.g) <<  std::endl;
+  std::cout << "GammaB (right) " << (GammaB_right.g) <<  std::endl;
+ 
+  // Parenthesised so the message string attaches to the whole condition,
+  // not just to the second comparison (&& binds tighter than ||).
+  assert((parity==1 || parity == -1) && "Parity must be +1 or -1");
+
+  // One flag per permutation in `epsilon`: 1 if that permutation of
+  // quarks_right reproduces quarks_left, 0 otherwise.
+  int wick_contraction[6];
+  for (int ie=0; ie < 6 ; ie++) {
+    wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
+  }
+
+  // baryon_site accumulates, so clear the output first.
+  result=Zero();
+  baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
+}
+NAMESPACE_END(Grid);