diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index eff7d958..b986edd7 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -394,6 +394,8 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, conformable(_grid, q_out._grid); Lattice> ph(_grid), coor(_grid); Complex i(0.0,1.0); + PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); + int tshift = (mu == Tp) ? 1 : 0; // Momentum projection ph = zero; @@ -404,8 +406,43 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, } ph = exp((Real)(2*M_PI)*i*ph); - Kernels::SeqConservedCurrentInternal(q_in, q_out, Umu, curr_type, mu, ph, - tmin, tmax); + q_out = zero; + LatticeInteger coords(_grid); + LatticeCoordinate(coords, Tp); + + // Need q(x + mu) and q(x - mu). + tmp = Cshift(q_in, mu, 1); + tmpFwd = tmp*ph; + tmp = ph*q_in; + tmpBwd = Cshift(tmp, mu, -1); + + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + { + // Compute the sequential conserved current insertion only if our simd + // object contains a timeslice we need. + vInteger t_mask = ((coords._odata[sU] >= tmin) && + (coords._odata[sU] <= tmax)); + Integer timeSlices = Reduce(t_mask); + + if (timeSlices > 0) + { + Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU], + q_out._odata[sU], + Umu, sU, mu, t_mask); + } + + // Repeat for backward direction. + t_mask = ((coords._odata[sU] >= (tmin + tshift)) && + (coords._odata[sU] <= (tmax + tshift))); + timeSlices = Reduce(t_mask); + + if (timeSlices > 0) + { + Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU], + q_out._odata[sU], + Umu, sU, mu, t_mask); + } + } } FermOpTemplateInstantiate(WilsonFermion); diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 76218098..5daed3de 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -727,31 +727,73 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, { conformable(q_in._grid, FermionGrid()); conformable(q_in._grid, q_out._grid); - Lattice> ph(_FourDimGrid), coor(_FourDimGrid); - PropagatorField q_in_s(_FourDimGrid); - PropagatorField q_out_s(_FourDimGrid); + Lattice> ph(FermionGrid()), coor(FermionGrid()); + PropagatorField tmpFwd(FermionGrid()), tmpBwd(FermionGrid()), + tmp(FermionGrid()); Complex i(0.0, 1.0); + int tshift = (mu == Tp) ? 1 : 0; - // Momentum projection + // Momentum projection. ph = zero; for(unsigned int nu = 0; nu < Nd - 1; nu++) { - LatticeCoordinate(coor, nu); + // Shift coordinate lattice index by 1 to account for 5th dimension. + LatticeCoordinate(coor, nu + 1); ph = ph + mom[nu]*coor*((1./(_FourDimGrid->_fdimensions[nu]))); } ph = exp((Real)(2*M_PI)*i*ph); - // Sequential insertion across 5th dimension - for (int s = 0; s < Ls; s++) + q_out = zero; + LatticeInteger coords(_FourDimGrid); + LatticeCoordinate(coords, Tp); + + // Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu + // by one. + tmp = Cshift(q_in, mu + 1, 1); + tmpFwd = tmp*ph; + tmp = ph*q_in; + tmpBwd = Cshift(tmp, mu + 1, -1); + + parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) { - ExtractSlice(q_in_s, q_in, s, 0); - Kernels::SeqConservedCurrentInternal(q_in_s, q_out_s, Umu, curr_type, - mu, ph, tmin, tmax); - if ((curr_type == Current::Axial) && (s < Ls/2)) + // Compute the sequential conserved current insertion only if our simd + // object contains a timeslice we need. + vInteger t_mask = ((coords._odata[sU] >= tmin) && + (coords._odata[sU] <= tmax)); + Integer timeSlices = Reduce(t_mask); + + if (timeSlices > 0) { - q_out_s = -q_out_s; + unsigned int sF = sU * Ls; + for (unsigned int s = 0; s < Ls; ++s) + { + bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ + true : false; + Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sF], + q_out._odata[sF], Umu, sU, + mu, t_mask, axial_sign); + ++sF; + } + } + + // Repeat for backward direction. + t_mask = ((coords._odata[sU] >= (tmin + tshift)) && + (coords._odata[sU] <= (tmax + tshift))); + timeSlices = Reduce(t_mask); + + if (timeSlices > 0) + { + unsigned int sF = sU * Ls; + for (unsigned int s = 0; s < Ls; ++s) + { + bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \ + true : false; + Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sF], + q_out._odata[sF], Umu, sU, + mu, t_mask, axial_sign); + ++sF; + } } - InsertSlice(q_out_s, q_out, s, 0); } } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 6b193766..dc66db23 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -383,63 +383,70 @@ NO_CURR_SITE(GparityWilsonImplFH); NO_CURR_SITE(GparityWilsonImplDF); -template -void WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, - PropagatorField &q_out, - DoubledGaugeField &U, - Current curr_type, - unsigned int mu, - Lattice> &ph, - unsigned int tmin, - unsigned int tmax) +/******************************************************************************* + * Name: SeqConservedCurrentSiteFwd + * Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in shifted in +ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign) { - int tshift = (mu == Tp) ? 1 : 0; - Real G_T = (curr_type == Current::Tadpole) ? -1. : 1.; - PropagatorField tmp(q_in._grid); - GaugeLinkField Umu(U._grid); - Umu = PeekIndex(U, mu); - Lattice> t(q_in._grid); - LatticeCoordinate(t, Tp); + SitePropagator result; + Impl::multLinkProp(result, U._odata[sU], q_in, mu); + result = WilsonCurrentFwd(result, mu); - tmp = this->CovShiftForward(Umu, mu, q_in)*ph; - tmp = where((t >= tmin) and (t <= tmax), tmp, 0.*tmp); - q_out = G_T*WilsonCurrentFwd(tmp, mu); + // Zero any unwanted timeslice entries. + result = predicatedWhere(t_mask, result, 0.*result); - tmp = q_in*ph; - tmp = this->CovShiftBackward(Umu, mu, tmp); - tmp = where((t >= tmin + tshift) and (t <= tmax + tshift), tmp, 0.*tmp); - q_out -= WilsonCurrentBwd(tmp, mu); + if (switch_sign) + { + q_out -= result; + } + else + { + q_out += result; + } } +/******************************************************************************* + * Name: SeqConservedCurrentSiteFwd + * Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in shifted in -ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign) +{ + SitePropagator result; + Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd); + result = WilsonCurrentBwd(result, mu); -// GParity, (Z)DomainWallVec5D -> require special implementation -#define NO_CURR(Impl) \ -template <> void \ -WilsonKernels::SeqConservedCurrentInternal(const PropagatorField &q_in, \ - PropagatorField &q_out, \ - DoubledGaugeField &U, \ - Current curr_type, \ - unsigned int mu, \ - Lattice> &ph, \ - unsigned int tmin, \ - unsigned int tmax) \ -{ \ - assert(0); \ + // Zero any unwanted timeslice entries. + result = predicatedWhere(t_mask, result, 0.*result); + + if (switch_sign) + { + q_out += result; + } + else + { + q_out -= result; + } } -NO_CURR(GparityWilsonImplF); -NO_CURR(GparityWilsonImplD); -NO_CURR(GparityWilsonImplFH); -NO_CURR(GparityWilsonImplDF); -NO_CURR(DomainWallVec5dImplF); -NO_CURR(DomainWallVec5dImplD); -NO_CURR(DomainWallVec5dImplFH); -NO_CURR(DomainWallVec5dImplDF); -NO_CURR(ZDomainWallVec5dImplF); -NO_CURR(ZDomainWallVec5dImplD); -NO_CURR(ZDomainWallVec5dImplFH); -NO_CURR(ZDomainWallVec5dImplDF); - FermOpTemplateInstantiate(WilsonKernels); AdjointFermOpTemplateInstantiate(WilsonKernels); TwoIndexFermOpTemplateInstantiate(WilsonKernels); diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 0294c740..ed8d6be9 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -197,14 +197,20 @@ public: unsigned int sU, unsigned int mu, bool switch_sign = false); - void SeqConservedCurrentInternal(const PropagatorField &q_in, - PropagatorField &q_out, - DoubledGaugeField &U, - Current curr_type, - unsigned int mu, - Lattice> &ph, - unsigned int tmin, - unsigned int tmax); + void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign = false); + void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeField &U, + unsigned int sU, + unsigned int mu, + vInteger t_mask, + bool switch_sign = false); private: // Specialised variants