mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 13:15:55 +01:00
Faster sequential conserved current implementation, now compatible with 5D vectorisation & G-parity.
This commit is contained in:
parent
41af8c12d7
commit
1bd311ba9c
@ -394,6 +394,8 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
conformable(_grid, q_out._grid);
|
conformable(_grid, q_out._grid);
|
||||||
Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
|
Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
|
||||||
Complex i(0.0,1.0);
|
Complex i(0.0,1.0);
|
||||||
|
PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid);
|
||||||
|
int tshift = (mu == Tp) ? 1 : 0;
|
||||||
|
|
||||||
// Momentum projection
|
// Momentum projection
|
||||||
ph = zero;
|
ph = zero;
|
||||||
@ -404,8 +406,43 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
}
|
}
|
||||||
ph = exp((Real)(2*M_PI)*i*ph);
|
ph = exp((Real)(2*M_PI)*i*ph);
|
||||||
|
|
||||||
Kernels::SeqConservedCurrentInternal(q_in, q_out, Umu, curr_type, mu, ph,
|
q_out = zero;
|
||||||
tmin, tmax);
|
LatticeInteger coords(_grid);
|
||||||
|
LatticeCoordinate(coords, Tp);
|
||||||
|
|
||||||
|
// Need q(x + mu) and q(x - mu).
|
||||||
|
tmp = Cshift(q_in, mu, 1);
|
||||||
|
tmpFwd = tmp*ph;
|
||||||
|
tmp = ph*q_in;
|
||||||
|
tmpBwd = Cshift(tmp, mu, -1);
|
||||||
|
|
||||||
|
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
||||||
|
{
|
||||||
|
// Compute the sequential conserved current insertion only if our simd
|
||||||
|
// object contains a timeslice we need.
|
||||||
|
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
|
||||||
|
(coords._odata[sU] <= tmax));
|
||||||
|
Integer timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
|
if (timeSlices > 0)
|
||||||
|
{
|
||||||
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU],
|
||||||
|
q_out._odata[sU],
|
||||||
|
Umu, sU, mu, t_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Repeat for backward direction.
|
||||||
|
t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
|
||||||
|
(coords._odata[sU] <= (tmax + tshift)));
|
||||||
|
timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
|
if (timeSlices > 0)
|
||||||
|
{
|
||||||
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU],
|
||||||
|
q_out._odata[sU],
|
||||||
|
Umu, sU, mu, t_mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonFermion);
|
FermOpTemplateInstantiate(WilsonFermion);
|
||||||
|
@ -727,31 +727,73 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
{
|
{
|
||||||
conformable(q_in._grid, FermionGrid());
|
conformable(q_in._grid, FermionGrid());
|
||||||
conformable(q_in._grid, q_out._grid);
|
conformable(q_in._grid, q_out._grid);
|
||||||
Lattice<iSinglet<Simd>> ph(_FourDimGrid), coor(_FourDimGrid);
|
Lattice<iSinglet<Simd>> ph(FermionGrid()), coor(FermionGrid());
|
||||||
PropagatorField q_in_s(_FourDimGrid);
|
PropagatorField tmpFwd(FermionGrid()), tmpBwd(FermionGrid()),
|
||||||
PropagatorField q_out_s(_FourDimGrid);
|
tmp(FermionGrid());
|
||||||
Complex i(0.0, 1.0);
|
Complex i(0.0, 1.0);
|
||||||
|
int tshift = (mu == Tp) ? 1 : 0;
|
||||||
|
|
||||||
// Momentum projection
|
// Momentum projection.
|
||||||
ph = zero;
|
ph = zero;
|
||||||
for(unsigned int nu = 0; nu < Nd - 1; nu++)
|
for(unsigned int nu = 0; nu < Nd - 1; nu++)
|
||||||
{
|
{
|
||||||
LatticeCoordinate(coor, nu);
|
// Shift coordinate lattice index by 1 to account for 5th dimension.
|
||||||
|
LatticeCoordinate(coor, nu + 1);
|
||||||
ph = ph + mom[nu]*coor*((1./(_FourDimGrid->_fdimensions[nu])));
|
ph = ph + mom[nu]*coor*((1./(_FourDimGrid->_fdimensions[nu])));
|
||||||
}
|
}
|
||||||
ph = exp((Real)(2*M_PI)*i*ph);
|
ph = exp((Real)(2*M_PI)*i*ph);
|
||||||
|
|
||||||
// Sequential insertion across 5th dimension
|
q_out = zero;
|
||||||
for (int s = 0; s < Ls; s++)
|
LatticeInteger coords(_FourDimGrid);
|
||||||
|
LatticeCoordinate(coords, Tp);
|
||||||
|
|
||||||
|
// Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu
|
||||||
|
// by one.
|
||||||
|
tmp = Cshift(q_in, mu + 1, 1);
|
||||||
|
tmpFwd = tmp*ph;
|
||||||
|
tmp = ph*q_in;
|
||||||
|
tmpBwd = Cshift(tmp, mu + 1, -1);
|
||||||
|
|
||||||
|
parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU)
|
||||||
{
|
{
|
||||||
ExtractSlice(q_in_s, q_in, s, 0);
|
// Compute the sequential conserved current insertion only if our simd
|
||||||
Kernels::SeqConservedCurrentInternal(q_in_s, q_out_s, Umu, curr_type,
|
// object contains a timeslice we need.
|
||||||
mu, ph, tmin, tmax);
|
vInteger t_mask = ((coords._odata[sU] >= tmin) &&
|
||||||
if ((curr_type == Current::Axial) && (s < Ls/2))
|
(coords._odata[sU] <= tmax));
|
||||||
|
Integer timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
|
if (timeSlices > 0)
|
||||||
{
|
{
|
||||||
q_out_s = -q_out_s;
|
unsigned int sF = sU * Ls;
|
||||||
|
for (unsigned int s = 0; s < Ls; ++s)
|
||||||
|
{
|
||||||
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \
|
||||||
|
true : false;
|
||||||
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sF],
|
||||||
|
q_out._odata[sF], Umu, sU,
|
||||||
|
mu, t_mask, axial_sign);
|
||||||
|
++sF;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Repeat for backward direction.
|
||||||
|
t_mask = ((coords._odata[sU] >= (tmin + tshift)) &&
|
||||||
|
(coords._odata[sU] <= (tmax + tshift)));
|
||||||
|
timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
|
if (timeSlices > 0)
|
||||||
|
{
|
||||||
|
unsigned int sF = sU * Ls;
|
||||||
|
for (unsigned int s = 0; s < Ls; ++s)
|
||||||
|
{
|
||||||
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (Ls / 2))) ? \
|
||||||
|
true : false;
|
||||||
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sF],
|
||||||
|
q_out._odata[sF], Umu, sU,
|
||||||
|
mu, t_mask, axial_sign);
|
||||||
|
++sF;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
InsertSlice(q_out_s, q_out, s, 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -383,63 +383,70 @@ NO_CURR_SITE(GparityWilsonImplFH);
|
|||||||
NO_CURR_SITE(GparityWilsonImplDF);
|
NO_CURR_SITE(GparityWilsonImplDF);
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
/*******************************************************************************
|
||||||
void WilsonKernels<Impl>::SeqConservedCurrentInternal(const PropagatorField &q_in,
|
* Name: SeqConservedCurrentSiteFwd
|
||||||
PropagatorField &q_out,
|
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
|
||||||
DoubledGaugeField &U,
|
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||||
Current curr_type,
|
* - Pass in q_in shifted in +ve mu direction.
|
||||||
unsigned int mu,
|
******************************************************************************/
|
||||||
Lattice<iSinglet<Simd>> &ph,
|
template<class Impl>
|
||||||
unsigned int tmin,
|
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||||
unsigned int tmax)
|
SitePropagator &q_out,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
unsigned int sU,
|
||||||
|
unsigned int mu,
|
||||||
|
vInteger t_mask,
|
||||||
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
int tshift = (mu == Tp) ? 1 : 0;
|
SitePropagator result;
|
||||||
Real G_T = (curr_type == Current::Tadpole) ? -1. : 1.;
|
Impl::multLinkProp(result, U._odata[sU], q_in, mu);
|
||||||
PropagatorField tmp(q_in._grid);
|
result = WilsonCurrentFwd(result, mu);
|
||||||
GaugeLinkField Umu(U._grid);
|
|
||||||
Umu = PeekIndex<LorentzIndex>(U, mu);
|
|
||||||
Lattice<iScalar<vInteger>> t(q_in._grid);
|
|
||||||
LatticeCoordinate(t, Tp);
|
|
||||||
|
|
||||||
tmp = this->CovShiftForward(Umu, mu, q_in)*ph;
|
// Zero any unwanted timeslice entries.
|
||||||
tmp = where((t >= tmin) and (t <= tmax), tmp, 0.*tmp);
|
result = predicatedWhere(t_mask, result, 0.*result);
|
||||||
q_out = G_T*WilsonCurrentFwd(tmp, mu);
|
|
||||||
|
|
||||||
tmp = q_in*ph;
|
if (switch_sign)
|
||||||
tmp = this->CovShiftBackward(Umu, mu, tmp);
|
{
|
||||||
tmp = where((t >= tmin + tshift) and (t <= tmax + tshift), tmp, 0.*tmp);
|
q_out -= result;
|
||||||
q_out -= WilsonCurrentBwd(tmp, mu);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
q_out += result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* Name: SeqConservedCurrentSiteFwd
|
||||||
|
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
|
||||||
|
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||||
|
* - Pass in q_in shifted in -ve mu direction.
|
||||||
|
******************************************************************************/
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||||
|
SitePropagator &q_out,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
unsigned int sU,
|
||||||
|
unsigned int mu,
|
||||||
|
vInteger t_mask,
|
||||||
|
bool switch_sign)
|
||||||
|
{
|
||||||
|
SitePropagator result;
|
||||||
|
Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd);
|
||||||
|
result = WilsonCurrentBwd(result, mu);
|
||||||
|
|
||||||
// GParity, (Z)DomainWallVec5D -> require special implementation
|
// Zero any unwanted timeslice entries.
|
||||||
#define NO_CURR(Impl) \
|
result = predicatedWhere(t_mask, result, 0.*result);
|
||||||
template <> void \
|
|
||||||
WilsonKernels<Impl>::SeqConservedCurrentInternal(const PropagatorField &q_in, \
|
if (switch_sign)
|
||||||
PropagatorField &q_out, \
|
{
|
||||||
DoubledGaugeField &U, \
|
q_out += result;
|
||||||
Current curr_type, \
|
}
|
||||||
unsigned int mu, \
|
else
|
||||||
Lattice<iSinglet<Simd>> &ph, \
|
{
|
||||||
unsigned int tmin, \
|
q_out -= result;
|
||||||
unsigned int tmax) \
|
}
|
||||||
{ \
|
|
||||||
assert(0); \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NO_CURR(GparityWilsonImplF);
|
|
||||||
NO_CURR(GparityWilsonImplD);
|
|
||||||
NO_CURR(GparityWilsonImplFH);
|
|
||||||
NO_CURR(GparityWilsonImplDF);
|
|
||||||
NO_CURR(DomainWallVec5dImplF);
|
|
||||||
NO_CURR(DomainWallVec5dImplD);
|
|
||||||
NO_CURR(DomainWallVec5dImplFH);
|
|
||||||
NO_CURR(DomainWallVec5dImplDF);
|
|
||||||
NO_CURR(ZDomainWallVec5dImplF);
|
|
||||||
NO_CURR(ZDomainWallVec5dImplD);
|
|
||||||
NO_CURR(ZDomainWallVec5dImplFH);
|
|
||||||
NO_CURR(ZDomainWallVec5dImplDF);
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonKernels);
|
FermOpTemplateInstantiate(WilsonKernels);
|
||||||
AdjointFermOpTemplateInstantiate(WilsonKernels);
|
AdjointFermOpTemplateInstantiate(WilsonKernels);
|
||||||
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
|
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
|
||||||
|
@ -197,14 +197,20 @@ public:
|
|||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
bool switch_sign = false);
|
bool switch_sign = false);
|
||||||
void SeqConservedCurrentInternal(const PropagatorField &q_in,
|
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||||
PropagatorField &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
Current curr_type,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
Lattice<iSinglet<Simd>> &ph,
|
vInteger t_mask,
|
||||||
unsigned int tmin,
|
bool switch_sign = false);
|
||||||
unsigned int tmax);
|
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||||
|
SitePropagator &q_out,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
unsigned int sU,
|
||||||
|
unsigned int mu,
|
||||||
|
vInteger t_mask,
|
||||||
|
bool switch_sign = false);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Specialised variants
|
// Specialised variants
|
||||||
|
Loading…
x
Reference in New Issue
Block a user