mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-25 19:22:03 +01:00
Offload more loops
This commit is contained in:
@ -467,32 +467,7 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
conformable(_grid, q_in_1.Grid());
|
||||
conformable(_grid, q_in_2.Grid());
|
||||
conformable(_grid, q_out.Grid());
|
||||
#if 0
|
||||
PropagatorField tmp1(_grid), tmp2(_grid);
|
||||
q_out = Zero();
|
||||
|
||||
// Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu).
|
||||
// Inefficient comms method but not performance critical.
|
||||
tmp1 = Cshift(q_in_1, mu, 1);
|
||||
tmp2 = Cshift(q_in_2, mu, 1);
|
||||
autoView( tmp1_v , tmp1, CpuWrite);
|
||||
autoView( tmp2_v , tmp2, CpuWrite);
|
||||
autoView( q_in_1_v,q_in_1, CpuRead);
|
||||
autoView( q_in_2_v,q_in_2, CpuRead);
|
||||
autoView( q_out_v , q_out, CpuRead);
|
||||
autoView( Umu_v , Umu, CpuRead);
|
||||
thread_for(sU, Umu.Grid()->oSites(),{
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
|
||||
q_in_2_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu);
|
||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
|
||||
tmp2_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu);
|
||||
});
|
||||
#else
|
||||
#endif
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
@ -508,62 +483,7 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
{
|
||||
conformable(_grid, q_in.Grid());
|
||||
conformable(_grid, q_out.Grid());
|
||||
#if 0
|
||||
|
||||
// Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
|
||||
Complex i(0.0,1.0);
|
||||
PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid);
|
||||
unsigned int tshift = (mu == Tp) ? 1 : 0;
|
||||
unsigned int LLt = GridDefaultLatt()[Tp];
|
||||
|
||||
q_out = Zero();
|
||||
LatticeInteger coords(_grid);
|
||||
LatticeCoordinate(coords, Tp);
|
||||
|
||||
// Need q(x + mu) and q(x - mu).
|
||||
tmp = Cshift(q_in, mu, 1);
|
||||
tmpFwd = tmp*lattice_cmplx;
|
||||
tmp = lattice_cmplx*q_in;
|
||||
tmpBwd = Cshift(tmp, mu, -1);
|
||||
|
||||
autoView( coords_v , coords, CpuRead);
|
||||
autoView( tmpFwd_v , tmpFwd, CpuRead);
|
||||
autoView( tmpBwd_v , tmpBwd, CpuRead);
|
||||
autoView( Umu_v , Umu, CpuRead);
|
||||
autoView( q_out_v , q_out, CpuWrite);
|
||||
|
||||
thread_for(sU, Umu.Grid()->oSites(), {
|
||||
|
||||
// Compute the sequential conserved current insertion only if our simd
|
||||
// object contains a timeslice we need.
|
||||
vPredicate t_mask;
|
||||
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
|
||||
Integer timeSlices = Reduce(t_mask());
|
||||
|
||||
if (timeSlices > 0) {
|
||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, t_mask);
|
||||
}
|
||||
|
||||
// Repeat for backward direction.
|
||||
t_mask() = ((coords_v[sU] >= (tmin + tshift)) &&
|
||||
(coords_v[sU] <= (tmax + tshift)));
|
||||
|
||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||
unsigned int t0 = 0;
|
||||
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
|
||||
|
||||
timeSlices = Reduce(t_mask());
|
||||
|
||||
if (timeSlices > 0) {
|
||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, t_mask);
|
||||
}
|
||||
});
|
||||
#else
|
||||
#endif
|
||||
assert(0);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
Reference in New Issue
Block a user