# NOTE(review): this patch was found collapsed onto a single line. Line breaks were
# re-inserted so that each hunk matches its @@ line counts (9/20, 12/23, 6/7); the
# indentation is a reconstruction, the original whitespace was not recoverable.
# NOTE(review): template parameter lists (the text after each bare `template`, and
# part of the first context line) look stripped - restore them from the target
# file before applying; until then apply with whitespace-insensitive matching at best.
diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h
index 858aead7..6dd6d5dc 100644
--- a/Grid/qcd/QCD.h
+++ b/Grid/qcd/QCD.h
@@ -451,9 +451,20 @@ template void pokeLorentz(vobj &lhs,const decltype(peekIndex
  propagator assignements
 //////////////////////////////////////////////
 //template
+#define FAST_FERM_TO_PROP
 template
 void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
 {
+#ifdef FAST_FERM_TO_PROP
+  autoView(p_v,p,CpuWrite);
+  autoView(f_v,f,CpuRead);
+  thread_for(idx,p_v.oSites(),{
+      for(int ss = 0; ss < Ns; ++ss) {
+      for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
+        p_v[idx]()(ss,s)(cc,c) = f_v[idx]()(ss)(cc); // Propagator sink index is LEFT, suitable for left mult by gauge link (e.g.)
+      }}
+    });
+#else
   for(int j = 0; j < Ns; ++j)
     {
       auto pjs = peekSpin(p, j, s);
@@ -465,12 +476,23 @@ void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::Fermio
         }
       pokeSpin(p, pjs, j, s);
     }
+#endif
 }
 
 //template
 template
 void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
 {
+#ifdef FAST_FERM_TO_PROP
+  autoView(p_v,p,CpuRead);
+  autoView(f_v,f,CpuWrite);
+  thread_for(idx,p_v.oSites(),{
+      for(int ss = 0; ss < Ns; ++ss) {
+      for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
+        f_v[idx]()(ss)(cc) = p_v[idx]()(ss,s)(cc,c); // LEFT index is copied across for s,c right index
+      }}
+    });
+#else
   for(int j = 0; j < Ns; ++j)
     {
       auto pjs = peekSpin(p, j, s);
@@ -482,6 +504,7 @@ void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::Propagato
         }
       pokeSpin(f, fj, j);
     }
+#endif
 }
 
 //////////////////////////////////////////////