diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc b/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc index 0b214d31..be727d5f 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc +++ b/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc @@ -28,221 +28,220 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - // FIXME -- make a version of these routines with site loop outermost for cache reuse. +// FIXME -- make a version of these routines with site loop outermost for cache reuse. - // Pminus fowards - // Pplus backwards.. - template - void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - int Ls = this->Ls; - GridBase* grid = psi._grid; +// Pminus fowards +// Pplus backwards.. +template +void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +{ + int Ls = this->Ls; + GridBase* grid = psi._grid; - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - for(int s=0; soSites(); ss+=Ls){ // adds Ls + for(int s=0; sM5Dtime += usecond(); + this->M5Dtime += usecond(); +} + +template +void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +{ + int Ls = this->Ls; + GridBase* grid = psi._grid; + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard=psi.checkerboard; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls + auto tmp = psi._odata[0]; + for(int s=0; sM5Dtime += usecond(); +} + +template +void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) +{ + GridBase* grid = psi._grid; + int Ls = this->Ls; + + chi.checkerboard = psi.checkerboard; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls + + auto tmp1 = psi._odata[0]; + auto tmp2 = psi._odata[0]; + + // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops + // Apply (L^{\prime})^{-1} + chi[ss] = psi[ss]; // chi[0]=psi[0] + for(int s=1; slee[s-1]*tmp1; } - template - void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - int Ls = this->Ls; - GridBase* grid = psi._grid; - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard=psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - auto tmp = psi._odata[0]; - for(int s=0; sM5Dtime += usecond(); + // L_m^{-1} + for(int s=0; sleem[s]*tmp1; } - template - void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; + // U_m^{-1} D^{-1} + for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1; + } + spProj5m(tmp2, chi[ss+Ls-1]); + chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2; - chi.checkerboard = psi.checkerboard; + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + spProj5m(tmp1, chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1; + } + } - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); + this->MooeeInvTime += usecond(); +} - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls +template +void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) +{ + GridBase* grid = psi._grid; + int Ls = this->Ls; - auto tmp1 = psi._odata[0]; - auto tmp2 = psi._odata[0]; + assert(psi.checkerboard == psi.checkerboard); + chi.checkerboard = psi.checkerboard; - // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops - // Apply (L^{\prime})^{-1} - chi[ss] = psi[ss]; // chi[0]=psi[0] - for(int s=1; slee[s-1]*tmp1; - } + std::vector ueec(Ls); + std::vector deec(Ls+1); + std::vector leec(Ls); + std::vector ueemc(Ls); + std::vector leemc(Ls); - // L_m^{-1} - for(int s=0; sleem[s]*tmp1; - } + for(int s=0; suee[s]); + deec[s] = conjugate(this->dee[s]); + leec[s] = conjugate(this->lee[s]); + ueemc[s] = conjugate(this->ueem[s]); + leemc[s] = conjugate(this->leem[s]); + } + deec[Ls] = conjugate(this->dee[Ls]); - // U_m^{-1} D^{-1} - for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1; - } - spProj5m(tmp2, chi[ss+Ls-1]); - chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2; + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); - // Apply U^{-1} - for(int s=Ls-2; s>=0; s--){ - spProj5m(tmp1, chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1; - } - } + parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - this->MooeeInvTime += usecond(); + auto tmp1 = psi._odata[0]; + auto tmp2 = psi._odata[0]; + + // Apply (U^{\prime})^{-dagger} + chi[ss] = psi[ss]; + for(int s=1; s - void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; - - assert(psi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - - std::vector ueec(Ls); - std::vector deec(Ls+1); - std::vector leec(Ls); - std::vector ueemc(Ls); - std::vector leemc(Ls); - - for(int s=0; suee[s]); - deec[s] = conjugate(this->dee[s]); - leec[s] = conjugate(this->lee[s]); - ueemc[s] = conjugate(this->ueem[s]); - leemc[s] = conjugate(this->leem[s]); - } - deec[Ls] = conjugate(this->dee[Ls]); - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - - auto tmp1 = psi._odata[0]; - auto tmp2 = psi._odata[0]; - - // Apply (U^{\prime})^{-dagger} - chi[ss] = psi[ss]; - for(int s=1; s=0; s--){ - spProj5p(tmp1, chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - leec[s]*tmp1; - } - } - - this->MooeeInvTime += usecond(); + // U_m^{-\dagger} + for(int s=0; s=0; s--){ + spProj5p(tmp1, chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - leec[s]*tmp1; + } + } - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + this->MooeeInvTime += usecond(); +} - #endif +#ifdef DOMAIN_WALL_EOFA_DPERP_CACHE -}} +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF); +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD); + +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + +#endif + +NAMESPACE_END(Grid);