/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc Copyright (C) 2017 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle Author: David Murphy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #include #include NAMESPACE_BEGIN(Grid); template void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, Vector &lower, Vector &diag, Vector &upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; auto psi = psi_i.View(); auto phi = phi_i.View(); auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1; spinor tmp2; for(int s=0; sM5Dtime += usecond(); } template void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, Vector &lower, Vector &diag, Vector &upper, Vector &shift_coeffs) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; auto psi = psi_i.View(); auto phi = phi_i.View(); auto chi = chi_i.View(); int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator assert(phi.Checkerboard() == psi.Checkerboard()); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1; spinor tmp2; spinor tmp; for(int s=0; spm == 1){ spProj5p(tmp, psi(ss+shift_s)); } else { spProj5m(tmp, psi(ss+shift_s)); } coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 +lower[s]*tmp2 + shift_coeffs[s]*tmp); } }); this->M5Dtime += usecond(); } template void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, Vector &lower, Vector &diag, Vector &upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; auto psi = psi_i.View(); auto phi = phi_i.View(); auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(), { uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2; for(int s=0; sM5Dtime += usecond(); } template void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, Vector &lower, Vector &diag, Vector &upper, Vector &shift_coeffs) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator auto psi = psi_i.View(); auto phi = phi_i.View(); auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2, tmp; tmp1=Zero(); coalescedWrite(chi[ss+Ls-1],tmp1); for(int s=0; spm == 1){ spProj5p(tmp, psi(ss+s)); } else { spProj5m(tmp, psi(ss+s)); } coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+shift_coeffs[s]*tmp); } }); this->M5Dtime += usecond(); } template void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField &chi_i) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; auto psi = psi_i.View(); auto chi = chi_i.View(); if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; } this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp; // Apply (L^{\prime})^{-1} coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0] for(int s=1; slee[s-1]*tmp); } // L_m^{-1} for(int s=0; sleem[s]*tmp); } // U_m^{-1} D^{-1} for(int s=0; sdee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls-1])*tmp); } coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); // Apply U^{-1} for(int s=Ls-2; s>=0; s--){ spProj5m(tmp, chi(ss+s+1)); coalescedWrite(chi[ss+s], chi(ss+s) - this->uee[s]*tmp); } }); this->MooeeInvTime += usecond(); } template void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; auto psi = psi_i.View(); auto chi = chi_i.View(); this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2,tmp2_spProj; // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2 coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0] tmp2 = MooeeInv_shift_lc[0]*psi(ss); for(int s=1; slee[s-1]*tmp1); tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi(ss+s); } if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} else { spProj5m(tmp2_spProj, tmp2); } // L_m^{-1} for(int s=0; sleem[s]*tmp1); } // U_m^{-1} D^{-1} for(int s=0; sdee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls-1])*tmp1); } // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); spProj5m(tmp1, chi(ss+Ls-1)); coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + MooeeInv_shift_norm[Ls-1]*tmp2_spProj); // Apply U^{-1} and add shift term for(int s=Ls-2; s>=0; s--){ coalescedWrite(chi[ss+s] , chi(ss+s) - this->uee[s]*tmp1); spProj5m(tmp1, chi(ss+s)); coalescedWrite(chi[ss+s], chi(ss+s) + MooeeInv_shift_norm[s]*tmp2_spProj); } }); this->MooeeInvTime += usecond(); } template void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i) { if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; } chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; auto psi = psi_i.View(); auto chi = chi_i.View(); this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp; // Apply (U^{\prime})^{-dag} coalescedWrite(chi[ss], psi(ss)); for(int s=1; suee[s-1]*tmp); } // U_m^{-\dag} for(int s=0; sueem[s]*tmp); } // L_m^{-\dag} D^{-dag} for(int s=0; sdee[s])*chi(ss+s) - (this->leem[s]/this->dee[Ls-1])*tmp); } coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); // Apply L^{-dag} for(int s=Ls-2; s>=0; s--){ spProj5p(tmp, chi[ss+s+1]); coalescedWrite(chi[ss+s], chi(ss+s) - this->lee[s]*tmp); } }); this->MooeeInvTime += usecond(); } template void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); auto psi = psi_i.View(); auto chi = chi_i.View(); int Ls = this->Ls; this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss = sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2,tmp2_spProj; // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2 coalescedWrite(chi[ss], psi(ss)); tmp2 = MooeeInvDag_shift_lc[0]*psi(ss); for(int s=1; suee[s-1]*tmp1); tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi(ss+s); } if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} else { spProj5m(tmp2_spProj, tmp2);} // U_m^{-\dag} for(int s=0; sueem[s]*tmp1); } // L_m^{-\dag} D^{-dag} for(int s=0; sdee[s])*chi(ss+s) - (this->leem[s]/this->dee[Ls-1])*tmp1); } coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); spProj5p(tmp1, chi(ss+Ls-1)); coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj); // Apply L^{-dag} for(int s=Ls-2; s>=0; s--){ coalescedWrite(chi[ss+s], chi(ss+s) - this->lee[s]*tmp1); spProj5p(tmp1, chi(ss+s)); coalescedWrite(chi[ss+s], chi(ss+s) + MooeeInvDag_shift_norm[s]*tmp2_spProj); } }); this->MooeeInvTime += usecond(); } NAMESPACE_END(Grid);