/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc Copyright (C) 2017 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle Author: David Murphy Author: Gianluca Filaci This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #include #include NAMESPACE_BEGIN(Grid); // FIXME -- make a version of these routines with site loop outermost for cache reuse. // Pminus fowards // Pplus backwards.. template void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); int Ls = this->Ls; GridBase* grid = psi_i.Grid(); autoView( phi , phi_i, AcceleratorRead); autoView( psi , psi_i, AcceleratorRead); autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; auto plower = &lower[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); auto nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ auto ss=sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; for(int s=0; sM5Dtime += usecond(); } template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); int Ls = this->Ls; autoView( psi , psi_i, AcceleratorRead); autoView( phi , phi_i, AcceleratorRead); autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; auto plower = &lower[0]; // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); auto nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ typedef decltype(coalescedRead(psi[0])) spinor; auto ss=sss*Ls; for(int s=0; sM5Dtime += usecond(); } template void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionField& chi_i) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); autoView( psi, psi_i, AcceleratorRead); autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; auto pdee = & this->dee[0]; auto puee = & this->uee[0]; auto pleem = & this->leem[0]; auto pueem = & this->ueem[0]; this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); uint64_t nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp, acc, res; // Apply (L^{\prime})^{-1} L_m^{-1} res = psi(ss); spProj5m(tmp,res); acc = pleem[0]*tmp; spProj5p(tmp,res); coalescedWrite(chi[ss],res); for(int s=1;s=0;s--){ res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc; spProj5m(tmp,res); coalescedWrite(chi[ss+s],res); } }); this->MooeeInvTime += usecond(); } template void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); autoView( psi, psi_i, AcceleratorRead); autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; auto pdee = & this->dee[0]; auto puee = & this->uee[0]; auto pleem = & this->leem[0]; auto pueem = & this->ueem[0]; assert(psi.Checkerboard() == psi.Checkerboard()); this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); auto nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp, acc, res; // Apply (U^{\prime})^{-dagger} U_m^{-\dagger} res = psi(ss); spProj5p(tmp,res); acc = conjugate(pueem[0])*tmp; spProj5m(tmp,res); coalescedWrite(chi[ss],res); for(int s=1;s=0;s--){ res = conjugate(1.0/pdee[s])*chi(ss+s) - conjugate(plee[s])*tmp - conjugate(pleem[s])*acc; spProj5p(tmp,res); coalescedWrite(chi[ss+s],res); } }); this->MooeeInvTime += usecond(); } NAMESPACE_END(Grid);