/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle Author: Gianluca Filaci This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #include #include NAMESPACE_BEGIN(Grid); // Pminus fowards // Pplus backwards.. 
// -----------------------------------------------------------------------------
// CayleyFermion5D member definitions: M5D, M5Ddag, MooeeInv, MooeeInvDag.
//
// NOTE(review): this copy of the file appears damaged. Template parameter lists
// are absent ("template void ...", "Vector &lower") and every loop header stops
// right after its "<" (e.g. "for(int s=0;s void CayleyFermion5D::M5Ddag"), so
// the loop bodies and the tails of M5D / M5Ddag are not visible here. Restore
// the file from the upstream repository before editing the code itself; the
// notes below describe only what is still visible.
//
// Setup shared by M5D and M5Ddag (psi_i, phi_i: inputs; chi_i: output;
// lower/diag/upper: coefficient vectors):
//   * chi_i takes its checkerboard from psi_i; phi and psi are asserted to
//     carry the same checkerboard.
//   * psi and phi are opened as AcceleratorRead views, chi as AcceleratorWrite.
//   * pdiag / pupper / plower are raw pointers to element 0 of the coefficient
//     vectors, taken before the accelerator_for region; they are presumably
//     indexed by the fifth-dimension coordinate s -- TODO confirm against the
//     restored loop body.
//   * M5Dcalls is incremented and usecond() is subtracted from M5Dtime before
//     the kernel; the matching "+=" is not visible in this copy.
//   * The kernel runs over nloop = oSites()/Ls work items with Simd::Nsimd()
//     as the third accelerator_for argument; item sss starts at site index
//     ss = sss*Ls.
//   * The existing comments put the cost at 10.0*(Nc*Ns)*Ls*vol flops for M5D
//     and 6.0*(Nc*Ns)*Ls*vol for M5Ddag.
//
// MooeeInv (psi_i: input, chi_i: output) begins on this line as well: it opens
// the psi/chi views, takes pointers to element 0 of the members lee, dee, uee,
// leem and ueem, increments MooeeInvCalls and subtracts usecond() from
// MooeeInvTime (the matching "+=" follows its kernel).
// -----------------------------------------------------------------------------
template void CayleyFermion5D::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, Vector &lower, Vector &diag, Vector &upper) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); autoView(psi , psi_i,AcceleratorRead); autoView(phi , phi_i,AcceleratorRead); autoView(chi , chi_i,AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; auto plower = &lower[0]; int Ls =this->Ls; // 10 = 3 complex mult + 2 complex add // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) M5Dcalls++; M5Dtime-=usecond(); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss= sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2; for(int s=0;s void CayleyFermion5D::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, Vector &lower, Vector &diag, Vector &upper) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); autoView(psi , psi_i,AcceleratorRead); autoView(phi , phi_i,AcceleratorRead); autoView(chi , chi_i,AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; auto plower = &lower[0]; int Ls=this->Ls; // Flops = 6.0*(Nc*Ns) *Ls*vol M5Dcalls++; M5Dtime-=usecond(); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2; for(int s=0;s void CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi_i) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); autoView(psi , psi_i,AcceleratorRead); autoView(chi , chi_i,AcceleratorWrite); int Ls=this->Ls; auto plee = & lee [0]; auto pdee = & dee [0]; auto puee = & uee [0]; auto pleem = & leem[0]; auto pueem = & ueem[0]; MooeeInvCalls++; MooeeInvTime-=usecond(); uint64_t nloop = grid->oSites()/Ls; 
// -----------------------------------------------------------------------------
// Kernel of MooeeInv, followed by the whole of MooeeInvDag.
//
// MooeeInv kernel, for the block of sites starting at ss = sss*Ls:
//   * s = 0 step: res = psi(ss); acc is seeded with pleem[0] times the
//     spProj5m projection of res; tmp is then overwritten with the spProj5p
//     projection of res; res is written unchanged to chi[ss].
//   * The forward loop over s starting at 1 is truncated in this copy
//     ("for(int s=1;s=0;s--)" is the visible remnant of two separate loop
//     headers with everything between them lost).
//   * Visible body of the downward loop: res = (1/pdee[s])*chi(ss+s)
//     - puee[s]*tmp - pueem[s]*acc; tmp is refreshed with spProj5m(res) and
//     res is stored to chi[ss+s], so chi is both read and written in place.
//   * spProj5m / spProj5p are defined elsewhere; presumably the chiral
//     projectors referred to by the "Pminus / Pplus" comment at the top of the
//     file -- TODO confirm.
//
// MooeeInvDag has the same visible structure with three differences:
//   * every coefficient is wrapped in conjugate();
//   * spProj5p and spProj5m swap roles, and pueem/plee/pleem are used where
//     MooeeInv uses pleem/puee/pueem;
//   * its own forward loop is truncated in the same way.
//
// NOTE(review): MooeeInvDag increments MooeeInvCalls and accumulates into
// MooeeInvTime, i.e. the same counters as MooeeInv -- confirm that sharing
// them is intended.
// NOTE(review): "assert(psi.Checkerboard() == psi.Checkerboard())" compares a
// value with itself and can never fail. A comparison against chi would also
// hold trivially, because chi_i's checkerboard is assigned from psi_i at the
// top of the function -- confirm what this assert was meant to check.
// -----------------------------------------------------------------------------
accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp, acc, res; // X = Nc*Ns // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops // Apply (L^{\prime})^{-1} L_m^{-1} res = psi(ss); spProj5m(tmp,res); acc = pleem[0]*tmp; spProj5p(tmp,res); coalescedWrite(chi[ss],res); for(int s=1;s=0;s--){ res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc; spProj5m(tmp,res); coalescedWrite(chi[ss+s],res); } }); MooeeInvTime+=usecond(); } template void CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); int Ls=this->Ls; autoView(psi , psi_i,AcceleratorRead); autoView(chi , chi_i,AcceleratorWrite); auto plee = & lee [0]; auto pdee = & dee [0]; auto puee = & uee [0]; auto pleem = & leem[0]; auto pueem = & ueem[0]; assert(psi.Checkerboard() == psi.Checkerboard()); MooeeInvCalls++; MooeeInvTime-=usecond(); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp, acc, res; // X = Nc*Ns // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops // Apply (U^{\prime})^{-dagger} U_m^{-\dagger} res = psi(ss); spProj5p(tmp,res); acc = conjugate(pueem[0])*tmp; spProj5m(tmp,res); coalescedWrite(chi[ss],res); for(int s=1;s=0;s--){ res = conjugate(1.0/pdee[s])*chi(ss+s) - conjugate(plee[s])*tmp - conjugate(pleem[s])*acc; spProj5p(tmp,res); coalescedWrite(chi[ss+s],res); } }); MooeeInvTime+=usecond(); } NAMESPACE_END(Grid);