diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index e4587308..916bd0c0 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -216,9 +216,13 @@ NAMESPACE_END(Grid); template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); +#ifdef GRID_NVCC +#define CAYLEY_DPERP_GPU +#else #undef CAYLEY_DPERP_DENSE #define CAYLEY_DPERP_CACHE #undef CAYLEY_DPERP_LINALG +#endif #define CAYLEY_DPERP_VEC #endif diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc b/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc new file mode 100644 index 00000000..f99804a5 --- /dev/null +++ b/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc @@ -0,0 +1,247 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + + +NAMESPACE_BEGIN(Grid); + +// Pminus forwards +// Pplus backwards.. +template +void CayleyFermion5D::M5D(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + int Ls =this->Ls; + assert(phi.Checkerboard() == psi.Checkerboard()); + // 10 = 3 complex mult + 2 complex add + // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) + M5Dcalls++; + M5Dtime-=usecond(); + + thread_loop( (int ss=0;ssoSites();ss+=Ls),{ // adds Ls + for(int s=0;s +void CayleyFermion5D::M5Ddag(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + int Ls =this->Ls; + assert(phi.Checkerboard() == psi.Checkerboard()); + + // Flops = 6.0*(Nc*Ns) *Ls*vol + M5Dcalls++; + M5Dtime-=usecond(); + + thread_loop( (int ss=0;ssoSites();ss+=Ls),{ // adds Ls + auto tmp = psi[0]; + for(int s=0;s +void CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + int Ls=this->Ls; + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + + thread_loop((int ss=0;ssoSites();ss+=Ls),{ // adds Ls + auto tmp = psi[0]; + + // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops + // Apply (L^{\prime})^{-1} + chi[ss]=psi[ss]; // chi[0]=psi[0] + for(int s=1;s=0;s--){ 
spProj5m(tmp,chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - uee[s]*tmp; + } + }); + + MooeeInvTime+=usecond(); + +} + +template +void CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + int Ls=this->Ls; + + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + assert(psi.Checkerboard() == psi.Checkerboard()); + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + + thread_loop((int ss=0;ssoSites();ss+=Ls),{ // adds Ls + + auto tmp = psi[0]; + + // Apply (U^{\prime})^{-dagger} + chi[ss]=psi[ss]; + for (int s=1;s=0;s--){ + spProj5p(tmp,chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - conjugate(lee[s])*tmp; + } + }); + + MooeeInvTime+=usecond(); + +} + +#ifdef CAYLEY_DPERP_GPU +INSTANTIATE_DPERP(WilsonImplF); +INSTANTIATE_DPERP(WilsonImplD); +INSTANTIATE_DPERP(GparityWilsonImplF); +INSTANTIATE_DPERP(GparityWilsonImplD); +INSTANTIATE_DPERP(ZWilsonImplF); +INSTANTIATE_DPERP(ZWilsonImplD); + +INSTANTIATE_DPERP(WilsonImplFH); +INSTANTIATE_DPERP(WilsonImplDF); +INSTANTIATE_DPERP(GparityWilsonImplFH); +INSTANTIATE_DPERP(GparityWilsonImplDF); +INSTANTIATE_DPERP(ZWilsonImplFH); +INSTANTIATE_DPERP(ZWilsonImplDF); +#endif + +NAMESPACE_END(Grid);