From 22e35c9ddd9d8ccd812b875695c7b01770f0e11f Mon Sep 17 00:00:00 2001 From: gfilaci Date: Fri, 10 May 2019 12:23:39 +0100 Subject: [PATCH] M5Ddag offloaded to GPU --- Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc b/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc index 6b532c7a..d184b70e 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc +++ b/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc @@ -99,35 +99,37 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i, auto psi = psi_i.View(); auto phi = phi_i.View(); auto chi = chi_i.View(); + Coeff_t *lower_v = &lower[0]; + Coeff_t *diag_v = &diag[0]; + Coeff_t *upper_v = &upper[0]; int Ls =this->Ls; assert(phi.Checkerboard() == psi.Checkerboard()); - // Flops = 6.0*(Nc*Ns) *Ls*vol + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites4d = nsimd * grid->oSites() / Ls; + + // 10 = 3 complex mult + 2 complex add + // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) M5Dcalls++; M5Dtime-=usecond(); - thread_loop( (int ss=0;ss<grid->oSites();ss+=Ls),{ // adds Ls - auto tmp = psi[0]; + accelerator_loopN( sss, sites4d ,{ + uint64_t lane = sss % nsimd; + uint64_t ss = Ls * (sss / nsimd); + for(int s=0;s