From 220050822a6e51dbbc921f9cb0d0bb02018e1151 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 30 Jun 2022 13:43:27 -0400 Subject: [PATCH] Speed up M5D and M5Ddag --- .../implementation/CayleyFermion5Dcache.h | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h index d2537ccf..1581bee4 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -66,18 +66,17 @@ CayleyFermion5D::M5D(const FermionField &psi_i, M5Dcalls++; M5Dtime-=usecond(); - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss= sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2; - for(int s=0;s::M5Ddag(const FermionField &psi_i, M5Dcalls++; M5Dtime-=usecond(); - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss=sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2; - for(int s=0;s