1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Speed up M5D and M5Ddag

This commit is contained in:
Peter Boyle 2022-06-30 13:43:27 -04:00
parent 87ad76d81b
commit 220050822a

View File

@ -66,18 +66,17 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
uint64_t nloop = grid->oSites();
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss= sss*Ls;
uint64_t s = sss%Ls;
uint64_t ss= sss-s;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
});
M5Dtime+=usecond();
}
@ -108,18 +107,17 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
M5Dcalls++;
M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls;
uint64_t nloop = grid->oSites();
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;
uint64_t s = sss%Ls;
uint64_t ss= sss-s;
typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
});
M5Dtime+=usecond();
}