1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

M5D offloaded to GPU

This commit is contained in:
gfilaci 2019-05-09 11:17:55 +01:00
parent 2b3c22f03d
commit f1744b3f01

View File

@@ -50,35 +50,39 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
Coeff_t *lower_v = &lower[0];
Coeff_t *diag_v = &diag[0];
Coeff_t *upper_v = &upper[0];
int Ls =this->Ls;
assert(phi.Checkerboard() == psi.Checkerboard());
const uint64_t nsimd = grid->Nsimd();
const uint64_t sites4d = nsimd * grid->oSites() / Ls;
// 10 = 3 complex mult + 2 complex add
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
M5Dcalls++;
M5Dtime-=usecond();
thread_loop( (int ss=0;ss<grid->oSites();ss+=Ls),{ // adds Ls
typedef typename SiteSpinor::scalar_object ScalarSiteSpinor;
accelerator_loopN( sss, sites4d ,{
uint64_t lane = sss % nsimd;
uint64_t ss = Ls * (sss / nsimd);
for(int s=0;s<Ls;s++){
auto tmp = psi[0];
if ( s==0 ) {
spProj5m(tmp,psi[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi[ss+Ls-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else if ( s==(Ls-1)) {
spProj5m(tmp,psi[ss+0]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
} else {
spProj5m(tmp,psi[ss+s+1]);
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
spProj5p(tmp,psi[ss+s-1]);
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
}
auto res = extractLane(lane,phi[ss+s]);
res = diag_v[s]*res;
auto tmp = extractLane(lane,psi[ss+(s+1)%Ls]);
spProj5m(tmp,tmp);
res += upper_v[s]*tmp;
tmp = extractLane(lane,psi[ss+(s+Ls-1)%Ls]);
spProj5p(tmp,tmp);
res += lower_v[s]*tmp;
insertLane(lane,chi[ss+s],res);
}
});
M5Dtime+=usecond();