mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
M5D offloaded to GPU
This commit is contained in:
parent
2b3c22f03d
commit
f1744b3f01
@ -50,35 +50,39 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
Coeff_t *lower_v = &lower[0];
|
||||
Coeff_t *diag_v = &diag[0];
|
||||
Coeff_t *upper_v = &upper[0];
|
||||
int Ls =this->Ls;
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
const uint64_t nsimd = grid->Nsimd();
|
||||
const uint64_t sites4d = nsimd * grid->oSites() / Ls;
|
||||
|
||||
// 10 = 3 complex mult + 2 complex add
|
||||
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
|
||||
M5Dcalls++;
|
||||
M5Dtime-=usecond();
|
||||
|
||||
thread_loop( (int ss=0;ss<grid->oSites();ss+=Ls),{ // adds Ls
|
||||
typedef typename SiteSpinor::scalar_object ScalarSiteSpinor;
|
||||
|
||||
accelerator_loopN( sss, sites4d ,{
|
||||
uint64_t lane = sss % nsimd;
|
||||
uint64_t ss = Ls * (sss / nsimd);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
auto tmp = psi[0];
|
||||
if ( s==0 ) {
|
||||
spProj5m(tmp,psi[ss+s+1]);
|
||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||
auto res = extractLane(lane,phi[ss+s]);
|
||||
res = diag_v[s]*res;
|
||||
|
||||
spProj5p(tmp,psi[ss+Ls-1]);
|
||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||
} else if ( s==(Ls-1)) {
|
||||
spProj5m(tmp,psi[ss+0]);
|
||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||
auto tmp = extractLane(lane,psi[ss+(s+1)%Ls]);
|
||||
spProj5m(tmp,tmp);
|
||||
res += upper_v[s]*tmp;
|
||||
|
||||
spProj5p(tmp,psi[ss+s-1]);
|
||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp,psi[ss+s+1]);
|
||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||
tmp = extractLane(lane,psi[ss+(s+Ls-1)%Ls]);
|
||||
spProj5p(tmp,tmp);
|
||||
res += lower_v[s]*tmp;
|
||||
|
||||
spProj5p(tmp,psi[ss+s-1]);
|
||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||
}
|
||||
insertLane(lane,chi[ss+s],res);
|
||||
}
|
||||
});
|
||||
M5Dtime+=usecond();
|
||||
|
Loading…
Reference in New Issue
Block a user