mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
M5D offloaded to GPU
This commit is contained in:
parent
2b3c22f03d
commit
f1744b3f01
@ -50,35 +50,39 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
|||||||
auto psi = psi_i.View();
|
auto psi = psi_i.View();
|
||||||
auto phi = phi_i.View();
|
auto phi = phi_i.View();
|
||||||
auto chi = chi_i.View();
|
auto chi = chi_i.View();
|
||||||
|
Coeff_t *lower_v = &lower[0];
|
||||||
|
Coeff_t *diag_v = &diag[0];
|
||||||
|
Coeff_t *upper_v = &upper[0];
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
|
const uint64_t nsimd = grid->Nsimd();
|
||||||
|
const uint64_t sites4d = nsimd * grid->oSites() / Ls;
|
||||||
|
|
||||||
// 10 = 3 complex mult + 2 complex add
|
// 10 = 3 complex mult + 2 complex add
|
||||||
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
|
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
|
||||||
M5Dcalls++;
|
M5Dcalls++;
|
||||||
M5Dtime-=usecond();
|
M5Dtime-=usecond();
|
||||||
|
|
||||||
thread_loop( (int ss=0;ss<grid->oSites();ss+=Ls),{ // adds Ls
|
typedef typename SiteSpinor::scalar_object ScalarSiteSpinor;
|
||||||
|
|
||||||
|
accelerator_loopN( sss, sites4d ,{
|
||||||
|
uint64_t lane = sss % nsimd;
|
||||||
|
uint64_t ss = Ls * (sss / nsimd);
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
auto tmp = psi[0];
|
auto res = extractLane(lane,phi[ss+s]);
|
||||||
if ( s==0 ) {
|
res = diag_v[s]*res;
|
||||||
spProj5m(tmp,psi[ss+s+1]);
|
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
auto tmp = extractLane(lane,psi[ss+(s+1)%Ls]);
|
||||||
|
spProj5m(tmp,tmp);
|
||||||
spProj5p(tmp,psi[ss+Ls-1]);
|
res += upper_v[s]*tmp;
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
|
||||||
} else if ( s==(Ls-1)) {
|
tmp = extractLane(lane,psi[ss+(s+Ls-1)%Ls]);
|
||||||
spProj5m(tmp,psi[ss+0]);
|
spProj5p(tmp,tmp);
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
res += lower_v[s]*tmp;
|
||||||
|
|
||||||
spProj5p(tmp,psi[ss+s-1]);
|
insertLane(lane,chi[ss+s],res);
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
|
||||||
} else {
|
|
||||||
spProj5m(tmp,psi[ss+s+1]);
|
|
||||||
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
|
||||||
|
|
||||||
spProj5p(tmp,psi[ss+s-1]);
|
|
||||||
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
M5Dtime+=usecond();
|
M5Dtime+=usecond();
|
||||||
|
Loading…
Reference in New Issue
Block a user