1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-12-23 22:24:30 +00:00

Getting HMC to run

This commit is contained in:
Peter Boyle
2019-07-26 12:18:29 +01:00
parent 34108296cd
commit fe700a183a
4 changed files with 107 additions and 57 deletions

View File

@@ -83,7 +83,8 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
auto phi = phi_i.View();
auto chi = chi_i.View();
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
auto pm = this->pm;
int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
assert(phi.Checkerboard() == psi.Checkerboard());
@@ -104,8 +105,8 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
spProj5m(tmp1, psi(idx_u));
spProj5p(tmp2, psi(idx_l));
if(this->pm == 1){ spProj5p(tmp, psi(ss+shift_s)); }
else { spProj5m(tmp, psi(ss+shift_s)); }
if(pm == 1){ spProj5p(tmp, psi(ss+shift_s)); }
else { spProj5m(tmp, psi(ss+shift_s)); }
coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 +lower[s]*tmp2 + shift_coeffs[s]*tmp);
}
@@ -169,6 +170,8 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto pm = this->pm;
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss = sss*Ls;
@@ -188,8 +191,8 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
else coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2);
if(this->pm == 1){ spProj5p(tmp, psi(ss+s)); }
else { spProj5m(tmp, psi(ss+s)); }
if(pm == 1){ spProj5p(tmp, psi(ss+s)); }
else { spProj5m(tmp, psi(ss+s)); }
coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+shift_coeffs[s]*tmp);
}
@@ -207,6 +210,12 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
this->MooeeInvCalls++;
@@ -224,26 +233,26 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - this->lee[s-1]*tmp);
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp);
}
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->leem[s]*tmp);
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls-1])*tmp);
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - this->uee[s]*tmp);
coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp);
}
});
@@ -259,6 +268,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
auto psi = psi_i.View();
auto chi = chi_i.View();
auto pm = this->pm;
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0];
auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
@@ -272,36 +289,36 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0]
tmp2 = MooeeInv_shift_lc[0]*psi(ss);
tmp2 = pMooeeInv_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5p(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - this->lee[s-1]*tmp1);
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi(ss+s);
coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInv_shift_lc[s]*psi(ss+s);
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2); }
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2); }
// L_m^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->leem[s]*tmp1);
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1);
}
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls-1])*tmp1);
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp1);
}
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1));
// chi[ss+Ls-1] = (1.0/pdee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + MooeeInv_shift_norm[Ls-1]*tmp2_spProj);
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInv_shift_norm[Ls-1]*tmp2_spProj);
// Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s] , chi(ss+s) - this->uee[s]*tmp1);
coalescedWrite(chi[ss+s] , chi(ss+s) - puee[s]*tmp1);
spProj5m(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + MooeeInv_shift_norm[s]*tmp2_spProj);
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInv_shift_norm[s]*tmp2_spProj);
}
});
@@ -319,6 +336,12 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
auto psi = psi_i.View();
auto chi = chi_i.View();
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
@@ -334,26 +357,26 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
coalescedWrite(chi[ss], psi(ss));
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi(ss+s-1));
coalescedWrite(chi[ss+s], psi(ss+s) - this->uee[s-1]*tmp);
coalescedWrite(chi[ss+s], psi(ss+s) - puee[s-1]*tmp);
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->ueem[s]*tmp);
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->leem[s]/this->dee[Ls-1])*tmp);
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp);
}
coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi(ss+s+1));
coalescedWrite(chi[ss+s], chi(ss+s) - this->lee[s]*tmp);
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp);
}
});
@@ -369,6 +392,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
auto chi = chi_i.View();
int Ls = this->Ls;
auto pm = this->pm;
auto plee = & this->lee [0];
auto pdee = & this->dee [0];
auto puee = & this->uee [0];
auto pleem= & this->leem[0];
auto pueem= & this->ueem[0];
auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
@@ -383,36 +414,36 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
coalescedWrite(chi[ss], psi(ss));
tmp2 = MooeeInvDag_shift_lc[0]*psi(ss);
tmp2 = pMooeeInvDag_shift_lc[0]*psi(ss);
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi(ss+s-1));
coalescedWrite(chi[ss+s],psi(ss+s) - this->uee[s-1]*tmp1);
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi(ss+s);
coalescedWrite(chi[ss+s],psi(ss+s) - puee[s-1]*tmp1);
tmp2 = tmp2 + pMooeeInvDag_shift_lc[s]*psi(ss+s);
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2);}
if(pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else { spProj5m(tmp2_spProj, tmp2);}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->ueem[s]*tmp1);
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp1);
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->leem[s]/this->dee[Ls-1])*tmp1);
coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp1);
}
coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1));
spProj5p(tmp1, chi(ss+Ls-1));
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj);
coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInvDag_shift_norm[Ls-1]*tmp2_spProj);
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
coalescedWrite(chi[ss+s], chi(ss+s) - this->lee[s]*tmp1);
coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp1);
spProj5p(tmp1, chi(ss+s));
coalescedWrite(chi[ss+s], chi(ss+s) + MooeeInvDag_shift_norm[s]*tmp2_spProj);
coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInvDag_shift_norm[s]*tmp2_spProj);
}
});