mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-22 16:54:48 +01:00 
			
		
		
		
	Getting HMC to run
This commit is contained in:
		| @@ -127,6 +127,12 @@ CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi_i, FermionField &chi | ||||
|  | ||||
|   int Ls=this->Ls; | ||||
|  | ||||
|   auto plee  = & lee [0]; | ||||
|   auto pdee  = & dee [0]; | ||||
|   auto puee  = & uee [0]; | ||||
|   auto pleem = & leem[0]; | ||||
|   auto pueem = & ueem[0]; | ||||
|  | ||||
|   MooeeInvCalls++; | ||||
|   MooeeInvTime-=usecond(); | ||||
|   uint64_t nloop = grid->oSites()/Ls; | ||||
| @@ -140,27 +146,27 @@ CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi_i, FermionField &chi | ||||
|     coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0] | ||||
|     for(int s=1;s<Ls;s++){ | ||||
|       spProj5p(tmp,chi(ss+s-1));   | ||||
|       coalescedWrite(chi[ss+s] , psi(ss+s)-lee[s-1]*tmp); | ||||
|       coalescedWrite(chi[ss+s] , psi(ss+s)-plee[s-1]*tmp); | ||||
|     } | ||||
|  | ||||
|     // L_m^{-1}  | ||||
|     for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi | ||||
|     for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -pleem[s]P_- chi | ||||
|       spProj5m(tmp,chi(ss+s));     | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - leem[s]*tmp); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp); | ||||
|     } | ||||
|  | ||||
|     // U_m^{-1} D^{-1} | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       // Chi[s] + 1/d chi[s]  | ||||
|       spProj5p(tmp,chi(ss+Ls-1));  | ||||
|       coalescedWrite(chi[ss+s], (1.0/dee[s])*chi(ss+s)-(ueem[s]/dee[Ls-1])*tmp); | ||||
|       coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s)-(pueem[s]/pdee[Ls-1])*tmp); | ||||
|     }	 | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/dee[Ls-1])*chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1)); | ||||
|        | ||||
|     // Apply U^{-1} | ||||
|     for (int s=Ls-2;s>=0;s--){ | ||||
|       spProj5m(tmp,chi(ss+s+1));   | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - uee[s]*tmp); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp); | ||||
|     } | ||||
|   }); | ||||
|  | ||||
| @@ -179,6 +185,12 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi | ||||
|   auto psi = psi_i.View(); | ||||
|   auto chi = chi_i.View(); | ||||
|  | ||||
|   auto plee  = & lee [0]; | ||||
|   auto pdee  = & dee [0]; | ||||
|   auto puee  = & uee [0]; | ||||
|   auto pleem = & leem[0]; | ||||
|   auto pueem = & ueem[0]; | ||||
|  | ||||
|   assert(psi.Checkerboard() == psi.Checkerboard()); | ||||
|  | ||||
|   MooeeInvCalls++; | ||||
| @@ -195,25 +207,25 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi | ||||
|     coalescedWrite(chi[ss],psi(ss)); | ||||
|     for (int s=1;s<Ls;s++){ | ||||
|       spProj5m(tmp,chi(ss+s-1)); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s)-conjugate(uee[s-1])*tmp); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s)-conjugate(puee[s-1])*tmp); | ||||
|     } | ||||
|     // U_m^{-\dagger}  | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       spProj5p(tmp,chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - conjugate(ueem[s])*tmp); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - conjugate(pueem[s])*tmp); | ||||
|     } | ||||
|  | ||||
|     // L_m^{-\dagger} D^{-dagger} | ||||
|     for (int s=0;s<Ls-1;s++){ | ||||
|       spProj5m(tmp,chi(ss+Ls-1)); | ||||
|       coalescedWrite(chi[ss+s], conjugate(1.0/dee[s])*chi(ss+s)-conjugate(leem[s]/dee[Ls-1])*tmp); | ||||
|       coalescedWrite(chi[ss+s], conjugate(1.0/pdee[s])*chi(ss+s)-conjugate(pleem[s]/pdee[Ls-1])*tmp); | ||||
|     }	 | ||||
|     coalescedWrite(chi[ss+Ls-1], conjugate(1.0/dee[Ls-1])*chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], conjugate(1.0/pdee[Ls-1])*chi(ss+Ls-1)); | ||||
|    | ||||
|     // Apply L^{-dagger} | ||||
|     for (int s=Ls-2;s>=0;s--){ | ||||
|       spProj5p(tmp,chi(ss+s+1)); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - conjugate(lee[s])*tmp); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - conjugate(plee[s])*tmp); | ||||
|     } | ||||
|   }); | ||||
|   MooeeInvTime+=usecond(); | ||||
|   | ||||
| @@ -113,6 +113,13 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie | ||||
|   auto chi=chi_i.View(); | ||||
|   int Ls = this->Ls; | ||||
|  | ||||
|   auto plee  = & this->lee[0]; | ||||
|   auto pdee  = & this->dee[0]; | ||||
|   auto puee  = & this->uee[0]; | ||||
|  | ||||
|   auto pleem = & this->leem[0]; | ||||
|   auto pueem = & this->ueem[0]; | ||||
|  | ||||
|   this->MooeeInvCalls++; | ||||
|   this->MooeeInvTime -= usecond(); | ||||
|   uint64_t nloop=grid->oSites()/Ls; | ||||
| @@ -126,27 +133,27 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie | ||||
|     coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0] | ||||
|     for(int s=1; s<Ls; s++){ | ||||
|       spProj5p(tmp1, chi(ss+s-1)); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - this->lee[s-1]*tmp1); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1); | ||||
|     } | ||||
|  | ||||
|     // L_m^{-1} | ||||
|     for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi | ||||
|       spProj5m(tmp1, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->leem[s]*tmp1); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1); | ||||
|     } | ||||
|  | ||||
|     // U_m^{-1} D^{-1} | ||||
|     for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s] | ||||
|       spProj5p(tmp1, chi(ss+Ls-1)); | ||||
|       coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls])*tmp1); | ||||
|       coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls])*tmp1); | ||||
|     } | ||||
|     spProj5m(tmp2, chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1],(1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2); | ||||
|     coalescedWrite(chi[ss+Ls-1],(1.0/pdee[Ls])*tmp1 + (1.0/pdee[Ls-1])*tmp2); | ||||
|  | ||||
|     // Apply U^{-1} | ||||
|     for(int s=Ls-2; s>=0; s--){ | ||||
|       spProj5m(tmp1, chi(ss+s+1)); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - this->uee[s]*tmp1); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp1); | ||||
|     } | ||||
|   }); | ||||
|   this->MooeeInvTime += usecond(); | ||||
|   | ||||
| @@ -83,7 +83,8 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion | ||||
|   auto phi = phi_i.View(); | ||||
|   auto chi = chi_i.View(); | ||||
|  | ||||
|   int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator | ||||
|   auto pm  = this->pm; | ||||
|   int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator | ||||
|  | ||||
|   assert(phi.Checkerboard() == psi.Checkerboard()); | ||||
|  | ||||
| @@ -104,8 +105,8 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion | ||||
|       spProj5m(tmp1, psi(idx_u)); | ||||
|       spProj5p(tmp2, psi(idx_l)); | ||||
|  | ||||
|       if(this->pm == 1){ spProj5p(tmp, psi(ss+shift_s)); } | ||||
|       else             { spProj5m(tmp, psi(ss+shift_s)); } | ||||
|       if(pm == 1){ spProj5p(tmp, psi(ss+shift_s)); } | ||||
|       else       { spProj5m(tmp, psi(ss+shift_s)); } | ||||
|  | ||||
|       coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 +lower[s]*tmp2 + shift_coeffs[s]*tmp); | ||||
|     } | ||||
| @@ -169,6 +170,8 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm | ||||
|   this->M5Dcalls++; | ||||
|   this->M5Dtime -= usecond(); | ||||
|  | ||||
|   auto pm = this->pm; | ||||
|  | ||||
|   int nloop = grid->oSites()/Ls; | ||||
|   accelerator_for(sss,nloop,Simd::Nsimd(),{ | ||||
|     uint64_t ss = sss*Ls; | ||||
| @@ -188,8 +191,8 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm | ||||
|  | ||||
|       if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2); | ||||
|       else          coalescedWrite(chi[ss+s], diag[s]*phi(ss+s) + upper[s]*tmp1 + lower[s]*tmp2); | ||||
|       if(this->pm == 1){ spProj5p(tmp, psi(ss+s)); } | ||||
|       else             { spProj5m(tmp, psi(ss+s)); } | ||||
|       if(pm == 1){ spProj5p(tmp, psi(ss+s)); } | ||||
|       else       { spProj5m(tmp, psi(ss+s)); } | ||||
|  | ||||
|       coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+shift_coeffs[s]*tmp); | ||||
|     } | ||||
| @@ -207,6 +210,12 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField & | ||||
|   auto psi = psi_i.View(); | ||||
|   auto chi = chi_i.View(); | ||||
|  | ||||
|   auto plee = & this->lee [0]; | ||||
|   auto pdee = & this->dee [0]; | ||||
|   auto puee = & this->uee [0]; | ||||
|   auto pleem= & this->leem[0]; | ||||
|   auto pueem= & this->ueem[0]; | ||||
|  | ||||
|   if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; } | ||||
|  | ||||
|   this->MooeeInvCalls++; | ||||
| @@ -224,26 +233,26 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField & | ||||
|     coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0] | ||||
|     for(int s=1; s<Ls; s++){ | ||||
|       spProj5p(tmp, chi(ss+s-1)); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - this->lee[s-1]*tmp); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp); | ||||
|     } | ||||
|  | ||||
|     // L_m^{-1} | ||||
|     for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi | ||||
|       spProj5m(tmp, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->leem[s]*tmp); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp); | ||||
|     } | ||||
|  | ||||
|     // U_m^{-1} D^{-1} | ||||
|     for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s] | ||||
|       spProj5p(tmp, chi(ss+Ls-1)); | ||||
|       coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls-1])*tmp); | ||||
|       coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp); | ||||
|     } | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1)); | ||||
|  | ||||
|     // Apply U^{-1} | ||||
|     for(int s=Ls-2; s>=0; s--){ | ||||
|       spProj5m(tmp, chi(ss+s+1)); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - this->uee[s]*tmp); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp); | ||||
|     } | ||||
|   }); | ||||
|     | ||||
| @@ -259,6 +268,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF | ||||
|   auto psi = psi_i.View(); | ||||
|   auto chi = chi_i.View(); | ||||
|  | ||||
|   auto pm = this->pm; | ||||
|   auto plee = & this->lee [0]; | ||||
|   auto pdee = & this->dee [0]; | ||||
|   auto puee = & this->uee [0]; | ||||
|   auto pleem= & this->leem[0]; | ||||
|   auto pueem= & this->ueem[0]; | ||||
|   auto pMooeeInv_shift_lc   = &MooeeInv_shift_lc[0]; | ||||
|   auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0]; | ||||
|   this->MooeeInvCalls++; | ||||
|   this->MooeeInvTime -= usecond(); | ||||
|  | ||||
| @@ -272,36 +289,36 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF | ||||
|  | ||||
|     // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2 | ||||
|     coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0] | ||||
|     tmp2 = MooeeInv_shift_lc[0]*psi(ss); | ||||
|     tmp2 = pMooeeInv_shift_lc[0]*psi(ss); | ||||
|     for(int s=1; s<Ls; s++){ | ||||
|       spProj5p(tmp1, chi(ss+s-1)); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - this->lee[s-1]*tmp1); | ||||
|       tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi(ss+s); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - plee[s-1]*tmp1); | ||||
|       tmp2 = tmp2 + pMooeeInv_shift_lc[s]*psi(ss+s); | ||||
|     } | ||||
|     if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} | ||||
|     else             { spProj5m(tmp2_spProj, tmp2); } | ||||
|     if(pm == 1){ spProj5p(tmp2_spProj, tmp2);} | ||||
|     else       { spProj5m(tmp2_spProj, tmp2); } | ||||
|  | ||||
|     // L_m^{-1} | ||||
|     for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi | ||||
|       spProj5m(tmp1, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->leem[s]*tmp1); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pleem[s]*tmp1); | ||||
|     } | ||||
|  | ||||
|     // U_m^{-1} D^{-1} | ||||
|     for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s] | ||||
|       spProj5p(tmp1, chi(ss+Ls-1)); | ||||
|       coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->ueem[s]/this->dee[Ls-1])*tmp1); | ||||
|       coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pueem[s]/pdee[Ls-1])*tmp1); | ||||
|     } | ||||
|     // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); | ||||
|     // chi[ss+Ls-1] = (1.0/pdee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1)); | ||||
|     spProj5m(tmp1, chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + MooeeInv_shift_norm[Ls-1]*tmp2_spProj); | ||||
|     coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInv_shift_norm[Ls-1]*tmp2_spProj); | ||||
|  | ||||
|     // Apply U^{-1} and add shift term | ||||
|     for(int s=Ls-2; s>=0; s--){ | ||||
|       coalescedWrite(chi[ss+s] , chi(ss+s) - this->uee[s]*tmp1); | ||||
|       coalescedWrite(chi[ss+s] , chi(ss+s) - puee[s]*tmp1); | ||||
|       spProj5m(tmp1, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) + MooeeInv_shift_norm[s]*tmp2_spProj); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInv_shift_norm[s]*tmp2_spProj); | ||||
|     } | ||||
|   }); | ||||
|  | ||||
| @@ -319,6 +336,12 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel | ||||
|   auto psi = psi_i.View(); | ||||
|   auto chi = chi_i.View(); | ||||
|  | ||||
|   auto plee = & this->lee [0]; | ||||
|   auto pdee = & this->dee [0]; | ||||
|   auto puee = & this->uee [0]; | ||||
|   auto pleem= & this->leem[0]; | ||||
|   auto pueem= & this->ueem[0]; | ||||
|  | ||||
|   this->MooeeInvCalls++; | ||||
|   this->MooeeInvTime -= usecond(); | ||||
|  | ||||
| @@ -334,26 +357,26 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel | ||||
|     coalescedWrite(chi[ss], psi(ss)); | ||||
|     for(int s=1; s<Ls; s++){ | ||||
|       spProj5m(tmp, chi(ss+s-1)); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - this->uee[s-1]*tmp); | ||||
|       coalescedWrite(chi[ss+s], psi(ss+s) - puee[s-1]*tmp); | ||||
|     } | ||||
|      | ||||
|     // U_m^{-\dag} | ||||
|     for(int s=0; s<Ls-1; s++){ | ||||
|       spProj5p(tmp, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->ueem[s]*tmp); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp); | ||||
|     } | ||||
|  | ||||
|     // L_m^{-\dag} D^{-dag} | ||||
|     for(int s=0; s<Ls-1; s++){ | ||||
|       spProj5m(tmp, chi(ss+Ls-1)); | ||||
|       coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->leem[s]/this->dee[Ls-1])*tmp); | ||||
|       coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp); | ||||
|     } | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1)); | ||||
|  | ||||
|     // Apply L^{-dag} | ||||
|     for(int s=Ls-2; s>=0; s--){ | ||||
|       spProj5p(tmp, chi(ss+s+1)); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - this->lee[s]*tmp); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp); | ||||
|     } | ||||
|   }); | ||||
|  | ||||
| @@ -369,6 +392,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi | ||||
|   auto chi = chi_i.View(); | ||||
|   int Ls = this->Ls; | ||||
|  | ||||
|   auto pm = this->pm; | ||||
|   auto plee = & this->lee [0]; | ||||
|   auto pdee = & this->dee [0]; | ||||
|   auto puee = & this->uee [0]; | ||||
|   auto pleem= & this->leem[0]; | ||||
|   auto pueem= & this->ueem[0]; | ||||
|   auto pMooeeInvDag_shift_lc   = &MooeeInvDag_shift_lc[0]; | ||||
|   auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0]; | ||||
|  | ||||
|   this->MooeeInvCalls++; | ||||
|   this->MooeeInvTime -= usecond(); | ||||
| @@ -383,36 +414,36 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi | ||||
|  | ||||
|     // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2 | ||||
|     coalescedWrite(chi[ss], psi(ss)); | ||||
|     tmp2 = MooeeInvDag_shift_lc[0]*psi(ss); | ||||
|     tmp2 = pMooeeInvDag_shift_lc[0]*psi(ss); | ||||
|     for(int s=1; s<Ls; s++){ | ||||
|       spProj5m(tmp1, chi(ss+s-1)); | ||||
|       coalescedWrite(chi[ss+s],psi(ss+s) - this->uee[s-1]*tmp1); | ||||
|       tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi(ss+s); | ||||
|       coalescedWrite(chi[ss+s],psi(ss+s) - puee[s-1]*tmp1); | ||||
|       tmp2 = tmp2 + pMooeeInvDag_shift_lc[s]*psi(ss+s); | ||||
|     } | ||||
|  | ||||
|     if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} | ||||
|     else             { spProj5m(tmp2_spProj, tmp2);} | ||||
|     if(pm == 1){ spProj5p(tmp2_spProj, tmp2);} | ||||
|     else       { spProj5m(tmp2_spProj, tmp2);} | ||||
|  | ||||
|     // U_m^{-\dag} | ||||
|     for(int s=0; s<Ls-1; s++){ | ||||
|       spProj5p(tmp1, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - this->ueem[s]*tmp1); | ||||
|       coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) - pueem[s]*tmp1); | ||||
|     } | ||||
|  | ||||
|     // L_m^{-\dag} D^{-dag} | ||||
|     for(int s=0; s<Ls-1; s++){ | ||||
|       spProj5m(tmp1, chi(ss+Ls-1)); | ||||
|       coalescedWrite(chi[ss+s], (1.0/this->dee[s])*chi(ss+s) - (this->leem[s]/this->dee[Ls-1])*tmp1); | ||||
|       coalescedWrite(chi[ss+s], (1.0/pdee[s])*chi(ss+s) - (pleem[s]/pdee[Ls-1])*tmp1); | ||||
|     } | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/this->dee[Ls-1])*chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], (1.0/pdee[Ls-1])*chi(ss+Ls-1)); | ||||
|     spProj5p(tmp1, chi(ss+Ls-1)); | ||||
|     coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj); | ||||
|     coalescedWrite(chi[ss+Ls-1], chi(ss+Ls-1) + pMooeeInvDag_shift_norm[Ls-1]*tmp2_spProj); | ||||
|  | ||||
|     // Apply L^{-dag} | ||||
|     for(int s=Ls-2; s>=0; s--){ | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - this->lee[s]*tmp1); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp1); | ||||
|       spProj5p(tmp1, chi(ss+s)); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) + MooeeInvDag_shift_norm[s]*tmp2_spProj); | ||||
|       coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInvDag_shift_norm[s]*tmp2_spProj); | ||||
|     } | ||||
|   }); | ||||
|  | ||||
|   | ||||
| @@ -382,20 +382,20 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField | ||||
|  | ||||
|    if( interior && exterior ) {  | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDag); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag);    return;} | ||||
| #ifndef GRID_NVCC | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag);    return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDag);     return;} | ||||
| #endif | ||||
|    } else if( interior ) { | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDagInt); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt);    return;} | ||||
| #ifndef GRID_NVCC | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt);    return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagInt);     return;} | ||||
| #endif | ||||
|    } else if( exterior ) {  | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt);    return;} | ||||
| #ifndef GRID_NVCC | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt);    return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagExt);     return;} | ||||
| #endif | ||||
|    } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user