1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-11 14:40:46 +01:00

Namespace, indent

This commit is contained in:
paboyle 2018-01-14 23:24:51 +00:00
parent e5b77c7fd8
commit e857d4d4c8

View File

@ -28,402 +28,400 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h> #include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
namespace Grid { NAMESPACE_BEGIN(Grid);
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse. // FIXME -- make a version of these routines with site loop outermost for cache reuse.
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
template<class Impl> assert(phi.checkerboard == psi.checkerboard);
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, chi.checkerboard = psi.checkerboard;
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard); // Flops = 6.0*(Nc*Ns) *Ls*vol
chi.checkerboard = psi.checkerboard; this->M5Dcalls++;
this->M5Dtime -= usecond();
// Flops = 6.0*(Nc*Ns) *Ls*vol parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
this->M5Dcalls++; for(int s=0; s<Ls; s++){
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0];
if(s==0){
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+Ls-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else if(s==(Ls-1)) {
spProj5m(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5m(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5p(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
}
}
this->M5Dtime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0]; auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){ if(s==0){
if(s==0) { spProj5m(tmp, psi._odata[ss+s+1]);
spProj5p(tmp, psi._odata[ss+s+1]); chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp; spProj5p(tmp, psi._odata[ss+Ls-1]);
spProj5m(tmp, psi._odata[ss+Ls-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = chi[ss+s] + lower[s]*tmp; } else if(s==(Ls-1)) {
} else if(s==(Ls-1)) { spProj5m(tmp, psi._odata[ss+0]);
spProj5p(tmp, psi._odata[ss+0]); chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp; spProj5p(tmp, psi._odata[ss+s-1]);
spProj5m(tmp, psi._odata[ss+s-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = chi[ss+s] + lower[s]*tmp; } else {
} else { spProj5m(tmp, psi._odata[ss+s+1]);
spProj5p(tmp, psi._odata[ss+s+1]); chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp; spProj5p(tmp, psi._odata[ss+s-1]);
spProj5m(tmp, psi._odata[ss+s-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
} }
} }
this->M5Dtime += usecond();
} }
template<class Impl> this->M5Dtime += usecond();
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, }
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
assert(phi.checkerboard == psi.checkerboard); template<class Impl>
chi.checkerboard = psi.checkerboard; void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
// Flops = 6.0*(Nc*Ns) *Ls*vol assert(phi.checkerboard == psi.checkerboard);
this->M5Dcalls++; chi.checkerboard = psi.checkerboard;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // Flops = 6.0*(Nc*Ns) *Ls*vol
chi[ss+Ls-1] = zero; this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
for(int s=0; s<Ls; s++){
auto tmp = psi._odata[0]; auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){ if(s==0){
if(s==0) { spProj5m(tmp, psi._odata[ss+s+1]);
spProj5p(tmp, psi._odata[ss+s+1]); chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp; spProj5p(tmp, psi._odata[ss+Ls-1]);
spProj5m(tmp, psi._odata[ss+Ls-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = chi[ss+s] + lower[s]*tmp; } else if(s==(Ls-1)) {
} else if(s==(Ls-1)) { spProj5m(tmp, psi._odata[ss+0]);
spProj5p(tmp, psi._odata[ss+0]); chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp; spProj5p(tmp, psi._odata[ss+s-1]);
spProj5m(tmp, psi._odata[ss+s-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = chi[ss+s] + lower[s]*tmp; } else {
} else { spProj5m(tmp, psi._odata[ss+s+1]);
spProj5p(tmp, psi._odata[ss+s+1]); chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp; spProj5p(tmp, psi._odata[ss+s-1]);
spProj5m(tmp, psi._odata[ss+s-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
}
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
else{ spProj5m(tmp, psi._odata[ss+s]); }
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
} }
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
} }
this->M5Dtime += usecond();
} }
template<class Impl> this->M5Dtime += usecond();
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi) }
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
GridBase *grid = psi._grid; template<class Impl>
int Ls = this->Ls; void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
int Ls = this->Ls;
GridBase *grid = psi._grid;
chi.checkerboard = psi.checkerboard; assert(phi.checkerboard == psi.checkerboard);
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++; // Flops = 6.0*(Nc*Ns) *Ls*vol
this->MooeeInvTime -= usecond(); this->M5Dcalls++;
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
auto tmp = psi._odata[0]; for(int s=0; s<Ls; s++){
if(s==0) {
// Apply (L^{\prime})^{-1} spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss] = psi[ss]; // chi[0]=psi[0] chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
for(int s=1; s<Ls; s++){ spProj5m(tmp, psi._odata[ss+Ls-1]);
spProj5p(tmp, chi[ss+s-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp; } else if(s==(Ls-1)) {
} spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
// L_m^{-1} spProj5m(tmp, psi._odata[ss+s-1]);
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi chi[ss+s] = chi[ss+s] + lower[s]*tmp;
spProj5m(tmp, chi[ss+s]); } else {
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp; spProj5p(tmp, psi._odata[ss+s+1]);
} chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
// U_m^{-1} D^{-1} chi[ss+s] = chi[ss+s] + lower[s]*tmp;
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
} }
} }
this->MooeeInvTime += usecond();
} }
template<class Impl> this->M5Dtime += usecond();
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi) }
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard; template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi._grid;
this->MooeeInvCalls++; assert(phi.checkerboard == psi.checkerboard);
this->MooeeInvTime -= usecond(); chi.checkerboard = psi.checkerboard;
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
auto tmp1 = psi._odata[0]; parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp2 = psi._odata[0]; chi[ss+Ls-1] = zero;
auto tmp2_spProj = psi._odata[0]; auto tmp = psi._odata[0];
for(int s=0; s<Ls; s++){
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2 if(s==0) {
chi[ss] = psi[ss]; // chi[0]=psi[0] spProj5p(tmp, psi._odata[ss+s+1]);
tmp2 = MooeeInv_shift_lc[0]*psi[ss]; chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
for(int s=1; s<Ls; s++){ spProj5m(tmp, psi._odata[ss+Ls-1]);
spProj5p(tmp1, chi[ss+s-1]); chi[ss+s] = chi[ss+s] + lower[s]*tmp;
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1; } else if(s==(Ls-1)) {
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s]; spProj5p(tmp, psi._odata[ss+0]);
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} else {
spProj5p(tmp, psi._odata[ss+s+1]);
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
spProj5m(tmp, psi._odata[ss+s-1]);
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
} }
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
else{ spProj5m(tmp2_spProj, tmp2); } else{ spProj5m(tmp, psi._odata[ss+s]); }
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
}
}
// L_m^{-1} this->M5Dtime += usecond();
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi }
spProj5m(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
}
// U_m^{-1} D^{-1} template<class Impl>
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s] void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
spProj5p(tmp1, chi[ss+Ls-1]); {
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1; if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
}
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
// Apply U^{-1} and add shift term GridBase *grid = psi._grid;
for(int s=Ls-2; s>=0; s--){ int Ls = this->Ls;
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
spProj5m(tmp1, chi[ss+s]); chi.checkerboard = psi.checkerboard;
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
} this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (L^{\prime})^{-1}
chi[ss] = psi[ss]; // chi[0]=psi[0]
for(int s=1; s<Ls; s++){
spProj5p(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
} }
this->MooeeInvTime += usecond(); // L_m^{-1}
} for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp, chi[ss+s]);
template<class Impl> chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dag}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
}
} }
this->MooeeInvTime += usecond(); // U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply U^{-1}
for(int s=Ls-2; s>=0; s--){
spProj5m(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
}
} }
template<class Impl> this->MooeeInvTime += usecond();
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi) }
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard; template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
this->MooeeInvCalls++; chi.checkerboard = psi.checkerboard;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
auto tmp1 = psi._odata[0]; parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2 auto tmp1 = psi._odata[0];
chi[ss] = psi[ss]; auto tmp2 = psi._odata[0];
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss]; auto tmp2_spProj = psi._odata[0];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// U_m^{-\dag} // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
for(int s=0; s<Ls-1; s++){ chi[ss] = psi[ss]; // chi[0]=psi[0]
spProj5p(tmp1, chi[ss+s]); tmp2 = MooeeInv_shift_lc[0]*psi[ss];
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1; for(int s=1; s<Ls; s++){
} spProj5p(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// L_m^{-\dag} D^{-dag} // L_m^{-1}
for(int s=0; s<Ls-1; s++){ for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
spProj5m(tmp1, chi[ss+Ls-1]); spProj5m(tmp1, chi[ss+s]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1; chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
} }
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// U_m^{-1} D^{-1}
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
spProj5p(tmp1, chi[ss+Ls-1]); spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj; chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
spProj5p(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
}
} }
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
this->MooeeInvTime += usecond(); // Apply U^{-1} and add shift term
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
spProj5m(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
}
} }
#ifdef MOBIUS_EOFA_DPERP_CACHE this->MooeeInvTime += usecond();
}
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); template<class Impl>
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); {
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); GridBase *grid = psi._grid;
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); int Ls = this->Ls;
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif chi.checkerboard = psi.checkerboard;
}} this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp = psi._odata[0];
// Apply (U^{\prime})^{-dag}
chi[ss] = psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
}
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
spProj5p(tmp, chi[ss+s+1]);
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
}
}
this->MooeeInvTime += usecond();
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
{
GridBase *grid = psi._grid;
int Ls = this->Ls;
chi.checkerboard = psi.checkerboard;
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
auto tmp1 = psi._odata[0];
auto tmp2 = psi._odata[0];
auto tmp2_spProj = psi._odata[0];
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
chi[ss] = psi[ss];
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
for(int s=1; s<Ls; s++){
spProj5m(tmp1, chi[ss+s-1]);
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
}
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
else{ spProj5m(tmp2_spProj, tmp2); }
// U_m^{-\dag}
for(int s=0; s<Ls-1; s++){
spProj5p(tmp1, chi[ss+s]);
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
}
// L_m^{-\dag} D^{-dag}
for(int s=0; s<Ls-1; s++){
spProj5m(tmp1, chi[ss+Ls-1]);
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
}
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
spProj5p(tmp1, chi[ss+Ls-1]);
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
// Apply L^{-dag}
for(int s=Ls-2; s>=0; s--){
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
spProj5p(tmp1, chi[ss+s]);
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
}
}
this->MooeeInvTime += usecond();
}
#ifdef MOBIUS_EOFA_DPERP_CACHE
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
#endif
NAMESPACE_END(Grid);