mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Namespace, indent
This commit is contained in:
		@@ -28,402 +28,400 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
			   /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
 | 
			
		||||
  // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
				  std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
  // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
  this->M5Dcalls++;
 | 
			
		||||
  this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        auto tmp = psi._odata[0];
 | 
			
		||||
        if(s==0){
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
 | 
			
		||||
    std::vector<Coeff_t> &shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        auto tmp = psi._odata[0];
 | 
			
		||||
        if(s==0){
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
        if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
 | 
			
		||||
        else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
 | 
			
		||||
        chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(s==0) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
      if(s==0){
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else if(s==(Ls-1)) {
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else {
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
 | 
			
		||||
    std::vector<Coeff_t> &shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
  this->M5Dtime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
					std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
 | 
			
		||||
					std::vector<Coeff_t> &shift_coeffs)
 | 
			
		||||
{
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
  int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      chi[ss+Ls-1] = zero;
 | 
			
		||||
  // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
  this->M5Dcalls++;
 | 
			
		||||
  this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(s==0) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
        if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
 | 
			
		||||
        else{ spProj5m(tmp, psi._odata[ss+s]); }
 | 
			
		||||
        chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
 | 
			
		||||
      if(s==0){
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else if(s==(Ls-1)) {
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else {
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
 | 
			
		||||
      else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
 | 
			
		||||
      chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
 | 
			
		||||
  this->M5Dtime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
				     std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
  // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
  this->M5Dcalls++;
 | 
			
		||||
  this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (L^{\prime})^{-1}
 | 
			
		||||
      chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5p(tmp, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
        spProj5m(tmp, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // U_m^{-1} D^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
        spProj5p(tmp, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
 | 
			
		||||
      // Apply U^{-1}
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        spProj5m(tmp, chi[ss+s+1]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else if(s==(Ls-1)) {
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else {
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
  this->M5Dtime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
					   std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
 | 
			
		||||
					   std::vector<Coeff_t> &shift_coeffs)
 | 
			
		||||
{
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
  int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
  assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
  // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
  this->M5Dcalls++;
 | 
			
		||||
  this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
      auto tmp1        = psi._odata[0];
 | 
			
		||||
      auto tmp2        = psi._odata[0];
 | 
			
		||||
      auto tmp2_spProj = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
 | 
			
		||||
      chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
      tmp2 = MooeeInv_shift_lc[0]*psi[ss];
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5p(tmp1, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
 | 
			
		||||
        tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
    chi[ss+Ls-1] = zero;
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else if(s==(Ls-1)) {
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      } else {
 | 
			
		||||
	spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
	chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
	spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
	chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
 | 
			
		||||
      else{ spProj5m(tmp2_spProj, tmp2); }
 | 
			
		||||
      if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
 | 
			
		||||
      else{ spProj5m(tmp, psi._odata[ss+s]); }
 | 
			
		||||
      chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
        spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
  this->M5Dtime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
      // U_m^{-1} D^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
        spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
      // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
      spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
      // Apply U^{-1} and add shift term
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
 | 
			
		||||
        spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
 | 
			
		||||
      }
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
  this->MooeeInvCalls++;
 | 
			
		||||
  this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      spProj5p(tmp, chi[ss+s-1]);
 | 
			
		||||
      chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (U^{\prime})^{-dag}
 | 
			
		||||
      chi[ss] = psi[ss];
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5m(tmp, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // U_m^{-\dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5p(tmp, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-\dag} D^{-dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5m(tmp, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
 | 
			
		||||
      // Apply L^{-dag}
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        spProj5p(tmp, chi[ss+s+1]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    // L_m^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      spProj5m(tmp, chi[ss+s]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
      spProj5p(tmp, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
 | 
			
		||||
    // Apply U^{-1}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      spProj5m(tmp, chi[ss+s+1]);
 | 
			
		||||
      chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
  this->MooeeInvTime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
  this->MooeeInvCalls++;
 | 
			
		||||
  this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
      auto tmp1        = psi._odata[0];
 | 
			
		||||
      auto tmp2        = psi._odata[0];
 | 
			
		||||
      auto tmp2_spProj = psi._odata[0];
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
 | 
			
		||||
      chi[ss] = psi[ss];
 | 
			
		||||
      tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5m(tmp1, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
 | 
			
		||||
        tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
 | 
			
		||||
      else{ spProj5m(tmp2_spProj, tmp2); }
 | 
			
		||||
    auto tmp1        = psi._odata[0];
 | 
			
		||||
    auto tmp2        = psi._odata[0];
 | 
			
		||||
    auto tmp2_spProj = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // U_m^{-\dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
    // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
 | 
			
		||||
    chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
    tmp2 = MooeeInv_shift_lc[0]*psi[ss];
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      spProj5p(tmp1, chi[ss+s-1]);
 | 
			
		||||
      chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
 | 
			
		||||
      tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
 | 
			
		||||
    }
 | 
			
		||||
    if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
 | 
			
		||||
    else{ spProj5m(tmp2_spProj, tmp2); }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-\dag} D^{-dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
    // L_m^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
      spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
 | 
			
		||||
      // Apply L^{-dag}
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
 | 
			
		||||
        spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
 | 
			
		||||
    }
 | 
			
		||||
    // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
    chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
    spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
    chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
    // Apply U^{-1} and add shift term
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
 | 
			
		||||
      spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
      chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_CACHE
 | 
			
		||||
  this->MooeeInvTime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
  this->MooeeInvCalls++;
 | 
			
		||||
  this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
    auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dag}
 | 
			
		||||
    chi[ss] = psi[ss];
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      spProj5m(tmp, chi[ss+s-1]);
 | 
			
		||||
      chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-\dag}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      spProj5p(tmp, chi[ss+s]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dag} D^{-dag}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      spProj5m(tmp, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
 | 
			
		||||
    }
 | 
			
		||||
    chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
 | 
			
		||||
    // Apply L^{-dag}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      spProj5p(tmp, chi[ss+s+1]);
 | 
			
		||||
      chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  this->MooeeInvTime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid = psi._grid;
 | 
			
		||||
  int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
  this->MooeeInvCalls++;
 | 
			
		||||
  this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
  parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
    auto tmp1        = psi._odata[0];
 | 
			
		||||
    auto tmp2        = psi._odata[0];
 | 
			
		||||
    auto tmp2_spProj = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
 | 
			
		||||
    chi[ss] = psi[ss];
 | 
			
		||||
    tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      spProj5m(tmp1, chi[ss+s-1]);
 | 
			
		||||
      chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
 | 
			
		||||
      tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
 | 
			
		||||
    }
 | 
			
		||||
    if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
 | 
			
		||||
    else{ spProj5m(tmp2_spProj, tmp2); }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-\dag}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dag} D^{-dag}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
 | 
			
		||||
    }
 | 
			
		||||
    chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
    spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
    chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
 | 
			
		||||
    // Apply L^{-dag}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
 | 
			
		||||
      spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
      chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  this->MooeeInvTime += usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef MOBIUS_EOFA_DPERP_CACHE
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user