mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Check-in of working Mobius EOFA class and tests
This commit is contained in:
		@@ -265,9 +265,9 @@ typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
 | 
			
		||||
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
 | 
			
		||||
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
 | 
			
		||||
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusFermionR;
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusFermionF;
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusFermionD;
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
 | 
			
		||||
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
 | 
			
		||||
typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
 | 
			
		||||
 
 | 
			
		||||
@@ -142,7 +142,7 @@ namespace QCD {
 | 
			
		||||
 | 
			
		||||
      RealD DtInv_p(0.0), DtInv_m(0.0);
 | 
			
		||||
      RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
 | 
			
		||||
      FermionField tmp = zero;
 | 
			
		||||
      FermionField tmp(this->FermionGrid());
 | 
			
		||||
 | 
			
		||||
      for(int s=0; s<Ls; ++s){
 | 
			
		||||
      for(int sp=0; sp<Ls; ++sp){
 | 
			
		||||
@@ -152,15 +152,14 @@ namespace QCD {
 | 
			
		||||
        DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
 | 
			
		||||
        DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
 | 
			
		||||
 | 
			
		||||
        if(dag){
 | 
			
		||||
          RealD tmp(DtInv_p);
 | 
			
		||||
          DtInv_p = DtInv_m;
 | 
			
		||||
          DtInv_m = tmp;
 | 
			
		||||
        if(sp == 0){
 | 
			
		||||
          axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
 | 
			
		||||
          axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
 | 
			
		||||
        } else {
 | 
			
		||||
          axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
 | 
			
		||||
          axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
 | 
			
		||||
        axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
 | 
			
		||||
 | 
			
		||||
      }}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -217,11 +216,11 @@ namespace QCD {
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int   Ls    = this->Ls;
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
      std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1 + shiftp;
 | 
			
		||||
      std::vector<Coeff_t> lower(Ls,-1.0); lower[0]    = this->mq1 + shiftm;
 | 
			
		||||
      std::vector<Coeff_t> upper(Ls,-1.0);  upper[Ls-1] = this->mq1;
 | 
			
		||||
      std::vector<Coeff_t> lower(Ls,-1.0);  lower[0]    = this->mq1;
 | 
			
		||||
 | 
			
		||||
      // no shift term
 | 
			
		||||
      if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
 | 
			
		||||
 
 | 
			
		||||
@@ -111,7 +111,7 @@ namespace QCD {
 | 
			
		||||
  };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
 | 
			
		||||
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
 | 
			
		||||
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
@@ -122,7 +122,7 @@ template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
 | 
			
		||||
 | 
			
		||||
#undef  MOBIUS_EOFA_DPERP_DENSE
 | 
			
		||||
 
 | 
			
		||||
@@ -410,19 +410,19 @@ namespace QCD {
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_CACHE
 | 
			
		||||
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
  INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										184
									
								
								lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,184 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
  * Dense matrix versions of routines
 | 
			
		||||
  */
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
    int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
    int pm      = this->pm;
 | 
			
		||||
    RealD shift = this->shift;
 | 
			
		||||
    RealD alpha = this->alpha;
 | 
			
		||||
    RealD k     = this->k;
 | 
			
		||||
    RealD mq1   = this->mq1;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    assert(Ls==LLs);
 | 
			
		||||
 | 
			
		||||
    Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
    Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
        Pplus(s,s)  = this->bee[s];
 | 
			
		||||
        Pminus(s,s) = this->bee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
    }
 | 
			
		||||
    Pplus (0,Ls-1) = mq1*this->cee[0];
 | 
			
		||||
    Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
 | 
			
		||||
 | 
			
		||||
    if(shift != 0.0){
 | 
			
		||||
      Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
 | 
			
		||||
      for(int s=0; s<Ls; ++s){
 | 
			
		||||
        if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
 | 
			
		||||
        else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Eigen::MatrixXd PplusMat ;
 | 
			
		||||
    Eigen::MatrixXd PminusMat;
 | 
			
		||||
 | 
			
		||||
    if(inv){
 | 
			
		||||
      PplusMat  = Pplus.inverse();
 | 
			
		||||
      PminusMat = Pminus.inverse();
 | 
			
		||||
    } else {
 | 
			
		||||
      PplusMat  = Pplus;
 | 
			
		||||
      PminusMat = Pminus;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(dag){
 | 
			
		||||
      PplusMat.adjointInPlace();
 | 
			
		||||
      PminusMat.adjointInPlace();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // For the non-vectorised s-direction this is simple
 | 
			
		||||
 | 
			
		||||
    for(auto site=0; site<vol; site++){
 | 
			
		||||
 | 
			
		||||
        SiteSpinor     SiteChi;
 | 
			
		||||
        SiteHalfSpinor SitePplus;
 | 
			
		||||
        SiteHalfSpinor SitePminus;
 | 
			
		||||
 | 
			
		||||
        for(int s1=0; s1<Ls; s1++){
 | 
			
		||||
            SiteChi = zero;
 | 
			
		||||
            for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
                int lex2 = s2 + Ls*site;
 | 
			
		||||
                if(PplusMat(s1,s2) != 0.0){
 | 
			
		||||
                    spProj5p(SitePplus,psi[lex2]);
 | 
			
		||||
                    accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
 | 
			
		||||
                }
 | 
			
		||||
                if(PminusMat(s1,s2) != 0.0){
 | 
			
		||||
                    spProj5m(SitePminus, psi[lex2]);
 | 
			
		||||
                    accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            chi[s1+Ls*site] = SiteChi*0.5;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_DENSE
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										290
									
								
								lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										290
									
								
								lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,290 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
  // Pminus fowards
 | 
			
		||||
  // Pplus  backwards
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply U^{-1}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1);  // chi[Ls]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
      axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply U^{-1} and add shift term
 | 
			
		||||
    if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1);  // chi[Ls]
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-\dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply L^{-dagger}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1);  // chi[Ls]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
 | 
			
		||||
      axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-\dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply L^{-dagger} and add shift
 | 
			
		||||
    if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1);  // chi[Ls]
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_LINALG
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										654
									
								
								lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										654
									
								
								lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,654 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
  * Dense matrix versions of routines
 | 
			
		||||
  */
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase* grid  = psi._grid;
 | 
			
		||||
    int Ls          = this->Ls;
 | 
			
		||||
    int LLs         = grid->_rdimensions[0];
 | 
			
		||||
    const int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
    Vector<iSinglet<Simd>> u(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd>> l(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd>> d(LLs);
 | 
			
		||||
 | 
			
		||||
    assert(Ls/LLs == nsimd);
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // just directly address via type pun
 | 
			
		||||
    typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
    scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
    scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
    scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
    for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
    for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
      int s   = o + i*LLs;
 | 
			
		||||
      int ss  = o*nsimd + i;
 | 
			
		||||
      u_p[ss] = upper[s];
 | 
			
		||||
      l_p[ss] = lower[s];
 | 
			
		||||
      d_p[ss] = diag[s];
 | 
			
		||||
    }}
 | 
			
		||||
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    assert(Nc == 3);
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
      #if 0
 | 
			
		||||
 | 
			
		||||
        alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
        alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
        alignas(64) SiteSpinor fp;
 | 
			
		||||
        alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          int vp = (v+1)%LLs;
 | 
			
		||||
          int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
          spProj5m(hp, psi[ss+vp]);
 | 
			
		||||
          spProj5p(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
          if (vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
          if (vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
          hp = 0.5*hp;
 | 
			
		||||
          hm = 0.5*hm;
 | 
			
		||||
 | 
			
		||||
          spRecon5m(fp, hp);
 | 
			
		||||
          spRecon5p(fm, hm);
 | 
			
		||||
 | 
			
		||||
          chi[ss+v] = d[v]*phi[ss+v];
 | 
			
		||||
          chi[ss+v] = chi[ss+v] + u[v]*fp;
 | 
			
		||||
          chi[ss+v] = chi[ss+v] + l[v]*fm;
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #else
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
          int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
          int vm = (v == 0)     ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
          Simd hp_00 = psi[ss+vp]()(2)(0);
 | 
			
		||||
          Simd hp_01 = psi[ss+vp]()(2)(1);
 | 
			
		||||
          Simd hp_02 = psi[ss+vp]()(2)(2);
 | 
			
		||||
          Simd hp_10 = psi[ss+vp]()(3)(0);
 | 
			
		||||
          Simd hp_11 = psi[ss+vp]()(3)(1);
 | 
			
		||||
          Simd hp_12 = psi[ss+vp]()(3)(2);
 | 
			
		||||
 | 
			
		||||
          Simd hm_00 = psi[ss+vm]()(0)(0);
 | 
			
		||||
          Simd hm_01 = psi[ss+vm]()(0)(1);
 | 
			
		||||
          Simd hm_02 = psi[ss+vm]()(0)(2);
 | 
			
		||||
          Simd hm_10 = psi[ss+vm]()(1)(0);
 | 
			
		||||
          Simd hm_11 = psi[ss+vm]()(1)(1);
 | 
			
		||||
          Simd hm_12 = psi[ss+vm]()(1)(2);
 | 
			
		||||
 | 
			
		||||
          if(vp <= v){
 | 
			
		||||
            hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
            hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
            hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
            hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
            hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
            hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(vm >= v){
 | 
			
		||||
            hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
            hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
            hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
            hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
            hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
            hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          // Can force these to real arithmetic and save 2x.
 | 
			
		||||
          Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
          Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
          Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
          Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
          Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
          Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
          Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
          Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
          Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
          Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
          Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
          Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
 | 
			
		||||
          vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #endif
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    this->M5D(psi, phi, chi, lower, diag, upper);
 | 
			
		||||
 | 
			
		||||
    // FIXME: possible gain from vectorizing shift operation as well?
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase* grid = psi._grid;
 | 
			
		||||
    int Ls  = this->Ls;
 | 
			
		||||
    int LLs = grid->_rdimensions[0];
 | 
			
		||||
    int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
    Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
    assert(Ls/LLs == nsimd);
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // just directly address via type pun
 | 
			
		||||
    typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
    scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
    scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
    scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
    for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
    for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
      int s  = o + i*LLs;
 | 
			
		||||
      int ss = o*nsimd + i;
 | 
			
		||||
      u_p[ss] = upper[s];
 | 
			
		||||
      l_p[ss] = lower[s];
 | 
			
		||||
      d_p[ss] = diag[s];
 | 
			
		||||
    }}
 | 
			
		||||
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
      #if 0
 | 
			
		||||
 | 
			
		||||
        alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
        alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
        alignas(64) SiteSpinor fp;
 | 
			
		||||
        alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          int vp = (v+1)%LLs;
 | 
			
		||||
          int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
          spProj5p(hp, psi[ss+vp]);
 | 
			
		||||
          spProj5m(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
          if(vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
          if(vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
          hp = hp*0.5;
 | 
			
		||||
          hm = hm*0.5;
 | 
			
		||||
          spRecon5p(fp, hp);
 | 
			
		||||
          spRecon5m(fm, hm);
 | 
			
		||||
 | 
			
		||||
          chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
          chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #else
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
          int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
          int vm = (v == 0    ) ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
          Simd hp_00 = psi[ss+vp]()(0)(0);
 | 
			
		||||
          Simd hp_01 = psi[ss+vp]()(0)(1);
 | 
			
		||||
          Simd hp_02 = psi[ss+vp]()(0)(2);
 | 
			
		||||
          Simd hp_10 = psi[ss+vp]()(1)(0);
 | 
			
		||||
          Simd hp_11 = psi[ss+vp]()(1)(1);
 | 
			
		||||
          Simd hp_12 = psi[ss+vp]()(1)(2);
 | 
			
		||||
 | 
			
		||||
          Simd hm_00 = psi[ss+vm]()(2)(0);
 | 
			
		||||
          Simd hm_01 = psi[ss+vm]()(2)(1);
 | 
			
		||||
          Simd hm_02 = psi[ss+vm]()(2)(2);
 | 
			
		||||
          Simd hm_10 = psi[ss+vm]()(3)(0);
 | 
			
		||||
          Simd hm_11 = psi[ss+vm]()(3)(1);
 | 
			
		||||
          Simd hm_12 = psi[ss+vm]()(3)(2);
 | 
			
		||||
 | 
			
		||||
          if (vp <= v){
 | 
			
		||||
            hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
            hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
            hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
            hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
            hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
            hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(vm >= v){
 | 
			
		||||
            hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
            hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
            hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
            hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
            hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
            hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
          Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
          Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
          Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
          Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
          Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
          Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
          Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
          Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
          Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
          Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
          Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
 | 
			
		||||
          vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #endif
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    this->M5Ddag(psi, phi, chi, lower, diag, upper);
 | 
			
		||||
 | 
			
		||||
    // FIXME: possible gain from vectorizing shift operation as well?
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef AVX512
 | 
			
		||||
    #include<simd/Intel512common.h>
 | 
			
		||||
    #include<simd/Intel512avx.h>
 | 
			
		||||
    #include<simd/Intel512single.h>
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
    int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
  {
 | 
			
		||||
    #ifndef AVX512
 | 
			
		||||
      {
 | 
			
		||||
        SiteHalfSpinor BcastP;
 | 
			
		||||
        SiteHalfSpinor BcastM;
 | 
			
		||||
        SiteHalfSpinor SiteChiP;
 | 
			
		||||
        SiteHalfSpinor SiteChiM;
 | 
			
		||||
 | 
			
		||||
        // Ls*Ls * 2 * 12 * vol flops
 | 
			
		||||
        for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
          for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
          for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
 | 
			
		||||
            int s = s2 + l*LLs;
 | 
			
		||||
            int lex = s2 + LLs*site;
 | 
			
		||||
 | 
			
		||||
            if( s2==0 && l==0 ){
 | 
			
		||||
              SiteChiP=zero;
 | 
			
		||||
              SiteChiM=zero;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
 | 
			
		||||
            }}
 | 
			
		||||
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
 | 
			
		||||
            }}
 | 
			
		||||
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
 | 
			
		||||
              SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
 | 
			
		||||
            }}
 | 
			
		||||
          }}
 | 
			
		||||
 | 
			
		||||
          {
 | 
			
		||||
            int lex = s1 + LLs*site;
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              vstream(chi[lex]()(sp)(co),   SiteChiP()(sp)(co));
 | 
			
		||||
              vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
 | 
			
		||||
            }}
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    #else
 | 
			
		||||
      {
 | 
			
		||||
        // pointers
 | 
			
		||||
        //  MASK_REGS;
 | 
			
		||||
        #define Chi_00 %%zmm1
 | 
			
		||||
        #define Chi_01 %%zmm2
 | 
			
		||||
        #define Chi_02 %%zmm3
 | 
			
		||||
        #define Chi_10 %%zmm4
 | 
			
		||||
        #define Chi_11 %%zmm5
 | 
			
		||||
        #define Chi_12 %%zmm6
 | 
			
		||||
        #define Chi_20 %%zmm7
 | 
			
		||||
        #define Chi_21 %%zmm8
 | 
			
		||||
        #define Chi_22 %%zmm9
 | 
			
		||||
        #define Chi_30 %%zmm10
 | 
			
		||||
        #define Chi_31 %%zmm11
 | 
			
		||||
        #define Chi_32 %%zmm12
 | 
			
		||||
 | 
			
		||||
        #define BCAST0  %%zmm13
 | 
			
		||||
        #define BCAST1  %%zmm14
 | 
			
		||||
        #define BCAST2  %%zmm15
 | 
			
		||||
        #define BCAST3  %%zmm16
 | 
			
		||||
        #define BCAST4  %%zmm17
 | 
			
		||||
        #define BCAST5  %%zmm18
 | 
			
		||||
        #define BCAST6  %%zmm19
 | 
			
		||||
        #define BCAST7  %%zmm20
 | 
			
		||||
        #define BCAST8  %%zmm21
 | 
			
		||||
        #define BCAST9  %%zmm22
 | 
			
		||||
        #define BCAST10 %%zmm23
 | 
			
		||||
        #define BCAST11 %%zmm24
 | 
			
		||||
 | 
			
		||||
        int incr = LLs*LLs*sizeof(iSinglet<Simd>);
 | 
			
		||||
 | 
			
		||||
        for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
          for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
 | 
			
		||||
            int lex = s2 + LLs*site;
 | 
			
		||||
            uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
 | 
			
		||||
            uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
 | 
			
		||||
            uint64_t a2 = (uint64_t) &psi[lex];
 | 
			
		||||
 | 
			
		||||
            for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
 | 
			
		||||
              if((s2+l)==0) {
 | 
			
		||||
                asm(
 | 
			
		||||
                      VPREFETCH1(0,%2)              VPREFETCH1(0,%1)
 | 
			
		||||
                      VPREFETCH1(12,%2)  	          VPREFETCH1(13,%2)
 | 
			
		||||
                      VPREFETCH1(14,%2)  	          VPREFETCH1(15,%2)
 | 
			
		||||
                      VBCASTCDUP(0,%2,BCAST0)
 | 
			
		||||
                      VBCASTCDUP(1,%2,BCAST1)
 | 
			
		||||
                      VBCASTCDUP(2,%2,BCAST2)
 | 
			
		||||
                      VBCASTCDUP(3,%2,BCAST3)
 | 
			
		||||
                      VBCASTCDUP(4,%2,BCAST4)       VMULMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                      VBCASTCDUP(5,%2,BCAST5)       VMULMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                      VBCASTCDUP(6,%2,BCAST6)       VMULMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                      VBCASTCDUP(7,%2,BCAST7)       VMULMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                      VBCASTCDUP(8,%2,BCAST8)       VMULMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                      VBCASTCDUP(9,%2,BCAST9)       VMULMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                      VBCASTCDUP(10,%2,BCAST10)     VMULMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                      VBCASTCDUP(11,%2,BCAST11)     VMULMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                      : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
              } else {
 | 
			
		||||
                asm(
 | 
			
		||||
                      VBCASTCDUP(0,%2,BCAST0)   VMADDMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                      VBCASTCDUP(1,%2,BCAST1)   VMADDMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                      VBCASTCDUP(2,%2,BCAST2)   VMADDMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                      VBCASTCDUP(3,%2,BCAST3)   VMADDMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                      VBCASTCDUP(4,%2,BCAST4)   VMADDMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                      VBCASTCDUP(5,%2,BCAST5)   VMADDMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                      VBCASTCDUP(6,%2,BCAST6)   VMADDMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                      VBCASTCDUP(7,%2,BCAST7)   VMADDMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                      VBCASTCDUP(8,%2,BCAST8)   VMADDMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                      VBCASTCDUP(9,%2,BCAST9)   VMADDMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                      VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                      VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                      : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
              }
 | 
			
		||||
 | 
			
		||||
              a0 = a0 + incr;
 | 
			
		||||
              a1 = a1 + incr;
 | 
			
		||||
              a2 = a2 + sizeof(Simd::scalar_type);
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          {
 | 
			
		||||
            int lexa = s1+LLs*site;
 | 
			
		||||
            asm (
 | 
			
		||||
               VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01)  VSTORE(2 ,%0,Chi_02)
 | 
			
		||||
               VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11)  VSTORE(5 ,%0,Chi_12)
 | 
			
		||||
               VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21)  VSTORE(8 ,%0,Chi_22)
 | 
			
		||||
               VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31)  VSTORE(11,%0,Chi_32)
 | 
			
		||||
               : : "r" ((uint64_t)&chi[lexa]) : "memory" );
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      #undef Chi_00
 | 
			
		||||
      #undef Chi_01
 | 
			
		||||
      #undef Chi_02
 | 
			
		||||
      #undef Chi_10
 | 
			
		||||
      #undef Chi_11
 | 
			
		||||
      #undef Chi_12
 | 
			
		||||
      #undef Chi_20
 | 
			
		||||
      #undef Chi_21
 | 
			
		||||
      #undef Chi_22
 | 
			
		||||
      #undef Chi_30
 | 
			
		||||
      #undef Chi_31
 | 
			
		||||
      #undef Chi_32
 | 
			
		||||
 | 
			
		||||
      #undef BCAST0
 | 
			
		||||
      #undef BCAST1
 | 
			
		||||
      #undef BCAST2
 | 
			
		||||
      #undef BCAST3
 | 
			
		||||
      #undef BCAST4
 | 
			
		||||
      #undef BCAST5
 | 
			
		||||
      #undef BCAST6
 | 
			
		||||
      #undef BCAST7
 | 
			
		||||
      #undef BCAST8
 | 
			
		||||
      #undef BCAST9
 | 
			
		||||
      #undef BCAST10
 | 
			
		||||
      #undef BCAST11
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Z-mobius version
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
    int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
  {
 | 
			
		||||
    std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
 | 
			
		||||
    exit(-1);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls  = this->Ls;
 | 
			
		||||
    int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
    int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    Vector<iSinglet<Simd>>   Matp;
 | 
			
		||||
    Vector<iSinglet<Simd>>   Matm;
 | 
			
		||||
    Vector<iSinglet<Simd>>* _Matp;
 | 
			
		||||
    Vector<iSinglet<Simd>>* _Matm;
 | 
			
		||||
 | 
			
		||||
    //  MooeeInternalCompute(dag,inv,Matp,Matm);
 | 
			
		||||
    if(inv && dag){
 | 
			
		||||
      _Matp = &this->MatpInvDag;
 | 
			
		||||
      _Matm = &this->MatmInvDag;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(inv && (!dag)){
 | 
			
		||||
      _Matp = &this->MatpInv;
 | 
			
		||||
      _Matm = &this->MatmInv;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(!inv){
 | 
			
		||||
      MooeeInternalCompute(dag, inv, Matp, Matm);
 | 
			
		||||
      _Matp = &Matp;
 | 
			
		||||
      _Matm = &Matm;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    assert(_Matp->size() == Ls*LLs);
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    if(switcheroo<Coeff_t>::iscomplex()){
 | 
			
		||||
      parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
        MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
        MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
		Reference in New Issue
	
	Block a user