mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Re-import DWF and abstract base EOFA fermion classes and tests
This commit is contained in:
		
							
								
								
									
										100
									
								
								lib/qcd/action/fermion/AbstractEOFAFermion.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								lib/qcd/action/fermion/AbstractEOFAFermion.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,100 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_QCD_ABSTRACT_EOFA_FERMION_H
 | 
			
		||||
#define  GRID_QCD_ABSTRACT_EOFA_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    // DJM: Abstract base class for EOFA fermion types.
 | 
			
		||||
    // Defines layout of additional EOFA-specific parameters and operators.
 | 
			
		||||
    // Use to construct EOFA pseudofermion actions that are agnostic to Shamir / Mobius / etc.,
 | 
			
		||||
    // and ensure that no one can construct EOFA pseudofermion action with non-EOFA fermion type.
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
 | 
			
		||||
        public:
 | 
			
		||||
            INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
        public:
 | 
			
		||||
            // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
 | 
			
		||||
            RealD mq1;
 | 
			
		||||
            RealD mq2;
 | 
			
		||||
            RealD mq3;
 | 
			
		||||
            RealD shift;
 | 
			
		||||
            int pm;
 | 
			
		||||
 | 
			
		||||
            RealD alpha; // Mobius scale
 | 
			
		||||
            RealD k;     // EOFA normalization constant
 | 
			
		||||
 | 
			
		||||
            virtual void Instantiatable(void) = 0;
 | 
			
		||||
 | 
			
		||||
            // EOFA-specific operations
 | 
			
		||||
            // Force user to implement in derived classes
 | 
			
		||||
            virtual void Omega    (const FermionField& in, FermionField& out, int sign, int dag) = 0;
 | 
			
		||||
            virtual void Dtilde   (const FermionField& in, FermionField& out) = 0;
 | 
			
		||||
            virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
 | 
			
		||||
 | 
			
		||||
            // Implement derivatives in base clcass: for EOFA both DWF and Mobius just need d(Dw)/dU
 | 
			
		||||
            virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
 | 
			
		||||
                this->DhopDeriv(mat, U, V, dag);
 | 
			
		||||
            };
 | 
			
		||||
            virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
 | 
			
		||||
                this->DhopDerivOE(mat, U, V, dag);
 | 
			
		||||
            };
 | 
			
		||||
            virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
 | 
			
		||||
                this->DhopDerivEO(mat, U, V, dag);
 | 
			
		||||
            };
 | 
			
		||||
 | 
			
		||||
            // Recompute 5D coefficients for different value of shift constant (needed for heatbath loop over poles)
 | 
			
		||||
            virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
 | 
			
		||||
 | 
			
		||||
            // Constructors
 | 
			
		||||
            AbstractEOFAFermion(GaugeField&            _Umu,
 | 
			
		||||
                                GridCartesian&         FiveDimGrid,
 | 
			
		||||
                                GridRedBlackCartesian& FiveDimRedBlackGrid,
 | 
			
		||||
                                GridCartesian&         FourDimGrid,
 | 
			
		||||
                                GridRedBlackCartesian& FourDimRedBlackGrid,
 | 
			
		||||
                                RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
 | 
			
		||||
                                RealD _M5, RealD _b, RealD _c, const ImplParams &p= ImplParams()) :
 | 
			
		||||
                                CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, _mq1, _M5, p),
 | 
			
		||||
                                mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
 | 
			
		||||
            {
 | 
			
		||||
                int Ls = this->Ls;
 | 
			
		||||
                this->alpha = _b + _c;
 | 
			
		||||
                this->k = this->alpha * (_mq3 - _mq2) * std::pow(this->alpha+1.0,2*Ls) /
 | 
			
		||||
                    ( std::pow(alpha+1.0,Ls) + _mq2*std::pow(alpha-1.0,Ls) ) /
 | 
			
		||||
                    ( std::pow(alpha+1.0,Ls) + _mq3*std::pow(alpha-1.0,Ls) );
 | 
			
		||||
            }
 | 
			
		||||
    };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										440
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermion.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										440
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermion.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,440 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
 | 
			
		||||
      GaugeField            &_Umu,
 | 
			
		||||
      GridCartesian         &FiveDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
      GridCartesian         &FourDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
      RealD _mq1, RealD _mq2, RealD _mq3,
 | 
			
		||||
      RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
 | 
			
		||||
    AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
 | 
			
		||||
        FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
 | 
			
		||||
        _shift, _pm, _M5, 1.0, 0.0, p)
 | 
			
		||||
    {
 | 
			
		||||
        RealD eps = 1.0;
 | 
			
		||||
        Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
 | 
			
		||||
        assert(zdata->n == this->Ls);
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
 | 
			
		||||
        this->SetCoefficientsTanh(zdata, 1.0, 0.0);
 | 
			
		||||
 | 
			
		||||
        Approx::zolotarev_free(zdata);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /***************************************************************
 | 
			
		||||
    /* Additional EOFA operators only called outside the inverter.
 | 
			
		||||
    /* Since speed is not essential, simple axpby-style
 | 
			
		||||
    /* implementations should be fine.
 | 
			
		||||
    /***************************************************************/
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        Din = zero;
 | 
			
		||||
        if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
 | 
			
		||||
        else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
 | 
			
		||||
        else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
 | 
			
		||||
        else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // This is just the identity for DWF
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
 | 
			
		||||
 | 
			
		||||
    // This is just the identity for DWF
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
 | 
			
		||||
 | 
			
		||||
    /*****************************************************************************************************/
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
        this->Meooe5D(psi, Din);
 | 
			
		||||
        this->DW(Din, chi, DaggerNo);
 | 
			
		||||
        axpby(chi, 1.0, 1.0, chi, psi);
 | 
			
		||||
        this->M5D(psi, chi);
 | 
			
		||||
        return(norm2(chi));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
        this->DW(psi, Din, DaggerYes);
 | 
			
		||||
        this->MeooeDag5D(Din, chi);
 | 
			
		||||
        this->M5Ddag(psi, chi);
 | 
			
		||||
        axpby(chi, 1.0, 1.0, chi, psi);
 | 
			
		||||
        return(norm2(chi));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /********************************************************************
 | 
			
		||||
    /* Performance critical fermion operators called inside the inverter
 | 
			
		||||
    /********************************************************************/
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int   Ls    = this->Ls;
 | 
			
		||||
        int   pm    = this->pm;
 | 
			
		||||
        RealD shift = this->shift;
 | 
			
		||||
        RealD mq1   = this->mq1;
 | 
			
		||||
        RealD mq2   = this->mq2;
 | 
			
		||||
        RealD mq3   = this->mq3;
 | 
			
		||||
 | 
			
		||||
        // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
 | 
			
		||||
        Coeff_t shiftp(0.0), shiftm(0.0);
 | 
			
		||||
        if(shift != 0.0){
 | 
			
		||||
          if(pm == 1){ shiftp = shift*(mq3-mq2); }
 | 
			
		||||
          else{ shiftm = -shift*(mq3-mq2); }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls,-1.0); lower[0]    = mq1 + shiftp;
 | 
			
		||||
 | 
			
		||||
        #if(0)
 | 
			
		||||
            std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
 | 
			
		||||
            for(int i=0; i<diag.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<upper.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<lower.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        this->M5D(psi, chi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int   Ls    = this->Ls;
 | 
			
		||||
        int   pm    = this->pm;
 | 
			
		||||
        RealD shift = this->shift;
 | 
			
		||||
        RealD mq1   = this->mq1;
 | 
			
		||||
        RealD mq2   = this->mq2;
 | 
			
		||||
        RealD mq3   = this->mq3;
 | 
			
		||||
 | 
			
		||||
        // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
 | 
			
		||||
        Coeff_t shiftp(0.0), shiftm(0.0);
 | 
			
		||||
        if(shift != 0.0){
 | 
			
		||||
          if(pm == 1){ shiftp = shift*(mq3-mq2); }
 | 
			
		||||
          else{ shiftm = -shift*(mq3-mq2); }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls,-1.0); lower[0]    = mq1 + shiftm;
 | 
			
		||||
 | 
			
		||||
        #if(0)
 | 
			
		||||
            std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
 | 
			
		||||
            for(int i=0; i<diag.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<upper.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<lower.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        this->M5Ddag(psi, chi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // half checkerboard operations
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag = this->bee;
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
          upper[s] = -this->cee[s];
 | 
			
		||||
          lower[s] = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
        upper[Ls-1] = this->dm;
 | 
			
		||||
        lower[0]    = this->dp;
 | 
			
		||||
 | 
			
		||||
        this->M5D(psi, psi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag = this->bee;
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
          upper[s] = -this->cee[s];
 | 
			
		||||
          lower[s] = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
        upper[Ls-1] = this->dp;
 | 
			
		||||
        lower[0]    = this->dm;
 | 
			
		||||
 | 
			
		||||
        this->M5Ddag(psi, psi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /****************************************************************************************/
 | 
			
		||||
 | 
			
		||||
    //Zolo
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
 | 
			
		||||
    {
 | 
			
		||||
        int   Ls    = this->Ls;
 | 
			
		||||
        int   pm    = this->pm;
 | 
			
		||||
        RealD mq1   = this->mq1;
 | 
			
		||||
        RealD mq2   = this->mq2;
 | 
			
		||||
        RealD mq3   = this->mq3;
 | 
			
		||||
        RealD shift = this->shift;
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////////////////////////////////
 | 
			
		||||
        // Constants for the preconditioned matrix Cayley form
 | 
			
		||||
        ////////////////////////////////////////////////////////
 | 
			
		||||
        this->bs.resize(Ls);
 | 
			
		||||
        this->cs.resize(Ls);
 | 
			
		||||
        this->aee.resize(Ls);
 | 
			
		||||
        this->aeo.resize(Ls);
 | 
			
		||||
        this->bee.resize(Ls);
 | 
			
		||||
        this->beo.resize(Ls);
 | 
			
		||||
        this->cee.resize(Ls);
 | 
			
		||||
        this->ceo.resize(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int i=0; i<Ls; ++i){
 | 
			
		||||
          this->bee[i] = 4.0 - this->M5 + 1.0;
 | 
			
		||||
          this->cee[i] = 1.0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int i=0; i<Ls; ++i){
 | 
			
		||||
          this->aee[i] = this->cee[i];
 | 
			
		||||
          this->bs[i] = this->beo[i] = 1.0;
 | 
			
		||||
          this->cs[i] = this->ceo[i] = 0.0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        // EOFA shift terms
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        if(pm == 1){
 | 
			
		||||
          this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
 | 
			
		||||
          this->dm = mq1*this->cee[Ls-1];
 | 
			
		||||
        } else if(this->pm == -1) {
 | 
			
		||||
          this->dp = mq1*this->cee[0];
 | 
			
		||||
          this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
 | 
			
		||||
        } else {
 | 
			
		||||
          this->dp = mq1*this->cee[0];
 | 
			
		||||
          this->dm = mq1*this->cee[Ls-1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        // LDU decomposition of eeoo
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        this->dee.resize(Ls+1);
 | 
			
		||||
        this->lee.resize(Ls);
 | 
			
		||||
        this->leem.resize(Ls);
 | 
			
		||||
        this->uee.resize(Ls);
 | 
			
		||||
        this->ueem.resize(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int i=0; i<Ls; ++i){
 | 
			
		||||
 | 
			
		||||
          if(i < Ls-1){
 | 
			
		||||
 | 
			
		||||
            this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
 | 
			
		||||
 | 
			
		||||
            this->leem[i] = this->dm/this->bee[i];
 | 
			
		||||
            for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
 | 
			
		||||
 | 
			
		||||
            this->dee[i] = this->bee[i];
 | 
			
		||||
 | 
			
		||||
            this->uee[i] = -this->aee[i]/this->bee[i];   // up-diag entry on the ith row
 | 
			
		||||
 | 
			
		||||
            this->ueem[i] = this->dp / this->bee[0];
 | 
			
		||||
            for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
 | 
			
		||||
 | 
			
		||||
          } else {
 | 
			
		||||
 | 
			
		||||
            this->lee[i]  = 0.0;
 | 
			
		||||
            this->leem[i] = 0.0;
 | 
			
		||||
            this->uee[i]  = 0.0;
 | 
			
		||||
            this->ueem[i] = 0.0;
 | 
			
		||||
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        {
 | 
			
		||||
          Coeff_t delta_d = 1.0 / this->bee[0];
 | 
			
		||||
          for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
 | 
			
		||||
          this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
 | 
			
		||||
          this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        int inv = 1;
 | 
			
		||||
        this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
 | 
			
		||||
        this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Recompute Cayley-form coefficients for different shift
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
 | 
			
		||||
    {
 | 
			
		||||
        this->shift = new_shift;
 | 
			
		||||
        Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
 | 
			
		||||
        this->SetCoefficientsTanh(zdata, 1.0, 0.0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
 | 
			
		||||
        Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        GridBase* grid = this->FermionRedBlackGrid();
 | 
			
		||||
        int LLs = grid->_rdimensions[0];
 | 
			
		||||
 | 
			
		||||
        if(LLs == Ls){
 | 
			
		||||
            return; // Not vectorised in 5th direction
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
        Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
            Pplus(s,s)  = this->bee[s];
 | 
			
		||||
            Pminus(s,s) = this->bee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Pplus (0,Ls-1) = this->dp;
 | 
			
		||||
        Pminus(Ls-1,0) = this->dm;
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXcd PplusMat ;
 | 
			
		||||
        Eigen::MatrixXcd PminusMat;
 | 
			
		||||
 | 
			
		||||
        #if(0)
 | 
			
		||||
            std::cout << GridLogMessage << "Pplus:" << std::endl;
 | 
			
		||||
            for(int s=0; s<Ls; ++s){
 | 
			
		||||
                for(int ss=0; ss<Ls; ++ss){
 | 
			
		||||
                    std::cout << Pplus(s,ss) << "\t";
 | 
			
		||||
                }
 | 
			
		||||
                std::cout << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            std::cout << GridLogMessage << "Pminus:" << std::endl;
 | 
			
		||||
            for(int s=0; s<Ls; ++s){
 | 
			
		||||
                for(int ss=0; ss<Ls; ++ss){
 | 
			
		||||
                    std::cout << Pminus(s,ss) << "\t";
 | 
			
		||||
                }
 | 
			
		||||
                std::cout << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        if(inv) {
 | 
			
		||||
            PplusMat  = Pplus.inverse();
 | 
			
		||||
            PminusMat = Pminus.inverse();
 | 
			
		||||
        } else {
 | 
			
		||||
            PplusMat  = Pplus;
 | 
			
		||||
            PminusMat = Pminus;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(dag){
 | 
			
		||||
            PplusMat.adjointInPlace();
 | 
			
		||||
            PminusMat.adjointInPlace();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        typedef typename SiteHalfSpinor::scalar_type scalar_type;
 | 
			
		||||
        const int Nsimd = Simd::Nsimd();
 | 
			
		||||
        Matp.resize(Ls*LLs);
 | 
			
		||||
        Matm.resize(Ls*LLs);
 | 
			
		||||
 | 
			
		||||
        for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
        for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
            int istride = LLs;
 | 
			
		||||
            int ostride = 1;
 | 
			
		||||
            Simd Vp;
 | 
			
		||||
            Simd Vm;
 | 
			
		||||
            scalar_type *sp = (scalar_type*) &Vp;
 | 
			
		||||
            scalar_type *sm = (scalar_type*) &Vm;
 | 
			
		||||
            for(int l=0; l<Nsimd; l++){
 | 
			
		||||
                if(switcheroo<Coeff_t>::iscomplex()) {
 | 
			
		||||
                    sp[l] = PplusMat (l*istride+s1*ostride,s2);
 | 
			
		||||
                    sm[l] = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
                } else {
 | 
			
		||||
                    // if real
 | 
			
		||||
                    scalar_type tmp;
 | 
			
		||||
                    tmp = PplusMat (l*istride+s1*ostride,s2);
 | 
			
		||||
                    sp[l] = scalar_type(tmp.real(),tmp.real());
 | 
			
		||||
                    tmp = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
                    sm[l] = scalar_type(tmp.real(),tmp.real());
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            Matp[LLs*s2+s1] = Vp;
 | 
			
		||||
            Matm[LLs*s2+s1] = Vm;
 | 
			
		||||
        }}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    FermOpTemplateInstantiate(DomainWallEOFAFermion);
 | 
			
		||||
    GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										115
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermion.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermion.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,115 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
 | 
			
		||||
#define  GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    // DJM: EOFA with (Shamir) domain wall fermions.
 | 
			
		||||
    // We overload and re-implement only the routines which have a different operator
 | 
			
		||||
    // structure than the CayleyFermion5D base class.
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
 | 
			
		||||
    {
 | 
			
		||||
        public:
 | 
			
		||||
          INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
        public:
 | 
			
		||||
          // Modified (0,Ls-1) and (Ls-1,0) elements of Mooee for red-black preconditioned Shamir EOFA
 | 
			
		||||
          Coeff_t dm;
 | 
			
		||||
          Coeff_t dp;
 | 
			
		||||
 | 
			
		||||
          virtual void Instantiatable(void) {};
 | 
			
		||||
 | 
			
		||||
          // EOFA specific operators
 | 
			
		||||
          virtual void   Omega      (const FermionField& in, FermionField &out, int sign, int dag);
 | 
			
		||||
          virtual void   Dtilde     (const FermionField& in, FermionField &out);
 | 
			
		||||
          virtual void   DtildeInv  (const FermionField& in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
          // override multiply
 | 
			
		||||
          virtual RealD  M          (const FermionField& in, FermionField& out);
 | 
			
		||||
          virtual RealD  Mdag       (const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
          // half checkerboard operations
 | 
			
		||||
          virtual void   Mooee      (const FermionField &in, FermionField &out);
 | 
			
		||||
          virtual void   MooeeDag   (const FermionField &in, FermionField &out);
 | 
			
		||||
          virtual void   MooeeInv   (const FermionField &in, FermionField &out);
 | 
			
		||||
          virtual void   MooeeInvDag(const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
          virtual void   M5D        (const FermionField &psi, FermionField &chi);
 | 
			
		||||
          virtual void   M5Ddag     (const FermionField &psi, FermionField &chi);
 | 
			
		||||
 | 
			
		||||
          /////////////////////////////////////////////////////
 | 
			
		||||
          // Instantiate different versions depending on Impl
 | 
			
		||||
          /////////////////////////////////////////////////////
 | 
			
		||||
          void M5D                 (const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
                                        std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper);
 | 
			
		||||
          void M5Ddag              (const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
                                        std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper);
 | 
			
		||||
          void MooeeInternal       (const FermionField &in, FermionField &out, int dag, int inv);
 | 
			
		||||
          void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm);
 | 
			
		||||
          void MooeeInternalAsm    (const FermionField &in, FermionField &out, int LLs, int site,
 | 
			
		||||
                                        Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm);
 | 
			
		||||
          void MooeeInternalZAsm   (const FermionField &in, FermionField &out, int LLs, int site,
 | 
			
		||||
                                        Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm);
 | 
			
		||||
 | 
			
		||||
          virtual void RefreshShiftCoefficients(RealD new_shift);
 | 
			
		||||
 | 
			
		||||
          // Constructors
 | 
			
		||||
          DomainWallEOFAFermion(GaugeField&            _Umu,
 | 
			
		||||
                                GridCartesian&         FiveDimGrid,
 | 
			
		||||
                                GridRedBlackCartesian& FiveDimRedBlackGrid,
 | 
			
		||||
                                GridCartesian&         FourDimGrid,
 | 
			
		||||
                                GridRedBlackCartesian& FourDimRedBlackGrid,
 | 
			
		||||
                                RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift,
 | 
			
		||||
                                int pm, RealD _M5, const ImplParams& p=ImplParams());
 | 
			
		||||
 | 
			
		||||
        protected:
 | 
			
		||||
          virtual void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t> &gamma, RealD b, RealD c);
 | 
			
		||||
    };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
 | 
			
		||||
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,\
 | 
			
		||||
    std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,\
 | 
			
		||||
    std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
 | 
			
		||||
 | 
			
		||||
#undef  DOMAIN_WALL_EOFA_DPERP_DENSE
 | 
			
		||||
#define DOMAIN_WALL_EOFA_DPERP_CACHE
 | 
			
		||||
#undef  DOMAIN_WALL_EOFA_DPERP_LINALG
 | 
			
		||||
#define DOMAIN_WALL_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										248
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										248
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,248 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
 | 
			
		||||
    // Pminus fowards
 | 
			
		||||
    // Pplus  backwards..
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
        // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
            for(int s=0; s<Ls; s++){
 | 
			
		||||
                auto tmp = psi._odata[0];
 | 
			
		||||
                if(s==0) {
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else if(s==(Ls-1)) {
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else {
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
        chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
            auto tmp = psi._odata[0];
 | 
			
		||||
            for(int s=0; s<Ls; s++){
 | 
			
		||||
                if(s==0) {
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else if(s==(Ls-1)) {
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else {
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvCalls++;
 | 
			
		||||
        this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
 | 
			
		||||
            auto tmp1 = psi._odata[0];
 | 
			
		||||
            auto tmp2 = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
            // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls  = 12*Ls * (9) = 108*Ls flops
 | 
			
		||||
            // Apply (L^{\prime})^{-1}
 | 
			
		||||
            chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
            for(int s=1; s<Ls; s++){
 | 
			
		||||
                spProj5p(tmp1, chi[ss+s-1]);
 | 
			
		||||
                chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // L_m^{-1}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
                spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
                chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // U_m^{-1} D^{-1}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
                spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
                chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
            spProj5m(tmp2, chi[ss+Ls-1]);
 | 
			
		||||
            chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
 | 
			
		||||
 | 
			
		||||
            // Apply U^{-1}
 | 
			
		||||
            for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
                spProj5m(tmp1, chi[ss+s+1]);
 | 
			
		||||
                chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvTime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        assert(psi.checkerboard == psi.checkerboard);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> ueec(Ls);
 | 
			
		||||
        std::vector<Coeff_t> deec(Ls+1);
 | 
			
		||||
        std::vector<Coeff_t> leec(Ls);
 | 
			
		||||
        std::vector<Coeff_t> ueemc(Ls);
 | 
			
		||||
        std::vector<Coeff_t> leemc(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<ueec.size(); s++){
 | 
			
		||||
            ueec[s]  = conjugate(this->uee[s]);
 | 
			
		||||
            deec[s]  = conjugate(this->dee[s]);
 | 
			
		||||
            leec[s]  = conjugate(this->lee[s]);
 | 
			
		||||
            ueemc[s] = conjugate(this->ueem[s]);
 | 
			
		||||
            leemc[s] = conjugate(this->leem[s]);
 | 
			
		||||
        }
 | 
			
		||||
        deec[Ls] = conjugate(this->dee[Ls]);
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvCalls++;
 | 
			
		||||
        this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
 | 
			
		||||
            auto tmp1 = psi._odata[0];
 | 
			
		||||
            auto tmp2 = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
            // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
            chi[ss] = psi[ss];
 | 
			
		||||
            for(int s=1; s<Ls; s++){
 | 
			
		||||
                spProj5m(tmp1, chi[ss+s-1]);
 | 
			
		||||
                chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // U_m^{-\dagger}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){
 | 
			
		||||
                spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
                chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){
 | 
			
		||||
                spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
                chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
            spProj5p(tmp2, chi[ss+Ls-1]);
 | 
			
		||||
            chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
 | 
			
		||||
 | 
			
		||||
            // Apply L^{-dagger}
 | 
			
		||||
            for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
                spProj5p(tmp1, chi[ss+s+1]);
 | 
			
		||||
                chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvTime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										159
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,159 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    * Dense matrix versions of routines
 | 
			
		||||
    */
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
        int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        assert(Ls==LLs);
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
        Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0;s<Ls;s++){
 | 
			
		||||
            Pplus(s,s)  = this->bee[s];
 | 
			
		||||
            Pminus(s,s) = this->bee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Pplus (0,Ls-1) = this->dp;
 | 
			
		||||
        Pminus(Ls-1,0) = this->dm;
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXd PplusMat ;
 | 
			
		||||
        Eigen::MatrixXd PminusMat;
 | 
			
		||||
 | 
			
		||||
        if(inv) {
 | 
			
		||||
            PplusMat  = Pplus.inverse();
 | 
			
		||||
            PminusMat = Pminus.inverse();
 | 
			
		||||
        } else {
 | 
			
		||||
            PplusMat  = Pplus;
 | 
			
		||||
            PminusMat = Pminus;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(dag){
 | 
			
		||||
            PplusMat.adjointInPlace();
 | 
			
		||||
            PminusMat.adjointInPlace();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // For the non-vectorised s-direction this is simple
 | 
			
		||||
 | 
			
		||||
        for(auto site=0; site<vol; site++){
 | 
			
		||||
 | 
			
		||||
            SiteSpinor     SiteChi;
 | 
			
		||||
            SiteHalfSpinor SitePplus;
 | 
			
		||||
            SiteHalfSpinor SitePminus;
 | 
			
		||||
 | 
			
		||||
            for(int s1=0; s1<Ls; s1++){
 | 
			
		||||
                SiteChi = zero;
 | 
			
		||||
                for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
                    int lex2 = s2 + Ls*site;
 | 
			
		||||
                    if(PplusMat(s1,s2) != 0.0){
 | 
			
		||||
                        spProj5p(SitePplus,psi[lex2]);
 | 
			
		||||
                        accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
 | 
			
		||||
                    }
 | 
			
		||||
                    if(PminusMat(s1,s2) != 0.0){
 | 
			
		||||
                        spProj5m(SitePminus, psi[lex2]);
 | 
			
		||||
                        accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                chi[s1+Ls*site] = SiteChi*0.5;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										168
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										168
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,168 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
    // Pminus fowards
 | 
			
		||||
    // Pplus  backwards
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
            if(s==0) {
 | 
			
		||||
              axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
            } else if (s==(Ls-1)) {
 | 
			
		||||
              axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
              axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            } else {
 | 
			
		||||
              axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
            if(s==0) {
 | 
			
		||||
              axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
            } else if (s==(Ls-1)) {
 | 
			
		||||
              axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
              axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            } else {
 | 
			
		||||
              axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        Coeff_t czero(0.0);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
        // Apply (L^{\prime})^{-1}
 | 
			
		||||
        axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
        for(int s=1; s<Ls; s++){
 | 
			
		||||
            axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // L_m^{-1}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
            axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // U_m^{-1} D^{-1}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
 | 
			
		||||
        }
 | 
			
		||||
        axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
        // Apply U^{-1}
 | 
			
		||||
        for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
            axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1);  // chi[Ls]
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        Coeff_t czero(0.0);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
        // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
        axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
        for(int s=1; s<Ls; s++){
 | 
			
		||||
            axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // U_m^{-\dagger}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
 | 
			
		||||
        }
 | 
			
		||||
        axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
        // Apply L^{-dagger}
 | 
			
		||||
        for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
            axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1);  // chi[Ls]
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
							
								
								
									
										605
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										605
									
								
								lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,605 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    * Dense matrix versions of routines
 | 
			
		||||
    */
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls  = this->Ls;
 | 
			
		||||
        int LLs = grid->_rdimensions[0];
 | 
			
		||||
        const int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
        Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
        assert(Ls/LLs == nsimd);
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        // just directly address via type pun
 | 
			
		||||
        typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
        scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
        scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
        scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
        for(int o=0;o<LLs;o++){ // outer
 | 
			
		||||
        for(int i=0;i<nsimd;i++){ //inner
 | 
			
		||||
            int s  = o + i*LLs;
 | 
			
		||||
            int ss = o*nsimd + i;
 | 
			
		||||
            u_p[ss] = upper[s];
 | 
			
		||||
            l_p[ss] = lower[s];
 | 
			
		||||
            d_p[ss] = diag[s];
 | 
			
		||||
        }}
 | 
			
		||||
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        assert(Nc == 3);
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
            #if 0
 | 
			
		||||
 | 
			
		||||
                alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
                alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
                alignas(64) SiteSpinor fp;
 | 
			
		||||
                alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
                for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                    int vp = (v+1)%LLs;
 | 
			
		||||
                    int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
                    spProj5m(hp, psi[ss+vp]);
 | 
			
		||||
                    spProj5p(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
                    if (vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
                    if (vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
                    hp = 0.5*hp;
 | 
			
		||||
                    hm = 0.5*hm;
 | 
			
		||||
 | 
			
		||||
                    spRecon5m(fp, hp);
 | 
			
		||||
                    spRecon5p(fm, hm);
 | 
			
		||||
 | 
			
		||||
                    chi[ss+v] = d[v]*phi[ss+v];
 | 
			
		||||
                    chi[ss+v] = chi[ss+v] + u[v]*fp;
 | 
			
		||||
                    chi[ss+v] = chi[ss+v] + l[v]*fm;
 | 
			
		||||
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
            #else
 | 
			
		||||
 | 
			
		||||
                for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                    vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
                    int vp = (v==LLs-1) ? 0     : v+1;
 | 
			
		||||
                    int vm = (v==0)     ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
                    Simd hp_00 = psi[ss+vp]()(2)(0);
 | 
			
		||||
                    Simd hp_01 = psi[ss+vp]()(2)(1);
 | 
			
		||||
                    Simd hp_02 = psi[ss+vp]()(2)(2);
 | 
			
		||||
                    Simd hp_10 = psi[ss+vp]()(3)(0);
 | 
			
		||||
                    Simd hp_11 = psi[ss+vp]()(3)(1);
 | 
			
		||||
                    Simd hp_12 = psi[ss+vp]()(3)(2);
 | 
			
		||||
 | 
			
		||||
                    Simd hm_00 = psi[ss+vm]()(0)(0);
 | 
			
		||||
                    Simd hm_01 = psi[ss+vm]()(0)(1);
 | 
			
		||||
                    Simd hm_02 = psi[ss+vm]()(0)(2);
 | 
			
		||||
                    Simd hm_10 = psi[ss+vm]()(1)(0);
 | 
			
		||||
                    Simd hm_11 = psi[ss+vm]()(1)(1);
 | 
			
		||||
                    Simd hm_12 = psi[ss+vm]()(1)(2);
 | 
			
		||||
 | 
			
		||||
                    if(vp <= v){
 | 
			
		||||
                        hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
                        hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
                        hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
                        hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
                        hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
                        hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    if(vm >= v){
 | 
			
		||||
                        hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
                        hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
                        hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
                        hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
                        hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
                        hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Can force these to real arithmetic and save 2x.
 | 
			
		||||
                    Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
                    Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
                    Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
                    Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
                    Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
                    Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
                    Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
                    Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
                    Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
                    Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
                    Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
                    Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
 | 
			
		||||
                    vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
                    vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
                    vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
                    vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
                    vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
                    vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
                    vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
                    vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
                    vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
                    vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
                    vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
                    vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
            #endif
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls  = this->Ls;
 | 
			
		||||
        int LLs = grid->_rdimensions[0];
 | 
			
		||||
        int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
        Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
        assert(Ls/LLs == nsimd);
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        // just directly address via type pun
 | 
			
		||||
        typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
        scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
        scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
        scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
        for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
        for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
            int s  = o + i*LLs;
 | 
			
		||||
            int ss = o*nsimd + i;
 | 
			
		||||
            u_p[ss] = upper[s];
 | 
			
		||||
            l_p[ss] = lower[s];
 | 
			
		||||
            d_p[ss] = diag[s];
 | 
			
		||||
        }}
 | 
			
		||||
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
        #if 0
 | 
			
		||||
 | 
			
		||||
            alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
            alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
            alignas(64) SiteSpinor fp;
 | 
			
		||||
            alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
            for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                int vp = (v+1)%LLs;
 | 
			
		||||
                int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
                spProj5p(hp, psi[ss+vp]);
 | 
			
		||||
                spProj5m(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
                if(vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
                if(vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
                hp = hp*0.5;
 | 
			
		||||
                hm = hm*0.5;
 | 
			
		||||
                spRecon5p(fp, hp);
 | 
			
		||||
                spRecon5m(fm, hm);
 | 
			
		||||
 | 
			
		||||
                chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
                chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        #else
 | 
			
		||||
 | 
			
		||||
            for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
                int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
                int vm = (v == 0    ) ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
                Simd hp_00 = psi[ss+vp]()(0)(0);
 | 
			
		||||
                Simd hp_01 = psi[ss+vp]()(0)(1);
 | 
			
		||||
                Simd hp_02 = psi[ss+vp]()(0)(2);
 | 
			
		||||
                Simd hp_10 = psi[ss+vp]()(1)(0);
 | 
			
		||||
                Simd hp_11 = psi[ss+vp]()(1)(1);
 | 
			
		||||
                Simd hp_12 = psi[ss+vp]()(1)(2);
 | 
			
		||||
 | 
			
		||||
                Simd hm_00 = psi[ss+vm]()(2)(0);
 | 
			
		||||
                Simd hm_01 = psi[ss+vm]()(2)(1);
 | 
			
		||||
                Simd hm_02 = psi[ss+vm]()(2)(2);
 | 
			
		||||
                Simd hm_10 = psi[ss+vm]()(3)(0);
 | 
			
		||||
                Simd hm_11 = psi[ss+vm]()(3)(1);
 | 
			
		||||
                Simd hm_12 = psi[ss+vm]()(3)(2);
 | 
			
		||||
 | 
			
		||||
                if (vp <= v){
 | 
			
		||||
                    hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
                    hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
                    hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
                    hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
                    hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
                    hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if(vm >= v){
 | 
			
		||||
                    hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
                    hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
                    hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
                    hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
                    hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
                    hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
                Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
                Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
                Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
                Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
                Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
                Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
                Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
                Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
                Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
                Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
                Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
 | 
			
		||||
                vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
                vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
                vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
                vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
                vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
                vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
                vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
                vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
                vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
                vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
                vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
                vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef AVX512
 | 
			
		||||
        #include<simd/Intel512common.h>
 | 
			
		||||
        #include<simd/Intel512avx.h>
 | 
			
		||||
        #include<simd/Intel512single.h>
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
        int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
        #ifndef AVX512
 | 
			
		||||
        {
 | 
			
		||||
            SiteHalfSpinor BcastP;
 | 
			
		||||
            SiteHalfSpinor BcastM;
 | 
			
		||||
            SiteHalfSpinor SiteChiP;
 | 
			
		||||
            SiteHalfSpinor SiteChiM;
 | 
			
		||||
 | 
			
		||||
            // Ls*Ls * 2 * 12 * vol flops
 | 
			
		||||
            for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
                for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
                for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
 | 
			
		||||
                    int s = s2 + l*LLs;
 | 
			
		||||
                    int lex = s2 + LLs*site;
 | 
			
		||||
 | 
			
		||||
                    if( s2==0 && l==0 ){
 | 
			
		||||
                        SiteChiP=zero;
 | 
			
		||||
                        SiteChiM=zero;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
 | 
			
		||||
                    }}
 | 
			
		||||
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
 | 
			
		||||
                    }}
 | 
			
		||||
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
 | 
			
		||||
                        SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
 | 
			
		||||
                    }}
 | 
			
		||||
                }}
 | 
			
		||||
 | 
			
		||||
                {
 | 
			
		||||
                    int lex = s1 + LLs*site;
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        vstream(chi[lex]()(sp)(co),   SiteChiP()(sp)(co));
 | 
			
		||||
                        vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
 | 
			
		||||
                    }}
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
        #else
 | 
			
		||||
        {
 | 
			
		||||
            // pointers
 | 
			
		||||
            //  MASK_REGS;
 | 
			
		||||
            #define Chi_00 %%zmm1
 | 
			
		||||
            #define Chi_01 %%zmm2
 | 
			
		||||
            #define Chi_02 %%zmm3
 | 
			
		||||
            #define Chi_10 %%zmm4
 | 
			
		||||
            #define Chi_11 %%zmm5
 | 
			
		||||
            #define Chi_12 %%zmm6
 | 
			
		||||
            #define Chi_20 %%zmm7
 | 
			
		||||
            #define Chi_21 %%zmm8
 | 
			
		||||
            #define Chi_22 %%zmm9
 | 
			
		||||
            #define Chi_30 %%zmm10
 | 
			
		||||
            #define Chi_31 %%zmm11
 | 
			
		||||
            #define Chi_32 %%zmm12
 | 
			
		||||
 | 
			
		||||
            #define BCAST0  %%zmm13
 | 
			
		||||
            #define BCAST1  %%zmm14
 | 
			
		||||
            #define BCAST2  %%zmm15
 | 
			
		||||
            #define BCAST3  %%zmm16
 | 
			
		||||
            #define BCAST4  %%zmm17
 | 
			
		||||
            #define BCAST5  %%zmm18
 | 
			
		||||
            #define BCAST6  %%zmm19
 | 
			
		||||
            #define BCAST7  %%zmm20
 | 
			
		||||
            #define BCAST8  %%zmm21
 | 
			
		||||
            #define BCAST9  %%zmm22
 | 
			
		||||
            #define BCAST10 %%zmm23
 | 
			
		||||
            #define BCAST11 %%zmm24
 | 
			
		||||
 | 
			
		||||
            int incr = LLs*LLs*sizeof(iSinglet<Simd>);
 | 
			
		||||
            for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
                for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
 | 
			
		||||
                    int lex = s2 + LLs*site;
 | 
			
		||||
                    uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
 | 
			
		||||
                    uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
 | 
			
		||||
                    uint64_t a2 = (uint64_t) &psi[lex];
 | 
			
		||||
 | 
			
		||||
                    for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
                        if((s2+l)==0) {
 | 
			
		||||
                            asm(
 | 
			
		||||
                                    VPREFETCH1(0,%2)              VPREFETCH1(0,%1)
 | 
			
		||||
                                    VPREFETCH1(12,%2)  	          VPREFETCH1(13,%2)
 | 
			
		||||
                                    VPREFETCH1(14,%2)  	          VPREFETCH1(15,%2)
 | 
			
		||||
                                    VBCASTCDUP(0,%2,BCAST0)
 | 
			
		||||
                                    VBCASTCDUP(1,%2,BCAST1)
 | 
			
		||||
                                    VBCASTCDUP(2,%2,BCAST2)
 | 
			
		||||
                                    VBCASTCDUP(3,%2,BCAST3)
 | 
			
		||||
                                    VBCASTCDUP(4,%2,BCAST4)       VMULMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                                    VBCASTCDUP(5,%2,BCAST5)       VMULMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                                    VBCASTCDUP(6,%2,BCAST6)       VMULMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                                    VBCASTCDUP(7,%2,BCAST7)       VMULMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                                    VBCASTCDUP(8,%2,BCAST8)       VMULMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                                    VBCASTCDUP(9,%2,BCAST9)       VMULMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                                    VBCASTCDUP(10,%2,BCAST10)     VMULMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                                    VBCASTCDUP(11,%2,BCAST11)     VMULMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                                    : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
                        } else {
 | 
			
		||||
                            asm(
 | 
			
		||||
                                    VBCASTCDUP(0,%2,BCAST0)   VMADDMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                                    VBCASTCDUP(1,%2,BCAST1)   VMADDMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                                    VBCASTCDUP(2,%2,BCAST2)   VMADDMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                                    VBCASTCDUP(3,%2,BCAST3)   VMADDMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                                    VBCASTCDUP(4,%2,BCAST4)   VMADDMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                                    VBCASTCDUP(5,%2,BCAST5)   VMADDMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                                    VBCASTCDUP(6,%2,BCAST6)   VMADDMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                                    VBCASTCDUP(7,%2,BCAST7)   VMADDMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                                    VBCASTCDUP(8,%2,BCAST8)   VMADDMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                                    VBCASTCDUP(9,%2,BCAST9)   VMADDMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                                    VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                                    VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                                    : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
                        }
 | 
			
		||||
                        a0 = a0 + incr;
 | 
			
		||||
                        a1 = a1 + incr;
 | 
			
		||||
                        a2 = a2 + sizeof(Simd::scalar_type);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                {
 | 
			
		||||
                  int lexa = s1+LLs*site;
 | 
			
		||||
                  asm (
 | 
			
		||||
                     VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01)  VSTORE(2 ,%0,Chi_02)
 | 
			
		||||
                     VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11)  VSTORE(5 ,%0,Chi_12)
 | 
			
		||||
                     VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21)  VSTORE(8 ,%0,Chi_22)
 | 
			
		||||
                     VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31)  VSTORE(11,%0,Chi_32)
 | 
			
		||||
                     : : "r" ((uint64_t)&chi[lexa]) : "memory" );
 | 
			
		||||
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        #undef Chi_00
 | 
			
		||||
        #undef Chi_01
 | 
			
		||||
        #undef Chi_02
 | 
			
		||||
        #undef Chi_10
 | 
			
		||||
        #undef Chi_11
 | 
			
		||||
        #undef Chi_12
 | 
			
		||||
        #undef Chi_20
 | 
			
		||||
        #undef Chi_21
 | 
			
		||||
        #undef Chi_22
 | 
			
		||||
        #undef Chi_30
 | 
			
		||||
        #undef Chi_31
 | 
			
		||||
        #undef Chi_32
 | 
			
		||||
 | 
			
		||||
        #undef BCAST0
 | 
			
		||||
        #undef BCAST1
 | 
			
		||||
        #undef BCAST2
 | 
			
		||||
        #undef BCAST3
 | 
			
		||||
        #undef BCAST4
 | 
			
		||||
        #undef BCAST5
 | 
			
		||||
        #undef BCAST6
 | 
			
		||||
        #undef BCAST7
 | 
			
		||||
        #undef BCAST8
 | 
			
		||||
        #undef BCAST9
 | 
			
		||||
        #undef BCAST10
 | 
			
		||||
        #undef BCAST11
 | 
			
		||||
        #endif
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Z-mobius version
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
        int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
        std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
 | 
			
		||||
        exit(-1);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls  = this->Ls;
 | 
			
		||||
        int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
        int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        Vector<iSinglet<Simd> > Matp;
 | 
			
		||||
        Vector<iSinglet<Simd> > Matm;
 | 
			
		||||
        Vector<iSinglet<Simd> > *_Matp;
 | 
			
		||||
        Vector<iSinglet<Simd> > *_Matm;
 | 
			
		||||
 | 
			
		||||
        //  MooeeInternalCompute(dag,inv,Matp,Matm);
 | 
			
		||||
        if(inv && dag){
 | 
			
		||||
            _Matp = &this->MatpInvDag;
 | 
			
		||||
            _Matm = &this->MatmInvDag;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(inv && (!dag)){
 | 
			
		||||
            _Matp = &this->MatpInv;
 | 
			
		||||
            _Matm = &this->MatmInv;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(!inv){
 | 
			
		||||
            MooeeInternalCompute(dag, inv, Matp, Matm);
 | 
			
		||||
            _Matp = &Matp;
 | 
			
		||||
            _Matm = &Matm;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        assert(_Matp->size() == Ls*LLs);
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvCalls++;
 | 
			
		||||
        this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
        if(switcheroo<Coeff_t>::iscomplex()){
 | 
			
		||||
            parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
                MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
                MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvTime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -55,7 +55,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>     // Cayley types
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
 | 
			
		||||
@@ -113,6 +113,14 @@ typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
 | 
			
		||||
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
 | 
			
		||||
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
 | 
			
		||||
 | 
			
		||||
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
 | 
			
		||||
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
 | 
			
		||||
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
 | 
			
		||||
 | 
			
		||||
typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
 | 
			
		||||
typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
 | 
			
		||||
typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
 | 
			
		||||
 | 
			
		||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
 | 
			
		||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
 | 
			
		||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
 | 
			
		||||
@@ -138,6 +146,14 @@ typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
 | 
			
		||||
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
 | 
			
		||||
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
 | 
			
		||||
 | 
			
		||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplR> DomainWallEOFAFermionVec5dR;
 | 
			
		||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplF> DomainWallEOFAFermionVec5dF;
 | 
			
		||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplD> DomainWallEOFAFermionVec5dD;
 | 
			
		||||
 | 
			
		||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplRL> DomainWallEOFAFermionVec5dRL;
 | 
			
		||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplFH> DomainWallEOFAFermionVec5dFH;
 | 
			
		||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplDF> DomainWallEOFAFermionVec5dDF;
 | 
			
		||||
 | 
			
		||||
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
 | 
			
		||||
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
 | 
			
		||||
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
 | 
			
		||||
@@ -206,6 +222,14 @@ typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
 | 
			
		||||
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
 | 
			
		||||
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
 | 
			
		||||
 | 
			
		||||
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
 | 
			
		||||
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
 | 
			
		||||
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
 | 
			
		||||
 | 
			
		||||
typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
 | 
			
		||||
typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
 | 
			
		||||
typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
 | 
			
		||||
 | 
			
		||||
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
 | 
			
		||||
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
 | 
			
		||||
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
 | 
			
		||||
 
 | 
			
		||||
@@ -202,6 +202,39 @@ class DomainWallFermionModule: public FermionOperatorModule<DomainWallFermion, F
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DomainWallEOFAFermionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallEOFAFermionParameters,
 | 
			
		||||
    RealD, mq1,
 | 
			
		||||
    RealD, mq2,
 | 
			
		||||
    RealD, mq3,
 | 
			
		||||
    RealD, shift,
 | 
			
		||||
    int, pm,
 | 
			
		||||
    RealD, M5,
 | 
			
		||||
    unsigned int, Ls);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class FermionImpl >
 | 
			
		||||
class DomainWallEOFAFermionModule: public FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> {
 | 
			
		||||
  typedef FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> FermBase;
 | 
			
		||||
  using FermBase::FermBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  virtual unsigned int Ls(){
 | 
			
		||||
    return this->Par_.Ls;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    auto GridMod = this->GridRefs[0];
 | 
			
		||||
    auto GridMod5d = this->GridRefs[1];
 | 
			
		||||
    typename FermionImpl::GaugeField U(GridMod->get_full());
 | 
			
		||||
    this->FOPtr.reset(new DomainWallEOFAFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
 | 
			
		||||
                                                      *(GridMod5d->get_full()), *(GridMod5d->get_rb()),
 | 
			
		||||
                                                      this->Par_.mq1, this->Par_.mq2, this->Par_.mq3,
 | 
			
		||||
                                                      this->Par_.shift, this->Par_.pm, this->Par_.M5));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
} // QCD
 | 
			
		||||
} // Grid
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										239
									
								
								tests/core/Test_dwf_eofa_even_odd.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										239
									
								
								tests/core/Test_dwf_eofa_even_odd.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,239 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./tests/core/Test_dwf_eofa_even_odd.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
template<class d>
 | 
			
		||||
struct scal {
 | 
			
		||||
    d internal;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
Gamma::Algebra Gmu [] = {
 | 
			
		||||
    Gamma::Algebra::GammaX,
 | 
			
		||||
    Gamma::Algebra::GammaY,
 | 
			
		||||
    Gamma::Algebra::GammaZ,
 | 
			
		||||
    Gamma::Algebra::GammaT
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
    Grid_init(&argc, &argv);
 | 
			
		||||
 | 
			
		||||
    int threads = GridThread::GetThreads();
 | 
			
		||||
    std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
 | 
			
		||||
 | 
			
		||||
    const int Ls = 8;
 | 
			
		||||
    // GridCartesian*         UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
 | 
			
		||||
    GridCartesian*         UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
 | 
			
		||||
    GridCartesian*         FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
 | 
			
		||||
    GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
    GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
 | 
			
		||||
 | 
			
		||||
    std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
    std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
    GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
    GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
 | 
			
		||||
    LatticeFermion    src   (FGrid); random(RNG5, src);
 | 
			
		||||
    LatticeFermion    phi   (FGrid); random(RNG5, phi);
 | 
			
		||||
    LatticeFermion    chi   (FGrid); random(RNG5, chi);
 | 
			
		||||
    LatticeFermion    result(FGrid); result = zero;
 | 
			
		||||
    LatticeFermion    ref   (FGrid); ref = zero;
 | 
			
		||||
    LatticeFermion    tmp   (FGrid); tmp = zero;
 | 
			
		||||
    LatticeFermion    err   (FGrid); err = zero;
 | 
			
		||||
    LatticeGaugeField Umu   (UGrid); SU3::HotConfiguration(RNG4, Umu);
 | 
			
		||||
    std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
			
		||||
 | 
			
		||||
    // Only one non-zero (y)
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    for(int nn=0; nn<Nd; nn++){
 | 
			
		||||
        random(RNG4, U[nn]);
 | 
			
		||||
        if(nn>0){ U[nn] = zero; }
 | 
			
		||||
        PokeIndex<LorentzIndex>(Umu, U[nn], nn);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    RealD mq1   = 0.1;
 | 
			
		||||
    RealD mq2   = 0.5;
 | 
			
		||||
    RealD mq3   = 1.0;
 | 
			
		||||
    RealD shift = 0.1234;
 | 
			
		||||
    RealD M5    = 1.8;
 | 
			
		||||
    int   pm    = 1;
 | 
			
		||||
    DomainWallEOFAFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mq1, mq2, mq3, shift, pm, M5);
 | 
			
		||||
 | 
			
		||||
    LatticeFermion src_e (FrbGrid);
 | 
			
		||||
    LatticeFermion src_o (FrbGrid);
 | 
			
		||||
    LatticeFermion r_e   (FrbGrid);
 | 
			
		||||
    LatticeFermion r_o   (FrbGrid);
 | 
			
		||||
    LatticeFermion r_eo  (FGrid);
 | 
			
		||||
    LatticeFermion r_eeoo(FGrid);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "==========================================================" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "= Testing that Meo + Moe + Moo + Mee = Munprec " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "==========================================================" << std::endl;
 | 
			
		||||
 | 
			
		||||
    pickCheckerboard(Even, src_e, src);
 | 
			
		||||
    pickCheckerboard(Odd,  src_o, src);
 | 
			
		||||
 | 
			
		||||
    Ddwf.Meooe(src_e, r_o); std::cout << GridLogMessage << "Applied Meo" << std::endl;
 | 
			
		||||
    Ddwf.Meooe(src_o, r_e); std::cout << GridLogMessage << "Applied Moe" << std::endl;
 | 
			
		||||
    setCheckerboard(r_eo, r_o);
 | 
			
		||||
    setCheckerboard(r_eo, r_e);
 | 
			
		||||
 | 
			
		||||
    Ddwf.Mooee(src_e, r_e); std::cout << GridLogMessage << "Applied Mee" << std::endl;
 | 
			
		||||
    Ddwf.Mooee(src_o, r_o); std::cout << GridLogMessage << "Applied Moo" << std::endl;
 | 
			
		||||
    setCheckerboard(r_eeoo, r_e);
 | 
			
		||||
    setCheckerboard(r_eeoo, r_o);
 | 
			
		||||
 | 
			
		||||
    r_eo = r_eo + r_eeoo;
 | 
			
		||||
    Ddwf.M(src, ref);
 | 
			
		||||
 | 
			
		||||
    // std::cout << GridLogMessage << r_eo << std::endl;
 | 
			
		||||
    // std::cout << GridLogMessage << ref  << std::endl;
 | 
			
		||||
 | 
			
		||||
    err = ref - r_eo;
 | 
			
		||||
    std::cout << GridLogMessage << "EO norm diff   " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl;
 | 
			
		||||
 | 
			
		||||
    LatticeComplex cerr(FGrid);
 | 
			
		||||
    cerr = localInnerProduct(err,err);
 | 
			
		||||
    // std::cout << GridLogMessage << cerr << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring                " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "=  < phi | Deo | chi > * = < chi | Deo^dag| phi>  " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
 | 
			
		||||
    LatticeFermion chi_e (FrbGrid);
 | 
			
		||||
    LatticeFermion chi_o (FrbGrid);
 | 
			
		||||
 | 
			
		||||
    LatticeFermion dchi_e(FrbGrid);
 | 
			
		||||
    LatticeFermion dchi_o(FrbGrid);
 | 
			
		||||
 | 
			
		||||
    LatticeFermion phi_e (FrbGrid);
 | 
			
		||||
    LatticeFermion phi_o (FrbGrid);
 | 
			
		||||
 | 
			
		||||
    LatticeFermion dphi_e(FrbGrid);
 | 
			
		||||
    LatticeFermion dphi_o(FrbGrid);
 | 
			
		||||
 | 
			
		||||
    pickCheckerboard(Even, chi_e, chi);
 | 
			
		||||
    pickCheckerboard(Odd , chi_o, chi);
 | 
			
		||||
    pickCheckerboard(Even, phi_e, phi);
 | 
			
		||||
    pickCheckerboard(Odd , phi_o, phi);
 | 
			
		||||
 | 
			
		||||
    Ddwf.Meooe   (chi_e, dchi_o);
 | 
			
		||||
    Ddwf.Meooe   (chi_o, dchi_e);
 | 
			
		||||
    Ddwf.MeooeDag(phi_e, dphi_o);
 | 
			
		||||
    Ddwf.MeooeDag(phi_o, dphi_e);
 | 
			
		||||
 | 
			
		||||
    ComplexD pDce = innerProduct(phi_e, dchi_e);
 | 
			
		||||
    ComplexD pDco = innerProduct(phi_o, dchi_o);
 | 
			
		||||
    ComplexD cDpe = innerProduct(chi_e, dphi_e);
 | 
			
		||||
    ComplexD cDpo = innerProduct(chi_o, dphi_o);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce-conj(cDpo) << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco-conj(cDpe) << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "= Test MeeInv Mee = 1                                         " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
 | 
			
		||||
    pickCheckerboard(Even, chi_e, chi);
 | 
			
		||||
    pickCheckerboard(Odd , chi_o, chi);
 | 
			
		||||
 | 
			
		||||
    Ddwf.Mooee   (chi_e, src_e);
 | 
			
		||||
    Ddwf.MooeeInv(src_e, phi_e);
 | 
			
		||||
 | 
			
		||||
    Ddwf.Mooee   (chi_o, src_o);
 | 
			
		||||
    Ddwf.MooeeInv(src_o, phi_o);
 | 
			
		||||
 | 
			
		||||
    setCheckerboard(phi, phi_e);
 | 
			
		||||
    setCheckerboard(phi, phi_o);
 | 
			
		||||
 | 
			
		||||
    err = phi - chi;
 | 
			
		||||
    std::cout << GridLogMessage << "norm diff   " << norm2(err) << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "= Test MeeInvDag MeeDag = 1                                   " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
 | 
			
		||||
    pickCheckerboard(Even, chi_e, chi);
 | 
			
		||||
    pickCheckerboard(Odd , chi_o, chi);
 | 
			
		||||
 | 
			
		||||
    Ddwf.MooeeDag   (chi_e, src_e);
 | 
			
		||||
    Ddwf.MooeeInvDag(src_e, phi_e);
 | 
			
		||||
 | 
			
		||||
    Ddwf.MooeeDag   (chi_o, src_o);
 | 
			
		||||
    Ddwf.MooeeInvDag(src_o, phi_o);
 | 
			
		||||
 | 
			
		||||
    setCheckerboard(phi, phi_e);
 | 
			
		||||
    setCheckerboard(phi, phi_o);
 | 
			
		||||
 | 
			
		||||
    err = phi - chi;
 | 
			
		||||
    std::cout << GridLogMessage << "norm diff   " << norm2(err) << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "= Test MpcDagMpc is Hermitian              " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "==============================================================" << std::endl;
 | 
			
		||||
 | 
			
		||||
    random(RNG5, phi);
 | 
			
		||||
    random(RNG5, chi);
 | 
			
		||||
    pickCheckerboard(Even, chi_e, chi);
 | 
			
		||||
    pickCheckerboard(Odd , chi_o, chi);
 | 
			
		||||
    pickCheckerboard(Even, phi_e, phi);
 | 
			
		||||
    pickCheckerboard(Odd , phi_o, phi);
 | 
			
		||||
    RealD t1,t2;
 | 
			
		||||
 | 
			
		||||
    SchurDiagMooeeOperator<DomainWallEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
 | 
			
		||||
    HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2);
 | 
			
		||||
    HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2);
 | 
			
		||||
 | 
			
		||||
    HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2);
 | 
			
		||||
    HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2);
 | 
			
		||||
 | 
			
		||||
    pDce = innerProduct(phi_e, dchi_e);
 | 
			
		||||
    pDco = innerProduct(phi_o, dchi_o);
 | 
			
		||||
    cDpe = innerProduct(chi_e, dphi_e);
 | 
			
		||||
    cDpo = innerProduct(chi_o, dphi_o);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDco-conj(cDpo) << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDce-conj(cDpe) << std::endl;
 | 
			
		||||
 | 
			
		||||
    Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										206
									
								
								tests/debug/Test_reweight_dwf_eofa.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										206
									
								
								tests/debug/Test_reweight_dwf_eofa.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,206 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./tests/debug/Test_reweight_dwf_eofa.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
// parameters for test
 | 
			
		||||
const std::vector<int> grid_dim = { 8, 8, 8, 8 };
 | 
			
		||||
const int Ls = 8;
 | 
			
		||||
const int Nhits = 25;
 | 
			
		||||
const int max_iter = 5000;
 | 
			
		||||
const RealD mf = 0.1;
 | 
			
		||||
const RealD mb = 0.11;
 | 
			
		||||
const RealD M5 = 1.8;
 | 
			
		||||
const RealD stop_tol = 1.0e-12;
 | 
			
		||||
 | 
			
		||||
RealD mean(const std::vector<RealD>& data)
 | 
			
		||||
{
 | 
			
		||||
    int N = data.size();
 | 
			
		||||
    RealD mean(0.0);
 | 
			
		||||
    for(int i=0; i<N; ++i){ mean += data[i]; }
 | 
			
		||||
    return mean/RealD(N);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RealD jack_mean(const std::vector<RealD>& data, int sample)
 | 
			
		||||
{
 | 
			
		||||
    int N = data.size();
 | 
			
		||||
    RealD mean(0.0);
 | 
			
		||||
    for(int i=0; i<N; ++i){ if(i != sample){ mean += data[i]; } }
 | 
			
		||||
    return mean/RealD(N-1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RealD jack_std(const std::vector<RealD>& jacks, RealD mean)
 | 
			
		||||
{
 | 
			
		||||
    int N = jacks.size();
 | 
			
		||||
    RealD std(0.0);
 | 
			
		||||
    for(int i=0; i<N; ++i){ std += std::pow(jacks[i]-mean, 2.0); }
 | 
			
		||||
    return std::sqrt(RealD(N-1)/RealD(N)*std);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::vector<RealD> jack_stats(const std::vector<RealD>& data)
 | 
			
		||||
{
 | 
			
		||||
    int N = data.size();
 | 
			
		||||
    std::vector<RealD> jack_samples(N);
 | 
			
		||||
    std::vector<RealD> jack_stats(2);
 | 
			
		||||
 | 
			
		||||
    jack_stats[0] = mean(data);
 | 
			
		||||
    for(int i=0; i<N; i++){ jack_samples[i] = jack_mean(data,i); }
 | 
			
		||||
    jack_stats[1] = jack_std(jack_samples, jack_stats[0]);
 | 
			
		||||
    return jack_stats;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char **argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc, &argv);
 | 
			
		||||
 | 
			
		||||
  // Initialize spacetime grid
 | 
			
		||||
  std::cout << GridLogMessage << "Lattice dimensions: "
 | 
			
		||||
    << grid_dim << "   Ls: " << Ls << std::endl;
 | 
			
		||||
  GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(grid_dim,
 | 
			
		||||
      GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
 | 
			
		||||
  GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
 | 
			
		||||
 | 
			
		||||
  // Set up RNGs
 | 
			
		||||
  std::vector<int> seeds4({1, 2, 3, 4});
 | 
			
		||||
  std::vector<int> seeds5({5, 6, 7, 8});
 | 
			
		||||
  GridParallelRNG RNG5(FGrid);
 | 
			
		||||
  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG RNG4(UGrid);
 | 
			
		||||
  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
 | 
			
		||||
  // Random gauge field
 | 
			
		||||
  LatticeGaugeField Umu(UGrid);
 | 
			
		||||
  SU3::HotConfiguration(RNG4, Umu);
 | 
			
		||||
 | 
			
		||||
  // Initialize RHMC fermion operators
 | 
			
		||||
  DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5);
 | 
			
		||||
  DomainWallFermionR Ddwf_b(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, M5);
 | 
			
		||||
  SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> MdagM(Ddwf_f);
 | 
			
		||||
  SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> VdagV(Ddwf_b);
 | 
			
		||||
 | 
			
		||||
  // Degree 12 rational approximations to x^(1/4) and x^(-1/4)
 | 
			
		||||
  double     lo = 0.0001;
 | 
			
		||||
  double     hi = 95.0;
 | 
			
		||||
  int precision = 64;
 | 
			
		||||
  int    degree = 12;
 | 
			
		||||
  AlgRemez remez(lo, hi, precision);
 | 
			
		||||
  std::cout << GridLogMessage << "Generating degree " << degree << " for x^(1/4)" << std::endl;
 | 
			
		||||
  remez.generateApprox(degree, 1, 4);
 | 
			
		||||
  MultiShiftFunction PowerQuarter(remez, stop_tol, false);
 | 
			
		||||
  MultiShiftFunction PowerNegQuarter(remez, stop_tol, true);
 | 
			
		||||
 | 
			
		||||
  // Stochastically estimate reweighting factor via RHMC
 | 
			
		||||
  RealD scale = std::sqrt(0.5);
 | 
			
		||||
  std::vector<RealD> rw_rhmc(Nhits);
 | 
			
		||||
  ConjugateGradientMultiShift<LatticeFermion> msCG_V(max_iter, PowerQuarter);
 | 
			
		||||
  ConjugateGradientMultiShift<LatticeFermion> msCG_M(max_iter, PowerNegQuarter);
 | 
			
		||||
  std::cout.precision(12);
 | 
			
		||||
 | 
			
		||||
  for(int hit=0; hit<Nhits; hit++){
 | 
			
		||||
 | 
			
		||||
    // Gaussian source
 | 
			
		||||
    LatticeFermion Phi    (Ddwf_f.FermionGrid());
 | 
			
		||||
    LatticeFermion PhiOdd (Ddwf_f.FermionRedBlackGrid());
 | 
			
		||||
    std::vector<LatticeFermion> tmp(2, Ddwf_f.FermionRedBlackGrid());
 | 
			
		||||
    gaussian(RNG5, Phi);
 | 
			
		||||
    Phi = Phi*scale;
 | 
			
		||||
 | 
			
		||||
    pickCheckerboard(Odd, PhiOdd, Phi);
 | 
			
		||||
 | 
			
		||||
    // evaluate -log(rw)
 | 
			
		||||
    msCG_V(VdagV, PhiOdd, tmp[0]);
 | 
			
		||||
    msCG_M(MdagM, tmp[0], tmp[1]);
 | 
			
		||||
    rw_rhmc[hit] = norm2(tmp[1]) - norm2(PhiOdd);
 | 
			
		||||
    std::cout << std::endl << "==================================================" << std::endl;
 | 
			
		||||
    std::cout << " --- RHMC: Hit " << hit << ": rw = " << rw_rhmc[hit];
 | 
			
		||||
    std::cout << std::endl << "==================================================" << std::endl << std::endl;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Initialize EOFA fermion operators
 | 
			
		||||
  RealD shift_L = 0.0;
 | 
			
		||||
  RealD shift_R = -1.0;
 | 
			
		||||
  int pm = 1;
 | 
			
		||||
  DomainWallEOFAFermionR Deofa_L(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mb, shift_L, pm, M5);
 | 
			
		||||
  DomainWallEOFAFermionR Deofa_R(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, shift_R, pm, M5);
 | 
			
		||||
  MdagMLinearOperator<DomainWallEOFAFermionR, LatticeFermion> LdagL(Deofa_L);
 | 
			
		||||
  MdagMLinearOperator<DomainWallEOFAFermionR, LatticeFermion> RdagR(Deofa_R);
 | 
			
		||||
 | 
			
		||||
  // Stochastically estimate reweighting factor via EOFA
 | 
			
		||||
  RealD k = Deofa_L.k;
 | 
			
		||||
  std::vector<RealD> rw_eofa(Nhits);
 | 
			
		||||
  ConjugateGradient<LatticeFermion> CG(stop_tol, max_iter);
 | 
			
		||||
  SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
 | 
			
		||||
 | 
			
		||||
  for(int hit=0; hit<Nhits; hit++){
 | 
			
		||||
 | 
			
		||||
    // Gaussian source
 | 
			
		||||
    LatticeFermion Phi       (Deofa_L.FermionGrid());
 | 
			
		||||
    LatticeFermion spProj_Phi(Deofa_L.FermionGrid());
 | 
			
		||||
    std::vector<LatticeFermion> tmp(2, Deofa_L.FermionGrid());
 | 
			
		||||
    gaussian(RNG5, Phi);
 | 
			
		||||
    Phi = Phi*scale;
 | 
			
		||||
 | 
			
		||||
    // evaluate -log(rw)
 | 
			
		||||
    // LH term
 | 
			
		||||
    for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
 | 
			
		||||
    Deofa_L.Omega(spProj_Phi, tmp[0], -1, 0);
 | 
			
		||||
    G5R5(tmp[1], tmp[0]);
 | 
			
		||||
    tmp[0] = zero;
 | 
			
		||||
    SchurSolver(Deofa_L, tmp[1], tmp[0]);
 | 
			
		||||
    Deofa_L.Omega(tmp[0], tmp[1], -1, 1);
 | 
			
		||||
    rw_eofa[hit] = -k*innerProduct(spProj_Phi,tmp[1]).real();
 | 
			
		||||
 | 
			
		||||
    // RH term
 | 
			
		||||
    for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(spProj_Phi, 0.0, Phi, 1.0, Phi, s, s); }
 | 
			
		||||
    Deofa_R.Omega(spProj_Phi, tmp[0], 1, 0);
 | 
			
		||||
    G5R5(tmp[1], tmp[0]);
 | 
			
		||||
    tmp[0] = zero;
 | 
			
		||||
    SchurSolver(Deofa_R, tmp[1], tmp[0]);
 | 
			
		||||
    Deofa_R.Omega(tmp[0], tmp[1], 1, 1);
 | 
			
		||||
    rw_eofa[hit] += k*innerProduct(spProj_Phi,tmp[1]).real();
 | 
			
		||||
    std::cout << std::endl << "==================================================" << std::endl;
 | 
			
		||||
    std::cout << " --- EOFA: Hit " << hit << ": rw = " << rw_eofa[hit];
 | 
			
		||||
    std::cout << std::endl << "==================================================" << std::endl << std::endl;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<RealD> rhmc_result = jack_stats(rw_rhmc);
 | 
			
		||||
  std::vector<RealD> eofa_result = jack_stats(rw_eofa);
 | 
			
		||||
  std::cout << std::endl << "RHMC: rw = " << rhmc_result[0] << " +/- " << rhmc_result[1] << std::endl;
 | 
			
		||||
  std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user