mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Merge branch 'develop' into feature/hadrons
# Conflicts: # lib/qcd/action/fermion/FermionOperatorImpl.h
This commit is contained in:
100
lib/qcd/action/fermion/AbstractEOFAFermion.h
Normal file
100
lib/qcd/action/fermion/AbstractEOFAFermion.h
Normal file
@ -0,0 +1,100 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H
|
||||
#define GRID_QCD_ABSTRACT_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// DJM: Abstract base class for EOFA fermion types.
|
||||
// Defines layout of additional EOFA-specific parameters and operators.
|
||||
// Use to construct EOFA pseudofermion actions that are agnostic to
|
||||
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
|
||||
// pseudofermion action with non-EOFA fermion type.
|
||||
template<class Impl>
|
||||
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
|
||||
RealD mq1;
|
||||
RealD mq2;
|
||||
RealD mq3;
|
||||
RealD shift;
|
||||
int pm;
|
||||
|
||||
RealD alpha; // Mobius scale
|
||||
RealD k; // EOFA normalization constant
|
||||
|
||||
virtual void Instantiatable(void) = 0;
|
||||
|
||||
// EOFA-specific operations
|
||||
// Force user to implement in derived classes
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
|
||||
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
|
||||
|
||||
// Implement derivatives in base class:
|
||||
// for EOFA both DWF and Mobius just need d(Dw)/dU
|
||||
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDeriv(mat, U, V, dag);
|
||||
};
|
||||
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDerivOE(mat, U, V, dag);
|
||||
};
|
||||
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDerivEO(mat, U, V, dag);
|
||||
};
|
||||
|
||||
// Recompute 5D coefficients for different value of shift constant
|
||||
// (needed for heatbath loop over poles)
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
|
||||
|
||||
// Constructors
|
||||
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
|
||||
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
|
||||
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
|
||||
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
this->alpha = _b + _c;
|
||||
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
|
||||
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
|
||||
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
|
||||
};
|
||||
};
|
||||
}}
|
||||
|
||||
#endif
|
@ -77,7 +77,6 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
|
||||
{
|
||||
this->Report();
|
||||
@ -119,7 +118,6 @@ template<class Impl> void CayleyFermion5D<Impl>::CayleyZeroCounters(void)
|
||||
MooeeInvTime=0;
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.h
|
||||
|
||||
@ -35,24 +35,24 @@ namespace Grid {
|
||||
|
||||
namespace QCD {
|
||||
|
||||
template<typename T> struct switcheroo {
|
||||
static inline int iscomplex() { return 0; }
|
||||
template<typename T> struct switcheroo {
|
||||
static inline int iscomplex() { return 0; }
|
||||
|
||||
template<class vec>
|
||||
static inline vec mult(vec a, vec b) {
|
||||
return real_mult(a,b);
|
||||
}
|
||||
};
|
||||
template<> struct switcheroo<ComplexD> {
|
||||
static inline int iscomplex() { return 1; }
|
||||
template<> struct switcheroo<ComplexD> {
|
||||
static inline int iscomplex() { return 1; }
|
||||
|
||||
template<class vec>
|
||||
static inline vec mult(vec a, vec b) {
|
||||
return a*b;
|
||||
}
|
||||
};
|
||||
template<> struct switcheroo<ComplexF> {
|
||||
static inline int iscomplex() { return 1; }
|
||||
template<> struct switcheroo<ComplexF> {
|
||||
static inline int iscomplex() { return 1; }
|
||||
template<class vec>
|
||||
static inline vec mult(vec a, vec b) {
|
||||
return a*b;
|
||||
@ -90,14 +90,14 @@ namespace Grid {
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
std::vector<Coeff_t> &lower,
|
||||
std::vector<Coeff_t> &diag,
|
||||
std::vector<Coeff_t> &upper);
|
||||
|
||||
void M5Ddag(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
std::vector<Coeff_t> &lower,
|
||||
std::vector<Coeff_t> &diag,
|
||||
@ -125,7 +125,7 @@ namespace Grid {
|
||||
|
||||
// Efficient support for multigrid coarsening
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
|
||||
|
||||
|
||||
void Meooe5D (const FermionField &in, FermionField &out);
|
||||
void MeooeDag5D (const FermionField &in, FermionField &out);
|
||||
|
||||
@ -133,23 +133,23 @@ namespace Grid {
|
||||
RealD mass;
|
||||
|
||||
// Cayley form Moebius (tanh and zolotarev)
|
||||
std::vector<Coeff_t> omega;
|
||||
std::vector<Coeff_t> omega;
|
||||
std::vector<Coeff_t> bs; // S dependent coeffs
|
||||
std::vector<Coeff_t> cs;
|
||||
std::vector<Coeff_t> as;
|
||||
std::vector<Coeff_t> cs;
|
||||
std::vector<Coeff_t> as;
|
||||
// For preconditioning Cayley form
|
||||
std::vector<Coeff_t> bee;
|
||||
std::vector<Coeff_t> cee;
|
||||
std::vector<Coeff_t> aee;
|
||||
std::vector<Coeff_t> beo;
|
||||
std::vector<Coeff_t> ceo;
|
||||
std::vector<Coeff_t> aeo;
|
||||
std::vector<Coeff_t> bee;
|
||||
std::vector<Coeff_t> cee;
|
||||
std::vector<Coeff_t> aee;
|
||||
std::vector<Coeff_t> beo;
|
||||
std::vector<Coeff_t> ceo;
|
||||
std::vector<Coeff_t> aeo;
|
||||
// LDU factorisation of the eeoo matrix
|
||||
std::vector<Coeff_t> lee;
|
||||
std::vector<Coeff_t> leem;
|
||||
std::vector<Coeff_t> uee;
|
||||
std::vector<Coeff_t> ueem;
|
||||
std::vector<Coeff_t> dee;
|
||||
std::vector<Coeff_t> lee;
|
||||
std::vector<Coeff_t> leem;
|
||||
std::vector<Coeff_t> uee;
|
||||
std::vector<Coeff_t> ueem;
|
||||
std::vector<Coeff_t> dee;
|
||||
|
||||
// Matrices of 5d ee inverse params
|
||||
Vector<iSinglet<Simd> > MatpInv;
|
||||
@ -165,7 +165,7 @@ namespace Grid {
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
|
||||
|
||||
|
||||
void CayleyReport(void);
|
||||
void CayleyZeroCounters(void);
|
||||
@ -179,9 +179,9 @@ namespace Grid {
|
||||
double MooeeInvTime;
|
||||
|
||||
protected:
|
||||
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
|
||||
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
|
||||
};
|
||||
|
||||
}
|
||||
|
438
lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Normal file
438
lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Normal file
@ -0,0 +1,438 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3,
|
||||
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
|
||||
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
|
||||
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
|
||||
_shift, _pm, _M5, 1.0, 0.0, p)
|
||||
{
|
||||
RealD eps = 1.0;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
|
||||
assert(zdata->n == this->Ls);
|
||||
|
||||
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
|
||||
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
|
||||
|
||||
Approx::zolotarev_free(zdata);
|
||||
}
|
||||
|
||||
/***************************************************************
|
||||
/* Additional EOFA operators only called outside the inverter.
|
||||
/* Since speed is not essential, simple axpby-style
|
||||
/* implementations should be fine.
|
||||
/***************************************************************/
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
Din = zero;
|
||||
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
|
||||
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
|
||||
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
|
||||
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
|
||||
}
|
||||
|
||||
// This is just the identity for DWF
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
|
||||
|
||||
// This is just the identity for DWF
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
|
||||
|
||||
/*****************************************************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->Meooe5D(psi, Din);
|
||||
this->DW(Din, chi, DaggerNo);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
this->M5D(psi, chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->DW(psi, Din, DaggerYes);
|
||||
this->MeooeDag5D(Din, chi);
|
||||
this->M5Ddag(psi, chi);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
/* Performance critical fermion operators called inside the inverter
|
||||
/********************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD shift = this->shift;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD mq2 = this->mq2;
|
||||
RealD mq3 = this->mq3;
|
||||
|
||||
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
|
||||
Coeff_t shiftp(0.0), shiftm(0.0);
|
||||
if(shift != 0.0){
|
||||
if(pm == 1){ shiftp = shift*(mq3-mq2); }
|
||||
else{ shiftm = -shift*(mq3-mq2); }
|
||||
}
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
|
||||
|
||||
#if(0)
|
||||
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
|
||||
for(int i=0; i<diag.size(); ++i){
|
||||
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<upper.size(); ++i){
|
||||
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<lower.size(); ++i){
|
||||
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
this->M5D(psi, chi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD shift = this->shift;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD mq2 = this->mq2;
|
||||
RealD mq3 = this->mq3;
|
||||
|
||||
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
|
||||
Coeff_t shiftp(0.0), shiftm(0.0);
|
||||
if(shift != 0.0){
|
||||
if(pm == 1){ shiftp = shift*(mq3-mq2); }
|
||||
else{ shiftm = -shift*(mq3-mq2); }
|
||||
}
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
|
||||
|
||||
#if(0)
|
||||
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
|
||||
for(int i=0; i<diag.size(); ++i){
|
||||
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<upper.size(); ++i){
|
||||
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<lower.size(); ++i){
|
||||
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
this->M5Ddag(psi, chi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
// half checkerboard operations
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
upper[s] = -this->cee[s];
|
||||
lower[s] = -this->cee[s];
|
||||
}
|
||||
upper[Ls-1] = this->dm;
|
||||
lower[0] = this->dp;
|
||||
|
||||
this->M5D(psi, psi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
upper[s] = -this->cee[s];
|
||||
lower[s] = -this->cee[s];
|
||||
}
|
||||
upper[Ls-1] = this->dp;
|
||||
lower[0] = this->dm;
|
||||
|
||||
this->M5Ddag(psi, psi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
/****************************************************************************************/
|
||||
|
||||
//Zolo
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD mq2 = this->mq2;
|
||||
RealD mq3 = this->mq3;
|
||||
RealD shift = this->shift;
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Constants for the preconditioned matrix Cayley form
|
||||
////////////////////////////////////////////////////////
|
||||
this->bs.resize(Ls);
|
||||
this->cs.resize(Ls);
|
||||
this->aee.resize(Ls);
|
||||
this->aeo.resize(Ls);
|
||||
this->bee.resize(Ls);
|
||||
this->beo.resize(Ls);
|
||||
this->cee.resize(Ls);
|
||||
this->ceo.resize(Ls);
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
this->bee[i] = 4.0 - this->M5 + 1.0;
|
||||
this->cee[i] = 1.0;
|
||||
}
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
this->aee[i] = this->cee[i];
|
||||
this->bs[i] = this->beo[i] = 1.0;
|
||||
this->cs[i] = this->ceo[i] = 0.0;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// EOFA shift terms
|
||||
//////////////////////////////////////////
|
||||
if(pm == 1){
|
||||
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
|
||||
this->dm = mq1*this->cee[Ls-1];
|
||||
} else if(this->pm == -1) {
|
||||
this->dp = mq1*this->cee[0];
|
||||
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
|
||||
} else {
|
||||
this->dp = mq1*this->cee[0];
|
||||
this->dm = mq1*this->cee[Ls-1];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// LDU decomposition of eeoo
|
||||
//////////////////////////////////////////
|
||||
this->dee.resize(Ls+1);
|
||||
this->lee.resize(Ls);
|
||||
this->leem.resize(Ls);
|
||||
this->uee.resize(Ls);
|
||||
this->ueem.resize(Ls);
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
|
||||
if(i < Ls-1){
|
||||
|
||||
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
|
||||
|
||||
this->leem[i] = this->dm/this->bee[i];
|
||||
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
|
||||
|
||||
this->dee[i] = this->bee[i];
|
||||
|
||||
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
|
||||
|
||||
this->ueem[i] = this->dp / this->bee[0];
|
||||
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
|
||||
|
||||
} else {
|
||||
|
||||
this->lee[i] = 0.0;
|
||||
this->leem[i] = 0.0;
|
||||
this->uee[i] = 0.0;
|
||||
this->ueem[i] = 0.0;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
Coeff_t delta_d = 1.0 / this->bee[0];
|
||||
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
|
||||
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
|
||||
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
|
||||
}
|
||||
|
||||
int inv = 1;
|
||||
this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
|
||||
this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
|
||||
}
|
||||
|
||||
// Recompute Cayley-form coefficients for different shift
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
|
||||
{
|
||||
this->shift = new_shift;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
|
||||
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
|
||||
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
GridBase* grid = this->FermionRedBlackGrid();
|
||||
int LLs = grid->_rdimensions[0];
|
||||
|
||||
if(LLs == Ls){ return; } // Not vectorised in 5th direction
|
||||
|
||||
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
|
||||
Pplus (0,Ls-1) = this->dp;
|
||||
Pminus(Ls-1,0) = this->dm;
|
||||
|
||||
Eigen::MatrixXcd PplusMat ;
|
||||
Eigen::MatrixXcd PminusMat;
|
||||
|
||||
#if(0)
|
||||
std::cout << GridLogMessage << "Pplus:" << std::endl;
|
||||
for(int s=0; s<Ls; ++s){
|
||||
for(int ss=0; ss<Ls; ++ss){
|
||||
std::cout << Pplus(s,ss) << "\t";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "Pminus:" << std::endl;
|
||||
for(int s=0; s<Ls; ++s){
|
||||
for(int ss=0; ss<Ls; ++ss){
|
||||
std::cout << Pminus(s,ss) << "\t";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
if(inv) {
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||
const int Nsimd = Simd::Nsimd();
|
||||
Matp.resize(Ls*LLs);
|
||||
Matm.resize(Ls*LLs);
|
||||
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
int istride = LLs;
|
||||
int ostride = 1;
|
||||
Simd Vp;
|
||||
Simd Vm;
|
||||
scalar_type *sp = (scalar_type*) &Vp;
|
||||
scalar_type *sm = (scalar_type*) &Vm;
|
||||
for(int l=0; l<Nsimd; l++){
|
||||
if(switcheroo<Coeff_t>::iscomplex()) {
|
||||
sp[l] = PplusMat (l*istride+s1*ostride,s2);
|
||||
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||
} else {
|
||||
// if real
|
||||
scalar_type tmp;
|
||||
tmp = PplusMat (l*istride+s1*ostride,s2);
|
||||
sp[l] = scalar_type(tmp.real(),tmp.real());
|
||||
tmp = PminusMat(l*istride+s1*ostride,s2);
|
||||
sm[l] = scalar_type(tmp.real(),tmp.real());
|
||||
}
|
||||
}
|
||||
Matp[LLs*s2+s1] = Vp;
|
||||
Matm[LLs*s2+s1] = Vm;
|
||||
}}
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(DomainWallEOFAFermion);
|
||||
GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
|
||||
|
||||
}}
|
115
lib/qcd/action/fermion/DomainWallEOFAFermion.h
Normal file
115
lib/qcd/action/fermion/DomainWallEOFAFermion.h
Normal file
@ -0,0 +1,115 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
|
||||
#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
|
||||
// for red-black preconditioned Shamir EOFA
|
||||
Coeff_t dm;
|
||||
Coeff_t dp;
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
||||
// EOFA-specific operations
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out);
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField& in, FermionField& out);
|
||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
|
||||
|
||||
virtual void M5D (const FermionField& psi, FermionField& chi);
|
||||
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
|
||||
|
||||
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift);
|
||||
|
||||
// Constructors
|
||||
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
|
||||
RealD _M5, const ImplParams& p=ImplParams());
|
||||
|
||||
protected:
|
||||
void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c);
|
||||
};
|
||||
}}
|
||||
|
||||
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
|
||||
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
|
||||
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
|
||||
|
||||
#undef DOMAIN_WALL_EOFA_DPERP_DENSE
|
||||
#define DOMAIN_WALL_EOFA_DPERP_CACHE
|
||||
#undef DOMAIN_WALL_EOFA_DPERP_LINALG
|
||||
#define DOMAIN_WALL_EOFA_DPERP_VEC
|
||||
|
||||
#endif
|
248
lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Normal file
248
lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
Normal file
@ -0,0 +1,248 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
|
||||
// Pminus fowards
|
||||
// Pplus backwards..
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
for(int s=0; s<Ls; s++){
|
||||
auto tmp = psi._odata[0];
|
||||
if(s==0) {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5m(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi._grid;
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard=psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
auto tmp = psi._odata[0];
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5p(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
|
||||
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
|
||||
// Apply (L^{\prime})^{-1}
|
||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5p(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
spProj5m(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
|
||||
spProj5p(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
|
||||
}
|
||||
spProj5m(tmp2, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5m(tmp1, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
assert(psi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
std::vector<Coeff_t> ueec(Ls);
|
||||
std::vector<Coeff_t> deec(Ls+1);
|
||||
std::vector<Coeff_t> leec(Ls);
|
||||
std::vector<Coeff_t> ueemc(Ls);
|
||||
std::vector<Coeff_t> leemc(Ls);
|
||||
|
||||
for(int s=0; s<ueec.size(); s++){
|
||||
ueec[s] = conjugate(this->uee[s]);
|
||||
deec[s] = conjugate(this->dee[s]);
|
||||
leec[s] = conjugate(this->lee[s]);
|
||||
ueemc[s] = conjugate(this->ueem[s]);
|
||||
leemc[s] = conjugate(this->leem[s]);
|
||||
}
|
||||
deec[Ls] = conjugate(this->dee[Ls]);
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
|
||||
// Apply (U^{\prime})^{-dagger}
|
||||
chi[ss] = psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5m(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5p(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5m(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
|
||||
}
|
||||
spProj5p(tmp2, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
|
||||
|
||||
// Apply L^{-dagger}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5p(tmp1, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
159
lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Normal file
159
lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
Normal file
@ -0,0 +1,159 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
assert(Ls==LLs);
|
||||
|
||||
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
|
||||
Pplus (0,Ls-1) = this->dp;
|
||||
Pminus(Ls-1,0) = this->dm;
|
||||
|
||||
Eigen::MatrixXd PplusMat ;
|
||||
Eigen::MatrixXd PminusMat;
|
||||
|
||||
if(inv) {
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
// For the non-vectorised s-direction this is simple
|
||||
|
||||
for(auto site=0; site<vol; site++){
|
||||
|
||||
SiteSpinor SiteChi;
|
||||
SiteHalfSpinor SitePplus;
|
||||
SiteHalfSpinor SitePminus;
|
||||
|
||||
for(int s1=0; s1<Ls; s1++){
|
||||
SiteChi = zero;
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
int lex2 = s2 + Ls*site;
|
||||
if(PplusMat(s1,s2) != 0.0){
|
||||
spProj5p(SitePplus,psi[lex2]);
|
||||
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
|
||||
}
|
||||
if(PminusMat(s1,s2) != 0.0){
|
||||
spProj5m(SitePminus, psi[lex2]);
|
||||
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
|
||||
}
|
||||
}
|
||||
chi[s1+Ls*site] = SiteChi*0.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
|
||||
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
|
||||
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
168
lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Normal file
168
lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
Normal file
@ -0,0 +1,168 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
// Pminus fowards
|
||||
// Pplus backwards
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
|
||||
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (U^{\prime})^{-dagger}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
|
||||
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply L^{-dagger}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
605
lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
Normal file
605
lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
Normal file
@ -0,0 +1,605 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd> > u(LLs);
|
||||
Vector<iSinglet<Simd> > l(LLs);
|
||||
Vector<iSinglet<Simd> > d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0;o<LLs;o++){ // outer
|
||||
for(int i=0;i<nsimd;i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
assert(Nc == 3);
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5m(hp, psi[ss+vp]);
|
||||
spProj5p(hm, psi[ss+vm]);
|
||||
|
||||
if (vp <= v){ rotate(hp, hp, 1); }
|
||||
if (vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = 0.5*hp;
|
||||
hm = 0.5*hm;
|
||||
|
||||
spRecon5m(fp, hp);
|
||||
spRecon5p(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v];
|
||||
chi[ss+v] = chi[ss+v] + u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] + l[v]*fm;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v==LLs-1) ? 0 : v+1;
|
||||
int vm = (v==0) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||
|
||||
if(vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
// Can force these to real arithmetic and save 2x.
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd> > u(LLs);
|
||||
Vector<iSinglet<Simd> > l(LLs);
|
||||
Vector<iSinglet<Simd> > d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5p(hp, psi[ss+vp]);
|
||||
spProj5m(hm, psi[ss+vm]);
|
||||
|
||||
if(vp <= v){ rotate(hp, hp, 1); }
|
||||
if(vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = hp*0.5;
|
||||
hm = hm*0.5;
|
||||
spRecon5p(fp, hp);
|
||||
spRecon5m(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0 ) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||
|
||||
if (vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
#ifdef AVX512
|
||||
#include<simd/Intel512common.h>
|
||||
#include<simd/Intel512avx.h>
|
||||
#include<simd/Intel512single.h>
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
SiteHalfSpinor BcastM;
|
||||
SiteHalfSpinor SiteChiP;
|
||||
SiteHalfSpinor SiteChiM;
|
||||
|
||||
// Ls*Ls * 2 * 12 * vol flops
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
|
||||
|
||||
int s = s2 + l*LLs;
|
||||
int lex = s2 + LLs*site;
|
||||
|
||||
if( s2==0 && l==0 ){
|
||||
SiteChiP=zero;
|
||||
SiteChiM=zero;
|
||||
}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
|
||||
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
|
||||
}}
|
||||
}}
|
||||
|
||||
{
|
||||
int lex = s1 + LLs*site;
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
|
||||
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
|
||||
}}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
{
|
||||
// pointers
|
||||
// MASK_REGS;
|
||||
#define Chi_00 %%zmm1
|
||||
#define Chi_01 %%zmm2
|
||||
#define Chi_02 %%zmm3
|
||||
#define Chi_10 %%zmm4
|
||||
#define Chi_11 %%zmm5
|
||||
#define Chi_12 %%zmm6
|
||||
#define Chi_20 %%zmm7
|
||||
#define Chi_21 %%zmm8
|
||||
#define Chi_22 %%zmm9
|
||||
#define Chi_30 %%zmm10
|
||||
#define Chi_31 %%zmm11
|
||||
#define Chi_32 %%zmm12
|
||||
|
||||
#define BCAST0 %%zmm13
|
||||
#define BCAST1 %%zmm14
|
||||
#define BCAST2 %%zmm15
|
||||
#define BCAST3 %%zmm16
|
||||
#define BCAST4 %%zmm17
|
||||
#define BCAST5 %%zmm18
|
||||
#define BCAST6 %%zmm19
|
||||
#define BCAST7 %%zmm20
|
||||
#define BCAST8 %%zmm21
|
||||
#define BCAST9 %%zmm22
|
||||
#define BCAST10 %%zmm23
|
||||
#define BCAST11 %%zmm24
|
||||
|
||||
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
|
||||
int lex = s2 + LLs*site;
|
||||
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
|
||||
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
|
||||
uint64_t a2 = (uint64_t) &psi[lex];
|
||||
|
||||
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
|
||||
if((s2+l)==0) {
|
||||
asm(
|
||||
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
|
||||
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
|
||||
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
|
||||
VBCASTCDUP(0,%2,BCAST0)
|
||||
VBCASTCDUP(1,%2,BCAST1)
|
||||
VBCASTCDUP(2,%2,BCAST2)
|
||||
VBCASTCDUP(3,%2,BCAST3)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
|
||||
VMULMEM(0,%1,BCAST8,Chi_22)
|
||||
VMULMEM(0,%1,BCAST9,Chi_30)
|
||||
VMULMEM(0,%1,BCAST10,Chi_31)
|
||||
VMULMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
} else {
|
||||
asm(
|
||||
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
}
|
||||
a0 = a0 + incr;
|
||||
a1 = a1 + incr;
|
||||
a2 = a2 + sizeof(Simd::scalar_type);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int lexa = s1+LLs*site;
|
||||
asm (
|
||||
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
|
||||
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
|
||||
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
|
||||
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
|
||||
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef Chi_00
|
||||
#undef Chi_01
|
||||
#undef Chi_02
|
||||
#undef Chi_10
|
||||
#undef Chi_11
|
||||
#undef Chi_12
|
||||
#undef Chi_20
|
||||
#undef Chi_21
|
||||
#undef Chi_22
|
||||
#undef Chi_30
|
||||
#undef Chi_31
|
||||
#undef Chi_32
|
||||
|
||||
#undef BCAST0
|
||||
#undef BCAST1
|
||||
#undef BCAST2
|
||||
#undef BCAST3
|
||||
#undef BCAST4
|
||||
#undef BCAST5
|
||||
#undef BCAST6
|
||||
#undef BCAST7
|
||||
#undef BCAST8
|
||||
#undef BCAST9
|
||||
#undef BCAST10
|
||||
#undef BCAST11
|
||||
#endif
|
||||
};
|
||||
|
||||
// Z-mobius version
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
|
||||
exit(-1);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
Vector<iSinglet<Simd> > Matp;
|
||||
Vector<iSinglet<Simd> > Matm;
|
||||
Vector<iSinglet<Simd> > *_Matp;
|
||||
Vector<iSinglet<Simd> > *_Matm;
|
||||
|
||||
// MooeeInternalCompute(dag,inv,Matp,Matm);
|
||||
if(inv && dag){
|
||||
_Matp = &this->MatpInvDag;
|
||||
_Matm = &this->MatmInvDag;
|
||||
}
|
||||
|
||||
if(inv && (!dag)){
|
||||
_Matp = &this->MatpInv;
|
||||
_Matm = &this->MatmInv;
|
||||
}
|
||||
|
||||
if(!inv){
|
||||
MooeeInternalCompute(dag, inv, Matp, Matm);
|
||||
_Matp = &Matp;
|
||||
_Matm = &Matm;
|
||||
}
|
||||
|
||||
assert(_Matp->size() == Ls*LLs);
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
if(switcheroo<Coeff_t>::iscomplex()){
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
} else {
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_VEC
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
|
||||
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,6 +1,6 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/Fermion_base_aggregate.h
|
||||
|
||||
@ -38,6 +38,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
// - ContinuedFractionFermion5D.cc
|
||||
// - WilsonFermion.cc
|
||||
// - WilsonKernels.cc
|
||||
// - DomainWallEOFAFermion.cc
|
||||
// - MobiusEOFAFermion.cc
|
||||
//
|
||||
// The explicit instantiation is only avoidable if we move this source to headers and end up with include/parse/recompile
|
||||
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
|
||||
@ -55,8 +57,9 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
#include <Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h>
|
||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
|
||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusFermion.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
|
||||
#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
|
||||
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
|
||||
@ -113,6 +116,14 @@ typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
|
||||
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
|
||||
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
|
||||
|
||||
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
|
||||
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
|
||||
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
|
||||
|
||||
typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
|
||||
typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
|
||||
typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
|
||||
|
||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||
@ -121,6 +132,14 @@ typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
|
||||
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
|
||||
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
|
||||
|
||||
typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR;
|
||||
typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF;
|
||||
typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD;
|
||||
|
||||
typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL;
|
||||
typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH;
|
||||
typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF;
|
||||
|
||||
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
||||
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
||||
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
||||
@ -129,7 +148,7 @@ typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
|
||||
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
|
||||
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
|
||||
|
||||
// Ls vectorised
|
||||
// Ls vectorised
|
||||
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
|
||||
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
|
||||
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
|
||||
@ -138,6 +157,14 @@ typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
|
||||
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
|
||||
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
|
||||
|
||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplR> DomainWallEOFAFermionVec5dR;
|
||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplF> DomainWallEOFAFermionVec5dF;
|
||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplD> DomainWallEOFAFermionVec5dD;
|
||||
|
||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplRL> DomainWallEOFAFermionVec5dRL;
|
||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplFH> DomainWallEOFAFermionVec5dFH;
|
||||
typedef DomainWallEOFAFermion<DomainWallVec5dImplDF> DomainWallEOFAFermionVec5dDF;
|
||||
|
||||
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
|
||||
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
|
||||
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
|
||||
@ -146,6 +173,14 @@ typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
|
||||
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
|
||||
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
|
||||
|
||||
typedef MobiusEOFAFermion<DomainWallVec5dImplR> MobiusEOFAFermionVec5dR;
|
||||
typedef MobiusEOFAFermion<DomainWallVec5dImplF> MobiusEOFAFermionVec5dF;
|
||||
typedef MobiusEOFAFermion<DomainWallVec5dImplD> MobiusEOFAFermionVec5dD;
|
||||
|
||||
typedef MobiusEOFAFermion<DomainWallVec5dImplRL> MobiusEOFAFermionVec5dRL;
|
||||
typedef MobiusEOFAFermion<DomainWallVec5dImplFH> MobiusEOFAFermionVec5dFH;
|
||||
typedef MobiusEOFAFermion<DomainWallVec5dImplDF> MobiusEOFAFermionVec5dDF;
|
||||
|
||||
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
|
||||
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
|
||||
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
|
||||
@ -206,6 +241,14 @@ typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
|
||||
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
|
||||
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
|
||||
|
||||
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
|
||||
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
|
||||
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
|
||||
|
||||
typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
|
||||
typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
|
||||
typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
|
||||
|
||||
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
|
||||
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
|
||||
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
|
||||
@ -222,6 +265,14 @@ typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
|
||||
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
|
||||
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
|
||||
|
||||
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
|
||||
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
|
||||
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
|
||||
|
||||
typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
|
||||
typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
|
||||
typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF;
|
||||
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
|
||||
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;
|
||||
|
@ -557,12 +557,16 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
}
|
||||
|
||||
}
|
||||
// Fixme: Gparity prop * link
|
||||
inline void multLinkProp(SitePropagator &phi, const SiteDoubledGaugeField &U,
|
||||
const SitePropagator &chi, int mu)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
// Fixme: Gparity prop * link
|
||||
inline void multLinkProp(SitePropagator &phi, const SiteDoubledGaugeField &U,
|
||||
const SitePropagator &chi, int mu)
|
||||
{
|
||||
assert(0);
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
|
502
lib/qcd/action/fermion/MobiusEOFAFermion.cc
Normal file
502
lib/qcd/action/fermion/MobiusEOFAFermion.cc
Normal file
@ -0,0 +1,502 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3,
|
||||
RealD _shift, int _pm, RealD _M5,
|
||||
RealD _b, RealD _c, const ImplParams &p) :
|
||||
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
|
||||
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
|
||||
_shift, _pm, _M5, _b, _c, p)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
RealD eps = 1.0;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
|
||||
assert(zdata->n == this->Ls);
|
||||
|
||||
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
|
||||
",c=" << _c << ") with Ls=" << Ls << std::endl;
|
||||
this->SetCoefficientsTanh(zdata, _b, _c);
|
||||
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
|
||||
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
|
||||
",pm=" << _pm << ")" << std::endl;
|
||||
|
||||
Approx::zolotarev_free(zdata);
|
||||
|
||||
if(_shift != 0.0){
|
||||
SetCoefficientsPrecondShiftOps();
|
||||
} else {
|
||||
Mooee_shift.resize(Ls, 0.0);
|
||||
MooeeInv_shift_lc.resize(Ls, 0.0);
|
||||
MooeeInv_shift_norm.resize(Ls, 0.0);
|
||||
MooeeInvDag_shift_lc.resize(Ls, 0.0);
|
||||
MooeeInvDag_shift_norm.resize(Ls, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
/***************************************************************
|
||||
/* Additional EOFA operators only called outside the inverter.
|
||||
/* Since speed is not essential, simple axpby-style
|
||||
/* implementations should be fine.
|
||||
/***************************************************************/
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
RealD alpha = this->alpha;
|
||||
|
||||
Din = zero;
|
||||
if((sign == 1) && (dag == 0)) { // \Omega_{+}
|
||||
for(int s=0; s<Ls; ++s){
|
||||
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
|
||||
}
|
||||
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
|
||||
for(int s=0; s<Ls; ++s){
|
||||
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
|
||||
}
|
||||
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
|
||||
for(int sp=0; sp<Ls; ++sp){
|
||||
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
|
||||
}
|
||||
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
|
||||
for(int sp=0; sp<Ls; ++sp){
|
||||
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
|
||||
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
RealD b = 0.5 * ( 1.0 + this->alpha );
|
||||
RealD c = 0.5 * ( 1.0 - this->alpha );
|
||||
RealD mq1 = this->mq1;
|
||||
|
||||
for(int s=0; s<Ls; ++s){
|
||||
if(s == 0) {
|
||||
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
|
||||
} else if(s == (Ls-1)) {
|
||||
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
|
||||
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
RealD m = this->mq1;
|
||||
RealD c = 0.5 * this->alpha;
|
||||
RealD d = 0.5;
|
||||
|
||||
RealD DtInv_p(0.0), DtInv_m(0.0);
|
||||
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
|
||||
FermionField tmp(this->FermionGrid());
|
||||
|
||||
for(int s=0; s<Ls; ++s){
|
||||
for(int sp=0; sp<Ls; ++sp){
|
||||
|
||||
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
|
||||
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
|
||||
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
|
||||
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
|
||||
|
||||
if(sp == 0){
|
||||
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
|
||||
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
|
||||
} else {
|
||||
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
|
||||
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
|
||||
}
|
||||
|
||||
}}
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->Meooe5D(psi, Din);
|
||||
this->DW(Din, chi, DaggerNo);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
this->M5D(psi, chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->DW(psi, Din, DaggerYes);
|
||||
this->MeooeDag5D(Din, chi);
|
||||
this->M5Ddag(psi, chi);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
/* Performance critical fermion operators called inside the inverter
|
||||
/********************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
// half checkerboard operations
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
// coefficients of Mooee
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
for(int s=0; s<Ls; s++){
|
||||
upper[s] = -this->cee[s];
|
||||
lower[s] = -this->cee[s];
|
||||
}
|
||||
upper[Ls-1] *= -this->mq1;
|
||||
lower[0] *= -this->mq1;
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
// coefficients of MooeeDag
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
upper[s] = -this->cee[s+1];
|
||||
lower[s] = this->mq1*this->cee[Ls-1];
|
||||
} else if(s==(Ls-1)) {
|
||||
upper[s] = this->mq1*this->cee[0];
|
||||
lower[s] = -this->cee[s-1];
|
||||
} else {
|
||||
upper[s] = -this->cee[s+1];
|
||||
lower[s] = -this->cee[s-1];
|
||||
}
|
||||
}
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
/****************************************************************************************/
|
||||
|
||||
// Computes coefficients for applying Cayley preconditioned shift operators
|
||||
// (Mooee + \Delta) --> Mooee_shift
|
||||
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
|
||||
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
|
||||
// For the latter two cases, the operation takes the form
|
||||
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
|
||||
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD alpha = this->alpha;
|
||||
RealD k = this->k;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD shift = this->shift;
|
||||
|
||||
// Initialize
|
||||
Mooee_shift.resize(Ls);
|
||||
MooeeInv_shift_lc.resize(Ls);
|
||||
MooeeInv_shift_norm.resize(Ls);
|
||||
MooeeInvDag_shift_lc.resize(Ls);
|
||||
MooeeInvDag_shift_norm.resize(Ls);
|
||||
|
||||
// Construct Mooee_shift
|
||||
int idx(0);
|
||||
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
|
||||
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
|
||||
for(int s=0; s<Ls; ++s){
|
||||
idx = (pm == 1) ? (s) : (Ls-1-s);
|
||||
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
|
||||
}
|
||||
|
||||
// Tridiagonal solve for MooeeInvDag_shift_lc
|
||||
{
|
||||
Coeff_t m(0.0);
|
||||
std::vector<Coeff_t> d = Mooee_shift;
|
||||
std::vector<Coeff_t> u(Ls,0.0);
|
||||
std::vector<Coeff_t> y(Ls,0.0);
|
||||
std::vector<Coeff_t> q(Ls,0.0);
|
||||
if(pm == 1){ u[0] = 1.0; }
|
||||
else{ u[Ls-1] = 1.0; }
|
||||
|
||||
// Tridiagonal matrix algorithm + Sherman-Morrison formula
|
||||
//
|
||||
// We solve
|
||||
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
|
||||
// where Mooee' is the tridiagonal part of Mooee_{+}, and
|
||||
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
|
||||
// so that the outer-product u \otimes v gives the (0,Ls-1)
|
||||
// entry of Mooee_{+}.
|
||||
//
|
||||
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
|
||||
// and then construct the solution to the original system
|
||||
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
|
||||
if(pm == 1){
|
||||
for(int s=1; s<Ls; ++s){
|
||||
m = -this->cee[s] / this->bee[s-1];
|
||||
d[s] -= m*d[s-1];
|
||||
u[s] -= m*u[s-1];
|
||||
}
|
||||
}
|
||||
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
|
||||
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
|
||||
for(int s=Ls-2; s>=0; --s){
|
||||
if(pm == 1){
|
||||
y[s] = d[s] / this->bee[s];
|
||||
q[s] = u[s] / this->bee[s];
|
||||
} else {
|
||||
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
|
||||
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
|
||||
}
|
||||
}
|
||||
|
||||
// Construct MooeeInvDag_shift_lc
|
||||
for(int s=0; s<Ls; ++s){
|
||||
if(pm == 1){
|
||||
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
|
||||
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
|
||||
} else {
|
||||
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
|
||||
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute remaining coefficients
|
||||
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
|
||||
for(int s=0; s<Ls; ++s){
|
||||
|
||||
// MooeeInv_shift_lc
|
||||
if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
|
||||
else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
|
||||
|
||||
// MooeeInv_shift_norm
|
||||
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
|
||||
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
|
||||
|
||||
// MooeeInvDag_shift_norm
|
||||
if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
|
||||
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
|
||||
else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
|
||||
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recompute coefficients for a different value of shift constant
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
|
||||
{
|
||||
this->shift = new_shift;
|
||||
if(new_shift != 0.0){
|
||||
SetCoefficientsPrecondShiftOps();
|
||||
} else {
|
||||
int Ls = this->Ls;
|
||||
Mooee_shift.resize(Ls,0.0);
|
||||
MooeeInv_shift_lc.resize(Ls,0.0);
|
||||
MooeeInv_shift_norm.resize(Ls,0.0);
|
||||
MooeeInvDag_shift_lc.resize(Ls,0.0);
|
||||
MooeeInvDag_shift_norm.resize(Ls,0.0);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
|
||||
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
GridBase* grid = this->FermionRedBlackGrid();
|
||||
int LLs = grid->_rdimensions[0];
|
||||
|
||||
if(LLs == Ls){ return; } // Not vectorised in 5th direction
|
||||
|
||||
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
|
||||
Pplus (0,Ls-1) = this->mq1*this->cee[0];
|
||||
Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
|
||||
|
||||
if(this->shift != 0.0){
|
||||
RealD c = 0.5 * this->alpha;
|
||||
RealD d = 0.5;
|
||||
RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
|
||||
if(this->pm == 1) {
|
||||
for(int s=0; s<Ls; ++s){
|
||||
Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
|
||||
}
|
||||
} else {
|
||||
for(int s=0; s<Ls; ++s){
|
||||
Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Eigen::MatrixXcd PplusMat ;
|
||||
Eigen::MatrixXcd PminusMat;
|
||||
|
||||
if(inv) {
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||
const int Nsimd = Simd::Nsimd();
|
||||
Matp.resize(Ls*LLs);
|
||||
Matm.resize(Ls*LLs);
|
||||
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
int istride = LLs;
|
||||
int ostride = 1;
|
||||
Simd Vp;
|
||||
Simd Vm;
|
||||
scalar_type *sp = (scalar_type*) &Vp;
|
||||
scalar_type *sm = (scalar_type*) &Vm;
|
||||
for(int l=0; l<Nsimd; l++){
|
||||
if(switcheroo<Coeff_t>::iscomplex()) {
|
||||
sp[l] = PplusMat (l*istride+s1*ostride,s2);
|
||||
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||
} else {
|
||||
// if real
|
||||
scalar_type tmp;
|
||||
tmp = PplusMat (l*istride+s1*ostride,s2);
|
||||
sp[l] = scalar_type(tmp.real(),tmp.real());
|
||||
tmp = PminusMat(l*istride+s1*ostride,s2);
|
||||
sm[l] = scalar_type(tmp.real(),tmp.real());
|
||||
}
|
||||
}
|
||||
Matp[LLs*s2+s1] = Vp;
|
||||
Matm[LLs*s2+s1] = Vm;
|
||||
}}
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(MobiusEOFAFermion);
|
||||
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
|
||||
|
||||
}}
|
133
lib/qcd/action/fermion/MobiusEOFAFermion.h
Normal file
133
lib/qcd/action/fermion/MobiusEOFAFermion.h
Normal file
@ -0,0 +1,133 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_MOBIUS_EOFA_FERMION_H
|
||||
#define GRID_QCD_MOBIUS_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Shift operator coefficients for red-black preconditioned Mobius EOFA
|
||||
std::vector<Coeff_t> Mooee_shift;
|
||||
std::vector<Coeff_t> MooeeInv_shift_lc;
|
||||
std::vector<Coeff_t> MooeeInv_shift_norm;
|
||||
std::vector<Coeff_t> MooeeInvDag_shift_lc;
|
||||
std::vector<Coeff_t> MooeeInvDag_shift_norm;
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
||||
// EOFA-specific operations
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out);
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField& in, FermionField& out);
|
||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
|
||||
|
||||
virtual void M5D (const FermionField& psi, FermionField& chi);
|
||||
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs);
|
||||
|
||||
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs);
|
||||
|
||||
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
|
||||
|
||||
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift);
|
||||
|
||||
// Constructors
|
||||
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
|
||||
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
|
||||
|
||||
protected:
|
||||
void SetCoefficientsPrecondShiftOps(void);
|
||||
};
|
||||
}}
|
||||
|
||||
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
|
||||
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
|
||||
template void MobiusEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
|
||||
|
||||
#undef MOBIUS_EOFA_DPERP_DENSE
|
||||
#define MOBIUS_EOFA_DPERP_CACHE
|
||||
#undef MOBIUS_EOFA_DPERP_LINALG
|
||||
#define MOBIUS_EOFA_DPERP_VEC
|
||||
|
||||
#endif
|
429
lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Normal file
429
lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
Normal file
@ -0,0 +1,429 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
for(int s=0; s<Ls; s++){
|
||||
auto tmp = psi._odata[0];
|
||||
if(s==0){
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5m(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||
std::vector<Coeff_t> &shift_coeffs)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
for(int s=0; s<Ls; s++){
|
||||
auto tmp = psi._odata[0];
|
||||
if(s==0){
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5m(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
|
||||
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
|
||||
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
auto tmp = psi._odata[0];
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5p(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||
std::vector<Coeff_t> &shift_coeffs)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
chi[ss+Ls-1] = zero;
|
||||
auto tmp = psi._odata[0];
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5p(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
|
||||
else{ spProj5m(tmp, psi._odata[ss+s]); }
|
||||
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
|
||||
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp = psi._odata[0];
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5p(tmp, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
spProj5m(tmp, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
|
||||
spProj5p(tmp, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
|
||||
}
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5m(tmp, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
auto tmp2_spProj = psi._odata[0];
|
||||
|
||||
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
|
||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||
tmp2 = MooeeInv_shift_lc[0]*psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5p(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
|
||||
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
|
||||
else{ spProj5m(tmp2_spProj, tmp2); }
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
spProj5m(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
|
||||
spProj5p(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
|
||||
}
|
||||
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
spProj5m(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
|
||||
|
||||
// Apply U^{-1} and add shift term
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
|
||||
spProj5m(tmp1, chi[ss+s]);
|
||||
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
|
||||
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp = psi._odata[0];
|
||||
|
||||
// Apply (U^{\prime})^{-dag}
|
||||
chi[ss] = psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5m(tmp, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
|
||||
}
|
||||
|
||||
// U_m^{-\dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5p(tmp, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
|
||||
}
|
||||
|
||||
// L_m^{-\dag} D^{-dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5m(tmp, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
|
||||
}
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
|
||||
// Apply L^{-dag}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5p(tmp, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
auto tmp2_spProj = psi._odata[0];
|
||||
|
||||
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
|
||||
chi[ss] = psi[ss];
|
||||
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5m(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
|
||||
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
|
||||
else{ spProj5m(tmp2_spProj, tmp2); }
|
||||
|
||||
// U_m^{-\dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5p(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
|
||||
}
|
||||
|
||||
// L_m^{-\dag} D^{-dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5m(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
|
||||
}
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
spProj5p(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
|
||||
|
||||
// Apply L^{-dag}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
|
||||
spProj5p(tmp1, chi[ss+s]);
|
||||
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_CACHE
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
184
lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Normal file
184
lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
Normal file
@ -0,0 +1,184 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
int pm = this->pm;
|
||||
RealD shift = this->shift;
|
||||
RealD alpha = this->alpha;
|
||||
RealD k = this->k;
|
||||
RealD mq1 = this->mq1;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
assert(Ls==LLs);
|
||||
|
||||
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
Pplus (0,Ls-1) = mq1*this->cee[0];
|
||||
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
|
||||
|
||||
if(shift != 0.0){
|
||||
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
|
||||
for(int s=0; s<Ls; ++s){
|
||||
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
|
||||
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
|
||||
}
|
||||
}
|
||||
|
||||
Eigen::MatrixXd PplusMat ;
|
||||
Eigen::MatrixXd PminusMat;
|
||||
|
||||
if(inv){
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
// For the non-vectorised s-direction this is simple
|
||||
|
||||
for(auto site=0; site<vol; site++){
|
||||
|
||||
SiteSpinor SiteChi;
|
||||
SiteHalfSpinor SitePplus;
|
||||
SiteHalfSpinor SitePminus;
|
||||
|
||||
for(int s1=0; s1<Ls; s1++){
|
||||
SiteChi = zero;
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
int lex2 = s2 + Ls*site;
|
||||
if(PplusMat(s1,s2) != 0.0){
|
||||
spProj5p(SitePplus,psi[lex2]);
|
||||
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
|
||||
}
|
||||
if(PminusMat(s1,s2) != 0.0){
|
||||
spProj5m(SitePminus, psi[lex2]);
|
||||
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
|
||||
}
|
||||
}
|
||||
chi[s1+Ls*site] = SiteChi*0.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_DENSE
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
|
||||
|
||||
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
|
||||
|
||||
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
290
lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Normal file
290
lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
Normal file
@ -0,0 +1,290 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
// Pminus fowards
|
||||
// Pplus backwards
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
|
||||
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply U^{-1} and add shift term
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
|
||||
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
// Apply (U^{\prime})^{-dagger}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply L^{-dagger}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
|
||||
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply L^{-dagger} and add shift
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_LINALG
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
983
lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
Normal file
983
lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
Normal file
@ -0,0 +1,983 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
assert(Nc == 3);
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5m(hp, psi[ss+vp]);
|
||||
spProj5p(hm, psi[ss+vm]);
|
||||
|
||||
if (vp <= v){ rotate(hp, hp, 1); }
|
||||
if (vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = 0.5*hp;
|
||||
hm = 0.5*hm;
|
||||
|
||||
spRecon5m(fp, hp);
|
||||
spRecon5p(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v];
|
||||
chi[ss+v] = chi[ss+v] + u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] + l[v]*fm;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||
|
||||
if(vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
// Can force these to real arithmetic and save 2x.
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
#if 0
|
||||
|
||||
this->M5D(psi, phi, chi, lower, diag, upper);
|
||||
|
||||
// FIXME: possible gain from vectorizing shift operation as well?
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
Vector<iSinglet<Simd>> s(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
scalar_type* s_p = (scalar_type*) &s[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
s_p[ss] = shift_coeffs[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
assert(Nc == 3);
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
int vs = (this->pm == 1) ? LLs-1 : 0;
|
||||
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0);
|
||||
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1);
|
||||
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2);
|
||||
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0);
|
||||
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1);
|
||||
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2);
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||
|
||||
if(vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == 1 && vs <= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == -1 && vs >= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
|
||||
}
|
||||
|
||||
// Can force these to real arithmetic and save 2x.
|
||||
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
|
||||
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
|
||||
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
|
||||
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
|
||||
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
|
||||
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
|
||||
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5p(hp, psi[ss+vp]);
|
||||
spProj5m(hm, psi[ss+vm]);
|
||||
|
||||
if(vp <= v){ rotate(hp, hp, 1); }
|
||||
if(vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = hp*0.5;
|
||||
hm = hm*0.5;
|
||||
spRecon5p(fp, hp);
|
||||
spRecon5m(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0 ) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||
|
||||
if (vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
#if 0
|
||||
|
||||
this->M5Ddag(psi, phi, chi, lower, diag, upper);
|
||||
|
||||
// FIXME: possible gain from vectorizing shift operation as well?
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
Vector<iSinglet<Simd>> s(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
scalar_type* s_p = (scalar_type*) &s[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
s_p[ss] = shift_coeffs[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
int vs = (this->pm == 1) ? LLs-1 : 0;
|
||||
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0);
|
||||
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1);
|
||||
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2);
|
||||
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0);
|
||||
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1);
|
||||
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2);
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0 ) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||
|
||||
if (vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == 1 && vs <= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == -1 && vs >= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
|
||||
}
|
||||
|
||||
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
|
||||
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
|
||||
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
|
||||
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
|
||||
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
|
||||
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef AVX512
|
||||
#include<simd/Intel512common.h>
|
||||
#include<simd/Intel512avx.h>
|
||||
#include<simd/Intel512single.h>
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
SiteHalfSpinor BcastM;
|
||||
SiteHalfSpinor SiteChiP;
|
||||
SiteHalfSpinor SiteChiM;
|
||||
|
||||
// Ls*Ls * 2 * 12 * vol flops
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
|
||||
|
||||
int s = s2 + l*LLs;
|
||||
int lex = s2 + LLs*site;
|
||||
|
||||
if( s2==0 && l==0 ){
|
||||
SiteChiP=zero;
|
||||
SiteChiM=zero;
|
||||
}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
|
||||
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
|
||||
}}
|
||||
}}
|
||||
|
||||
{
|
||||
int lex = s1 + LLs*site;
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
|
||||
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
|
||||
}}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
{
|
||||
// pointers
|
||||
// MASK_REGS;
|
||||
#define Chi_00 %%zmm1
|
||||
#define Chi_01 %%zmm2
|
||||
#define Chi_02 %%zmm3
|
||||
#define Chi_10 %%zmm4
|
||||
#define Chi_11 %%zmm5
|
||||
#define Chi_12 %%zmm6
|
||||
#define Chi_20 %%zmm7
|
||||
#define Chi_21 %%zmm8
|
||||
#define Chi_22 %%zmm9
|
||||
#define Chi_30 %%zmm10
|
||||
#define Chi_31 %%zmm11
|
||||
#define Chi_32 %%zmm12
|
||||
|
||||
#define BCAST0 %%zmm13
|
||||
#define BCAST1 %%zmm14
|
||||
#define BCAST2 %%zmm15
|
||||
#define BCAST3 %%zmm16
|
||||
#define BCAST4 %%zmm17
|
||||
#define BCAST5 %%zmm18
|
||||
#define BCAST6 %%zmm19
|
||||
#define BCAST7 %%zmm20
|
||||
#define BCAST8 %%zmm21
|
||||
#define BCAST9 %%zmm22
|
||||
#define BCAST10 %%zmm23
|
||||
#define BCAST11 %%zmm24
|
||||
|
||||
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
|
||||
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
|
||||
int lex = s2 + LLs*site;
|
||||
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
|
||||
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
|
||||
uint64_t a2 = (uint64_t) &psi[lex];
|
||||
|
||||
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
|
||||
|
||||
if((s2+l)==0) {
|
||||
asm(
|
||||
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
|
||||
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
|
||||
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
|
||||
VBCASTCDUP(0,%2,BCAST0)
|
||||
VBCASTCDUP(1,%2,BCAST1)
|
||||
VBCASTCDUP(2,%2,BCAST2)
|
||||
VBCASTCDUP(3,%2,BCAST3)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
|
||||
VMULMEM(0,%1,BCAST8,Chi_22)
|
||||
VMULMEM(0,%1,BCAST9,Chi_30)
|
||||
VMULMEM(0,%1,BCAST10,Chi_31)
|
||||
VMULMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
} else {
|
||||
asm(
|
||||
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
}
|
||||
|
||||
a0 = a0 + incr;
|
||||
a1 = a1 + incr;
|
||||
a2 = a2 + sizeof(Simd::scalar_type);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int lexa = s1+LLs*site;
|
||||
asm (
|
||||
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
|
||||
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
|
||||
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
|
||||
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
|
||||
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef Chi_00
|
||||
#undef Chi_01
|
||||
#undef Chi_02
|
||||
#undef Chi_10
|
||||
#undef Chi_11
|
||||
#undef Chi_12
|
||||
#undef Chi_20
|
||||
#undef Chi_21
|
||||
#undef Chi_22
|
||||
#undef Chi_30
|
||||
#undef Chi_31
|
||||
#undef Chi_32
|
||||
|
||||
#undef BCAST0
|
||||
#undef BCAST1
|
||||
#undef BCAST2
|
||||
#undef BCAST3
|
||||
#undef BCAST4
|
||||
#undef BCAST5
|
||||
#undef BCAST6
|
||||
#undef BCAST7
|
||||
#undef BCAST8
|
||||
#undef BCAST9
|
||||
#undef BCAST10
|
||||
#undef BCAST11
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
// Z-mobius version
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
|
||||
exit(-1);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
Vector<iSinglet<Simd>> Matp;
|
||||
Vector<iSinglet<Simd>> Matm;
|
||||
Vector<iSinglet<Simd>>* _Matp;
|
||||
Vector<iSinglet<Simd>>* _Matm;
|
||||
|
||||
// MooeeInternalCompute(dag,inv,Matp,Matm);
|
||||
if(inv && dag){
|
||||
_Matp = &this->MatpInvDag;
|
||||
_Matm = &this->MatmInvDag;
|
||||
}
|
||||
|
||||
if(inv && (!dag)){
|
||||
_Matp = &this->MatpInv;
|
||||
_Matm = &this->MatmInv;
|
||||
}
|
||||
|
||||
if(!inv){
|
||||
MooeeInternalCompute(dag, inv, Matp, Matm);
|
||||
_Matp = &Matp;
|
||||
_Matm = &Matm;
|
||||
}
|
||||
|
||||
assert(_Matp->size() == Ls*LLs);
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
if(switcheroo<Coeff_t>::iscomplex()){
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
} else {
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_VEC
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
|
||||
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -30,60 +30,181 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHIMU \
|
||||
{const SiteSpinor & ref (in._odata[offset]); \
|
||||
Chimu_00=ref()(0)(0);\
|
||||
Chimu_01=ref()(0)(1);\
|
||||
Chimu_02=ref()(0)(2);\
|
||||
Chimu_10=ref()(1)(0);\
|
||||
Chimu_11=ref()(1)(1);\
|
||||
Chimu_12=ref()(1)(2);\
|
||||
Chimu_20=ref()(2)(0);\
|
||||
Chimu_21=ref()(2)(1);\
|
||||
Chimu_22=ref()(2)(2);\
|
||||
Chimu_30=ref()(3)(0);\
|
||||
Chimu_31=ref()(3)(1);\
|
||||
Chimu_32=ref()(3)(2);}
|
||||
#define LOAD_CHIMU_BODY(F) \
|
||||
Chimu_00=ref(F)(0)(0); \
|
||||
Chimu_01=ref(F)(0)(1); \
|
||||
Chimu_02=ref(F)(0)(2); \
|
||||
Chimu_10=ref(F)(1)(0); \
|
||||
Chimu_11=ref(F)(1)(1); \
|
||||
Chimu_12=ref(F)(1)(2); \
|
||||
Chimu_20=ref(F)(2)(0); \
|
||||
Chimu_21=ref(F)(2)(1); \
|
||||
Chimu_22=ref(F)(2)(2); \
|
||||
Chimu_30=ref(F)(3)(0); \
|
||||
Chimu_31=ref(F)(3)(1); \
|
||||
Chimu_32=ref(F)(3)(2)
|
||||
|
||||
#define LOAD_CHI\
|
||||
{const SiteHalfSpinor &ref(buf[offset]); \
|
||||
Chi_00 = ref()(0)(0);\
|
||||
Chi_01 = ref()(0)(1);\
|
||||
Chi_02 = ref()(0)(2);\
|
||||
Chi_10 = ref()(1)(0);\
|
||||
Chi_11 = ref()(1)(1);\
|
||||
Chi_12 = ref()(1)(2);}
|
||||
#define LOAD_CHIMU(DIR,F,PERM) \
|
||||
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
|
||||
|
||||
#define LOAD_CHI_BODY(F) \
|
||||
Chi_00 = ref(F)(0)(0);\
|
||||
Chi_01 = ref(F)(0)(1);\
|
||||
Chi_02 = ref(F)(0)(2);\
|
||||
Chi_10 = ref(F)(1)(0);\
|
||||
Chi_11 = ref(F)(1)(1);\
|
||||
Chi_12 = ref(F)(1)(2)
|
||||
|
||||
#define LOAD_CHI(DIR,F,PERM) \
|
||||
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
|
||||
|
||||
|
||||
//G-parity implementations using in-place intrinsic ops
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
//0h,1l -> 1l,0h
|
||||
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
|
||||
//Pulled fermion through forwards face, GPBC on upper component
|
||||
//Need 0= 0l 1h 1= 1l 0h
|
||||
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
|
||||
//Pulled fermion through backwards face, GPBC on lower component
|
||||
//Need 0= 1l 0h 1= 0l 1h
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
|
||||
permute##PERM(tmp1, ref(1)(S)(C)); \
|
||||
exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
|
||||
INTO = tmp2;
|
||||
|
||||
//0l 0h -> 0h 0l
|
||||
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
|
||||
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
|
||||
permute##PERM(tmp1, ref(0)(S)(C)); \
|
||||
exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
|
||||
INTO = tmp2;
|
||||
|
||||
|
||||
|
||||
|
||||
#define LOAD_CHI_SETUP(DIR,F) \
|
||||
g = F; \
|
||||
direction = st._directions[DIR]; \
|
||||
distance = st._distances[DIR]; \
|
||||
sl = st._grid->_simd_layout[direction]; \
|
||||
inplace_twist = 0; \
|
||||
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
|
||||
if(sl == 1){ \
|
||||
g = (F+1) % 2; \
|
||||
}else{ \
|
||||
inplace_twist = 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||
{ const SiteSpinor &ref(in._odata[offset]); \
|
||||
LOAD_CHI_SETUP(DIR,F); \
|
||||
if(!inplace_twist){ \
|
||||
LOAD_CHIMU_BODY(g); \
|
||||
}else{ \
|
||||
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
|
||||
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
|
||||
DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
|
||||
}else{ \
|
||||
DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||
{ const SiteHalfSpinor &ref(buf[offset]); \
|
||||
LOAD_CHI_SETUP(DIR,F); \
|
||||
if(!inplace_twist){ \
|
||||
LOAD_CHI_BODY(g); \
|
||||
}else{ \
|
||||
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
|
||||
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
|
||||
DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
|
||||
DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
|
||||
DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
}else{ \
|
||||
DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
|
||||
DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
|
||||
DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN(A)\
|
||||
{auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00;\
|
||||
UChi_10 = U_00*Chi_10;\
|
||||
UChi_01 = U_10*Chi_00;\
|
||||
UChi_11 = U_10*Chi_10;\
|
||||
UChi_02 = U_20*Chi_00;\
|
||||
UChi_12 = U_20*Chi_10;\
|
||||
UChi_00+= U_01*Chi_01;\
|
||||
UChi_10+= U_01*Chi_11;\
|
||||
UChi_01+= U_11*Chi_01;\
|
||||
UChi_11+= U_11*Chi_11;\
|
||||
UChi_02+= U_21*Chi_01;\
|
||||
UChi_12+= U_21*Chi_11;\
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02;\
|
||||
UChi_10+= U_00*Chi_12;\
|
||||
UChi_01+= U_10*Chi_02;\
|
||||
UChi_11+= U_10*Chi_12;\
|
||||
UChi_02+= U_20*Chi_02;\
|
||||
UChi_12+= U_20*Chi_12;}
|
||||
#define MULT_2SPIN_BODY \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00; \
|
||||
UChi_10 = U_00*Chi_10; \
|
||||
UChi_01 = U_10*Chi_00; \
|
||||
UChi_11 = U_10*Chi_10; \
|
||||
UChi_02 = U_20*Chi_00; \
|
||||
UChi_12 = U_20*Chi_10; \
|
||||
UChi_00+= U_01*Chi_01; \
|
||||
UChi_10+= U_01*Chi_11; \
|
||||
UChi_01+= U_11*Chi_01; \
|
||||
UChi_11+= U_11*Chi_11; \
|
||||
UChi_02+= U_21*Chi_01; \
|
||||
UChi_12+= U_21*Chi_11; \
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02; \
|
||||
UChi_10+= U_00*Chi_12; \
|
||||
UChi_01+= U_10*Chi_02; \
|
||||
UChi_11+= U_10*Chi_12; \
|
||||
UChi_02+= U_20*Chi_02; \
|
||||
UChi_12+= U_20*Chi_12
|
||||
|
||||
|
||||
#define MULT_2SPIN(A,F) \
|
||||
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
|
||||
|
||||
#define MULT_2SPIN_GPARITY(A,F) \
|
||||
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
@ -307,84 +428,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
result_31-= UChi_11; \
|
||||
result_32-= UChi_12;
|
||||
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU; \
|
||||
LOAD_CHIMU_IMPL(DIR,F,PERM); \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI; \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
} \
|
||||
MULT_2SPIN(DIR); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON;
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU; \
|
||||
LOAD_CHIMU_IMPL(DIR,F,PERM); \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else if ( st.same_node[DIR] ) { \
|
||||
LOAD_CHI; \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
} \
|
||||
if (local || st.same_node[DIR] ) { \
|
||||
MULT_2SPIN(DIR); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON; \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
||||
LOAD_CHI; \
|
||||
MULT_2SPIN(DIR); \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define HAND_RESULT(ss) \
|
||||
#define HAND_RESULT(ss,F) \
|
||||
{ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
vstream(ref()(0)(0),result_00); \
|
||||
vstream(ref()(0)(1),result_01); \
|
||||
vstream(ref()(0)(2),result_02); \
|
||||
vstream(ref()(1)(0),result_10); \
|
||||
vstream(ref()(1)(1),result_11); \
|
||||
vstream(ref()(1)(2),result_12); \
|
||||
vstream(ref()(2)(0),result_20); \
|
||||
vstream(ref()(2)(1),result_21); \
|
||||
vstream(ref()(2)(2),result_22); \
|
||||
vstream(ref()(3)(0),result_30); \
|
||||
vstream(ref()(3)(1),result_31); \
|
||||
vstream(ref()(3)(2),result_32); \
|
||||
vstream(ref(F)(0)(0),result_00); \
|
||||
vstream(ref(F)(0)(1),result_01); \
|
||||
vstream(ref(F)(0)(2),result_02); \
|
||||
vstream(ref(F)(1)(0),result_10); \
|
||||
vstream(ref(F)(1)(1),result_11); \
|
||||
vstream(ref(F)(1)(2),result_12); \
|
||||
vstream(ref(F)(2)(0),result_20); \
|
||||
vstream(ref(F)(2)(1),result_21); \
|
||||
vstream(ref(F)(2)(2),result_22); \
|
||||
vstream(ref(F)(3)(0),result_30); \
|
||||
vstream(ref(F)(3)(1),result_31); \
|
||||
vstream(ref(F)(3)(2),result_32); \
|
||||
}
|
||||
|
||||
#define HAND_RESULT_EXT(ss) \
|
||||
#define HAND_RESULT_EXT(ss,F) \
|
||||
if (nmu){ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
ref()(0)(0)+=result_00; \
|
||||
ref()(0)(1)+=result_01; \
|
||||
ref()(0)(2)+=result_02; \
|
||||
ref()(1)(0)+=result_10; \
|
||||
ref()(1)(1)+=result_11; \
|
||||
ref()(1)(2)+=result_12; \
|
||||
ref()(2)(0)+=result_20; \
|
||||
ref()(2)(1)+=result_21; \
|
||||
ref()(2)(2)+=result_22; \
|
||||
ref()(3)(0)+=result_30; \
|
||||
ref()(3)(1)+=result_31; \
|
||||
ref()(3)(2)+=result_32; \
|
||||
ref(F)(0)(0)+=result_00; \
|
||||
ref(F)(0)(1)+=result_01; \
|
||||
ref(F)(0)(2)+=result_02; \
|
||||
ref(F)(1)(0)+=result_10; \
|
||||
ref(F)(1)(1)+=result_11; \
|
||||
ref(F)(1)(2)+=result_12; \
|
||||
ref(F)(2)(0)+=result_20; \
|
||||
ref(F)(2)(1)+=result_21; \
|
||||
ref(F)(2)(2)+=result_22; \
|
||||
ref(F)(3)(0)+=result_30; \
|
||||
ref(F)(3)(1)+=result_31; \
|
||||
ref(F)(3)(2)+=result_32; \
|
||||
}
|
||||
|
||||
|
||||
@ -463,15 +587,18 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -485,16 +612,19 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
@ -509,16 +639,20 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -532,16 +666,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
@ -557,16 +695,20 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -581,16 +723,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
@ -646,11 +792,124 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
|
||||
|
||||
|
||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
}
|
||||
|
||||
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
|
264
lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h
Normal file
264
lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h
Normal file
@ -0,0 +1,264 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Implementation of exact one flavour algorithm (EOFA) //
|
||||
// using fermion classes defined in: //
|
||||
// Grid/qcd/action/fermion/DomainWallEOFAFermion.h (Shamir) //
|
||||
// Grid/qcd/action/fermion/MobiusEOFAFermion.h (Mobius) //
|
||||
// arXiv: 1403.1683, 1706.05843 //
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
|
||||
#define QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Exact one flavour implementation of DWF determinant ratio //
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
class ExactOneFlavourRatioPseudoFermionAction : public Action<typename Impl::GaugeField>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef OneFlavourRationalParams Params;
|
||||
Params param;
|
||||
MultiShiftFunction PowerNegHalf;
|
||||
|
||||
private:
|
||||
bool use_heatbath_forecasting;
|
||||
AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
|
||||
AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
|
||||
SchurRedBlackDiagMooeeSolve<FermionField> Solver;
|
||||
FermionField Phi; // the pseudofermion field for this trajectory
|
||||
|
||||
public:
|
||||
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
|
||||
OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S),
|
||||
Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
|
||||
{
|
||||
AlgRemez remez(param.lo, param.hi, param.precision);
|
||||
|
||||
// MdagM^(+- 1/2)
|
||||
std::cout << GridLogMessage << "Generating degree " << param.degree << " for x^(-1/2)" << std::endl;
|
||||
remez.generateApprox(param.degree, 1, 2);
|
||||
PowerNegHalf.Init(remez, param.tolerance, true);
|
||||
};
|
||||
|
||||
virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; }
|
||||
|
||||
virtual std::string LogParameters() {
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Low :" << param.lo << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] High :" << param.hi << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Max iterations :" << param.MaxIter << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Tolerance :" << param.tolerance << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Degree :" << param.degree << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Precision :" << param.precision << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
// Spin projection
|
||||
void spProj(const FermionField& in, FermionField& out, int sign, int Ls)
|
||||
{
|
||||
if(sign == 1){ for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(out, 0.0, in, 1.0, in, s, s); } }
|
||||
else{ for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(out, 0.0, in, 1.0, in, s, s); } }
|
||||
}
|
||||
|
||||
// EOFA heatbath: see Eqn. (29) of arXiv:1706.05843
|
||||
// We generate a Gaussian noise vector \eta, and then compute
|
||||
// \Phi = M_{\rm EOFA}^{-1/2} * \eta
|
||||
// using a rational approximation to the inverse square root
|
||||
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
|
||||
{
|
||||
Lop.ImportGauge(U);
|
||||
Rop.ImportGauge(U);
|
||||
|
||||
FermionField eta (Lop.FermionGrid());
|
||||
FermionField CG_src (Lop.FermionGrid());
|
||||
FermionField CG_soln (Lop.FermionGrid());
|
||||
FermionField Forecast_src(Lop.FermionGrid());
|
||||
std::vector<FermionField> tmp(2, Lop.FermionGrid());
|
||||
|
||||
// Use chronological inverter to forecast solutions across poles
|
||||
std::vector<FermionField> prev_solns;
|
||||
if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); }
|
||||
ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast;
|
||||
|
||||
// Seed with Gaussian noise vector (var = 0.5)
|
||||
RealD scale = std::sqrt(0.5);
|
||||
gaussian(pRNG,eta);
|
||||
eta = eta * scale;
|
||||
printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
|
||||
|
||||
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
|
||||
RealD N(PowerNegHalf.norm);
|
||||
for(int k=0; k<param.degree; ++k){ N += PowerNegHalf.residues[k] / ( 1.0 + PowerNegHalf.poles[k] ); }
|
||||
Phi = eta * N;
|
||||
|
||||
// LH terms:
|
||||
// \Phi = \Phi + k \sum_{k=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf)
|
||||
// - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta
|
||||
RealD gamma_l(0.0);
|
||||
spProj(eta, tmp[0], -1, Lop.Ls);
|
||||
Lop.Omega(tmp[0], tmp[1], -1, 0);
|
||||
G5R5(CG_src, tmp[1]);
|
||||
tmp[1] = zero;
|
||||
for(int k=0; k<param.degree; ++k){
|
||||
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
|
||||
Lop.RefreshShiftCoefficients(-gamma_l);
|
||||
if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
|
||||
Lop.Mdag(CG_src, Forecast_src);
|
||||
CG_soln = Forecast(Lop, Forecast_src, prev_solns);
|
||||
Solver(Lop, CG_src, CG_soln);
|
||||
prev_solns.push_back(CG_soln);
|
||||
} else {
|
||||
CG_soln = zero; // Just use zero as the initial guess
|
||||
Solver(Lop, CG_src, CG_soln);
|
||||
}
|
||||
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
|
||||
tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
|
||||
}
|
||||
Lop.Omega(tmp[1], tmp[0], -1, 1);
|
||||
spProj(tmp[0], tmp[1], -1, Lop.Ls);
|
||||
Phi = Phi + tmp[1];
|
||||
|
||||
// RH terms:
|
||||
// \Phi = \Phi - k \sum_{k=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb)
|
||||
// + \gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta
|
||||
spProj(eta, tmp[0], 1, Rop.Ls);
|
||||
Rop.Omega(tmp[0], tmp[1], 1, 0);
|
||||
G5R5(CG_src, tmp[1]);
|
||||
tmp[1] = zero;
|
||||
if(use_heatbath_forecasting){ prev_solns.clear(); } // empirically, LH solns don't help for RH solves
|
||||
for(int k=0; k<param.degree; ++k){
|
||||
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
|
||||
Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
|
||||
if(use_heatbath_forecasting){
|
||||
Rop.Mdag(CG_src, Forecast_src);
|
||||
CG_soln = Forecast(Rop, Forecast_src, prev_solns);
|
||||
Solver(Rop, CG_src, CG_soln);
|
||||
prev_solns.push_back(CG_soln);
|
||||
} else {
|
||||
CG_soln = zero;
|
||||
Solver(Rop, CG_src, CG_soln);
|
||||
}
|
||||
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
|
||||
tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
|
||||
}
|
||||
Rop.Omega(tmp[1], tmp[0], 1, 1);
|
||||
spProj(tmp[0], tmp[1], 1, Rop.Ls);
|
||||
Phi = Phi + tmp[1];
|
||||
|
||||
// Reset shift coefficients for energy and force evals
|
||||
Lop.RefreshShiftCoefficients(0.0);
|
||||
Rop.RefreshShiftCoefficients(-1.0);
|
||||
};
|
||||
|
||||
// EOFA action: see Eqn. (10) of arXiv:1706.05843
|
||||
virtual RealD S(const GaugeField& U)
|
||||
{
|
||||
Lop.ImportGauge(U);
|
||||
Rop.ImportGauge(U);
|
||||
|
||||
FermionField spProj_Phi(Lop.FermionGrid());
|
||||
std::vector<FermionField> tmp(2, Lop.FermionGrid());
|
||||
|
||||
// S = <\Phi|\Phi>
|
||||
RealD action(norm2(Phi));
|
||||
|
||||
// LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
|
||||
spProj(Phi, spProj_Phi, -1, Lop.Ls);
|
||||
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
|
||||
G5R5(tmp[1], tmp[0]);
|
||||
tmp[0] = zero;
|
||||
Solver(Lop, tmp[1], tmp[0]);
|
||||
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
|
||||
Lop.Omega(tmp[1], tmp[0], -1, 1);
|
||||
action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
|
||||
|
||||
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
|
||||
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
|
||||
spProj(Phi, spProj_Phi, 1, Rop.Ls);
|
||||
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
|
||||
G5R5(tmp[1], tmp[0]);
|
||||
tmp[0] = zero;
|
||||
Solver(Rop, tmp[1], tmp[0]);
|
||||
Rop.Dtilde(tmp[0], tmp[1]);
|
||||
Rop.Omega(tmp[1], tmp[0], 1, 1);
|
||||
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
|
||||
|
||||
return action;
|
||||
};
|
||||
|
||||
// EOFA pseudofermion force: see Eqns. (34)-(36) of arXiv:1706.05843
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU)
|
||||
{
|
||||
Lop.ImportGauge(U);
|
||||
Rop.ImportGauge(U);
|
||||
|
||||
FermionField spProj_Phi (Lop.FermionGrid());
|
||||
FermionField Omega_spProj_Phi(Lop.FermionGrid());
|
||||
FermionField CG_src (Lop.FermionGrid());
|
||||
FermionField Chi (Lop.FermionGrid());
|
||||
FermionField g5_R5_Chi (Lop.FermionGrid());
|
||||
|
||||
GaugeField force(Lop.GaugeGrid());
|
||||
|
||||
// LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L}
|
||||
// \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi
|
||||
spProj(Phi, spProj_Phi, -1, Lop.Ls);
|
||||
Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
|
||||
G5R5(CG_src, Omega_spProj_Phi);
|
||||
spProj_Phi = zero;
|
||||
Solver(Lop, CG_src, spProj_Phi);
|
||||
Lop.Dtilde(spProj_Phi, Chi);
|
||||
G5R5(g5_R5_Chi, Chi);
|
||||
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
|
||||
dSdU = Lop.k * force;
|
||||
|
||||
// RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{}
|
||||
// \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi
|
||||
spProj(Phi, spProj_Phi, 1, Rop.Ls);
|
||||
Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
|
||||
G5R5(CG_src, Omega_spProj_Phi);
|
||||
spProj_Phi = zero;
|
||||
Solver(Rop, CG_src, spProj_Phi);
|
||||
Rop.Dtilde(spProj_Phi, Chi);
|
||||
G5R5(g5_R5_Chi, Chi);
|
||||
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
|
||||
dSdU = dSdU - Rop.k * force;
|
||||
};
|
||||
};
|
||||
}}
|
||||
|
||||
#endif
|
@ -38,5 +38,6 @@ directory
|
||||
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
|
||||
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
|
||||
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
|
||||
#include <Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h>
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user